Skip to content

Commit

Permalink
Return the fanout histogram in a vector, not a map.
Browse files Browse the repository at this point in the history
Change-Id: I742dcf882493f27ec3d35514bc8e9ff77b71686b
Reviewed-on: https://code-review.googlesource.com/c/re2/+/54334
Reviewed-by: Paul Wankadia <junyer@google.com>
  • Loading branch information
junyer committed Apr 5, 2020
1 parent f53133c commit 58141dc
Show file tree
Hide file tree
Showing 4 changed files with 47 additions and 6 deletions.
4 changes: 2 additions & 2 deletions re2/fuzzing/re2_fuzzer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@
#include <fuzzer/FuzzedDataProvider.h>
#include <stddef.h>
#include <stdint.h>
#include <map>
#include <memory>
#include <queue>
#include <string>
#include <vector>

#include "re2/prefilter.h"
#include "re2/re2.h"
Expand Down Expand Up @@ -87,7 +87,7 @@ void TestOneInput(StringPiece pattern, const RE2::Options& options,

// Don't waste time fuzzing high-fanout programs.
// They can cause bug reports due to fuzzer timeouts.
std::map<int, int> histogram;
std::vector<int> histogram;
int fanout = re.ProgramFanout(&histogram);
if (fanout > 9)
return;
Expand Down
34 changes: 34 additions & 0 deletions re2/re2.cc
Original file line number Diff line number Diff line change
Expand Up @@ -310,6 +310,40 @@ static int FindMSBSet(uint32_t n) {
#endif
}

// Buckets the per-instruction fanout of prog by powers of 2.
// A nonzero fanout value lands in the bucket whose number is
// FindMSBSet(value), bumped by one when value is not an exact
// power of 2; entries whose fanout is zero are not counted.
// If histogram is not null, it is overwritten with the bucket
// counts from bucket 0 up to the largest non-empty bucket.
// Returns the number of the largest non-empty bucket, which
// is -1 when no entry has a nonzero fanout.
static int Fanout(Prog* prog, std::vector<int>* histogram) {
  SparseArray<int> fanout(prog->size());
  prog->Fanout(&fanout);

  // 32 buckets: one per possible bucket number of a positive int.
  int counts[32] = {};
  int num_buckets = 0;
  for (SparseArray<int>::iterator it = fanout.begin();
       it != fanout.end(); ++it) {
    const uint32_t value = it->value();
    if (value != 0) {
      int bucket = FindMSBSet(value);
      // Round up unless value is an exact power of 2.
      if ((value & (value - 1)) != 0)
        ++bucket;
      ++counts[bucket];
      if (num_buckets < bucket + 1)
        num_buckets = bucket + 1;
    }
  }

  if (histogram != NULL)
    histogram->assign(counts, counts + num_buckets);
  return num_buckets - 1;
}

// Computes the fanout histogram of the forward program.
// Returns -1 without touching histogram when prog_ is null;
// otherwise returns whatever Fanout() reports for prog_.
int RE2::ProgramFanout(std::vector<int>* histogram) const {
  return (prog_ != NULL) ? Fanout(prog_, histogram) : -1;
}

int RE2::ReverseProgramFanout(std::vector<int>* histogram) const {
if (prog_ == NULL)
return -1;
Prog* prog = ReverseProg();
if (prog == NULL)
return -1;
return Fanout(prog, histogram);
}

static int Fanout(Prog* prog, std::map<int, int>* histogram) {
SparseArray<int> fanout(prog->size());
prog->Fanout(&fanout);
Expand Down
12 changes: 9 additions & 3 deletions re2/re2.h
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,7 @@
#include <map>
#include <mutex>
#include <string>
#include <vector>

#if defined(__APPLE__)
#include <TargetConditionals.h>
Expand Down Expand Up @@ -291,7 +292,13 @@ class RE2 {
int ProgramSize() const;
int ReverseProgramSize() const;

// EXPERIMENTAL! SUBJECT TO CHANGE!
// If histogram is not null, outputs the program fanout
// as a histogram bucketed by powers of 2.
// Returns the number of the largest non-empty bucket,
// or -1 if the program is unavailable or every bucket is empty.
int ProgramFanout(std::vector<int>* histogram) const;
int ReverseProgramFanout(std::vector<int>* histogram) const;

// DEPRECATED! WILL BE REMOVED!
// Outputs the program fanout as a histogram bucketed by powers of 2.
// Returns the number of the largest non-empty bucket.
int ProgramFanout(std::map<int, int>* histogram) const;
Expand Down Expand Up @@ -630,8 +637,7 @@ class RE2 {
Encoding encoding() const { return encoding_; }
void set_encoding(Encoding encoding) { encoding_ = encoding; }

// Legacy interface to encoding.
// TODO(rsc): Remove once clients have been converted.
// DEPRECATED! WILL BE REMOVED!
bool utf8() const { return encoding_ == EncodingUTF8; }
void set_utf8(bool b) {
if (b) {
Expand Down
3 changes: 2 additions & 1 deletion re2/testing/re2_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#include <map>
#include <string>
#include <utility>
#include <vector>
#if !defined(_MSC_VER) && !defined(__CYGWIN__) && !defined(__MINGW32__)
#include <sys/mman.h>
#include <unistd.h> /* for sysconf */
Expand Down Expand Up @@ -473,7 +474,7 @@ TEST(ProgramFanout, BigProgram) {
RE2 re100("(?:(?:(?:(?:(?:.)?){100})*)+)");
RE2 re1000("(?:(?:(?:(?:(?:.)?){1000})*)+)");

std::map<int, int> histogram;
std::vector<int> histogram;

// 3 is the largest non-empty bucket and has 1 element.
ASSERT_EQ(3, re1.ProgramFanout(&histogram));
Expand Down

0 comments on commit 58141dc

Please sign in to comment.