Initialize large table value randomly #9787
Merged: 10 commits, Apr 13, 2018
144 changes: 144 additions & 0 deletions paddle/fluid/operators/uniform_random_table_op.cc
@@ -0,0 +1,144 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/framework/data_type.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/math/math_function.h"
#include "paddle/fluid/platform/device_context.h"

namespace paddle {
namespace operators {

class UniformRandomTableInferShape : public framework::InferShapeBase {
public:
void operator()(framework::InferShapeContext *ctx) const override {
VLOG(3) << "Infershape...";
PADDLE_ENFORCE(ctx->HasOutput("Out"),
"Output(Out) of UniformRandomTableOp should not be null.");

PADDLE_ENFORCE(
ctx->Attrs().Get<float>("min") < ctx->Attrs().Get<float>("max"),
                   "uniform_random's min must be less than max");
auto &shape = ctx->Attrs().Get<std::vector<int>>("shape");
std::vector<int64_t> temp;
temp.reserve(shape.size());
for (auto dim : shape) {
temp.push_back(static_cast<int64_t>(dim));
}
ctx->SetOutputDim("Out", framework::make_ddim(temp));
}
};

class UniformRandomTableOp : public framework::OperatorBase {
public:
using framework::OperatorBase::OperatorBase;

private:
void RunImpl(const framework::Scope &scope,
const platform::Place &dev_place) const override {
VLOG(3) << "RunImpl...";
auto out =
scope.FindVar(Output("Out"))->GetMutable<framework::SelectedRows>();
auto shard_cnt = Attr<int>("shard_cnt");
auto shard_id = Attr<int>("shard_id");
auto max_id = Attr<int>("max_id");
auto shape = Attr<std::vector<int>>("shape");

auto tensor = out->mutable_value();
tensor->Resize(framework::make_ddim(shape));
Review comment (Member): The first dimension of the tensor is not certain; it should be calculated from some attribute, such as id_num / shard_cnt + buffer_size. Do you mean that this calculation is done in Python?

Reply (Contributor Author): I think so, but following the discussion just now in #9787 (comment), I will update this PR accordingly.

// Only allocate the memory of large table on CPU
auto cpu = platform::CPUPlace();
Review comment (Contributor): Do we need to enforce that dev_place is on CPU?

float *data = tensor->mutable_data<float>(cpu);
Review comment (Member): This float should be a template.

VLOG(3) << "generate seed";
Review comment (Contributor): VLOGs not needed.

unsigned int seed = static_cast<unsigned int>(Attr<int>("seed"));
std::minstd_rand engine;
if (seed == 0) {
seed = std::random_device()();
}
engine.seed(seed);
std::uniform_real_distribution<float> dist(Attr<float>("min"),
Attr<float>("max"));
int64_t size = tensor->numel();
for (int64_t i = 0; i < size; ++i) {
data[i] = dist(engine);
}
// initialize rows by round-robin
// TODO(Yancey1989): need to support other way to distribute Ids
VLOG(3) << "calculate rows_size...";
int64_t rows_size = 0;
if (max_id % shard_cnt == 0) {
rows_size = max_id / shard_cnt;
} else {
rows_size = max_id / shard_cnt + 1;
}
auto *rows = out->mutable_rows();
rows->resize(rows_size);
(*rows)[0] = shard_id;
for (int64_t idx = 1; idx < rows_size; ++idx) {
(*rows)[idx] = (*rows)[idx - 1] + shard_cnt;
}
out->set_height(max_id);
Review comment (Contributor): As discussed offline with @Yancey1989 and @jacquesqiao, this needs to be implemented using an auto-growth SelectedRows, initialized randomly with a startup buffer size, such as 128M. This feature may also require implementing the operations that run when "Prefetch" executes.

Reply (Member, @jacquesqiao, Apr 11, 2018): When lookup table op finds that some id is not in the rows of the table, it should push this new id into table.rows to initialize it.

Reply (Contributor Author, @Yancey1989, Apr 11, 2018): @jacquesqiao Yes, and I had an offline discussion with @typhoonzero; maybe we need a new class to represent the Table. I created issue #9841 with the details. If you agree, I will create a new PR to implement it.

Reply (Contributor Author): Regarding @typhoonzero's suggestion to "init it randomly with a startup init buffer size, like 128M": we already have a shape attribute representing the shape of SelectedRows.value(), so maybe we should use only shape to determine the memory size; using both shape and a memory size at the same time may be confusing.

Reply (Contributor Author): And maybe we can reuse uniform_random_op, since they have the same computing logic.

}
};

class UniformRandomTableOpMaker : public framework::OpProtoAndCheckerMaker {
public:
UniformRandomTableOpMaker(OpProto *proto, OpAttrChecker *op_checker)
: framework::OpProtoAndCheckerMaker(proto, op_checker) {
AddOutput("Out",
"(SelectedRows)"
"The output table of uniform random table op.");
AddComment(R"DOC(
Uniform random operator for initializing a table.

This operator initializes a SelectedRows with random values sampled from a
uniform distribution.

)DOC");
AddAttr<int>("max_id",
"(int, required)"
"The maximal Id for the table.");
AddAttr<int>("shard_cnt",
"(int, required)"
"The count of shards for distributing the table.");
AddAttr<int>("shard_id", "(int, required) The current shard ID.");
AddAttr<std::vector<int>>("shape",
"(vector<int>) The shape of the output tensor");
AddAttr<float>("min",
"(float, default -1.0) "
"Minimum value of uniform random")
.SetDefault(-1.0f);
AddAttr<float>("max",
"(float, default 1.0) "
                   "Maximum value of uniform random")
.SetDefault(1.0f);
AddAttr<int>("seed",
"(int, default 0) "
"Random seed used for generating samples. "
                 "0 means use a seed generated by the system. "
"Note that if seed is not 0, this operator will always "
"generate the same random numbers every time.")
.SetDefault(0);
AddAttr<int>("dtype", "(int, default 5(FP32)) Output tensor data type")
.SetDefault(framework::proto::VarType::FP32);
}
};
} // namespace operators
} // namespace paddle

namespace ops = paddle::operators;
REGISTER_OPERATOR(uniform_random_table, ops::UniformRandomTableOp,
ops::UniformRandomTableInferShape,
ops::UniformRandomTableOpMaker,
paddle::framework::EmptyGradOpMaker);
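To make the operator's semantics concrete, here is a minimal NumPy sketch (not Paddle code, and `uniform_random_table` below is a hypothetical helper) of what RunImpl produces: a uniformly filled value tensor plus the row ids this shard owns under round-robin distribution, with height set to max_id.

```python
import numpy as np

def uniform_random_table(shape, min_val, max_val, seed,
                         shard_cnt, shard_id, max_id):
    """Sketch of the operator's output: (value, rows, height)."""
    rng = np.random.RandomState(seed)
    # Fill the value tensor from U(min_val, max_val), as the C++ loop does.
    value = rng.uniform(min_val, max_val, size=shape).astype("float32")
    # ceil(max_id / shard_cnt) ids land on each shard under round-robin.
    rows_size = max_id // shard_cnt + (1 if max_id % shard_cnt else 0)
    # Start at shard_id and step by shard_cnt, matching the C++ loop.
    rows = [shard_id + i * shard_cnt for i in range(rows_size)]
    return value, rows, max_id

value, rows, height = uniform_random_table(
    shape=[4, 784], min_val=-5.0, max_val=10.0, seed=10,
    shard_cnt=3, shard_id=1, max_id=10)
# With these attributes, rows == [1, 4, 7, 10] and height == 10,
# the same values the unit test in this PR asserts.
```

Note that the row assignment is pure arithmetic on the attributes, so it is deterministic regardless of the seed; only the value tensor depends on the random engine.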
@@ -0,0 +1,66 @@
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import unittest
import numpy as np
from op_test import OpTest
import paddle.fluid.core as core
from paddle.fluid.op import Operator


def output_hist(out):
hist, _ = np.histogram(out, range=(-5, 10))
hist = hist.astype("float32")
hist /= float(out.size)
prob = 0.1 * np.ones((10))
return hist, prob
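output_hist relies on np.histogram's default of 10 bins over the given range (-5, 10), so a uniform sample should put roughly 10% of its mass in each bin, which is what prob encodes. A standalone sketch of the same check on a plain NumPy sample (larger than the op's 4x784 output, to keep the tolerance comfortable):

```python
import numpy as np

# 10 equal bins over [-5, 10) should each hold ~10% of uniform draws.
sample = np.random.RandomState(0).uniform(-5.0, 10.0, size=100000)
hist, _ = np.histogram(sample, range=(-5, 10))  # 10 bins by default
hist = hist.astype("float32") / sample.size
# Each bin's empirical frequency should be within 0.01 of 0.1.
assert np.allclose(hist, 0.1 * np.ones(10), rtol=0, atol=0.01), hist
```

With 100000 samples the standard deviation of each bin frequency is about 0.001, so the 0.01 tolerance is a loose, reliable bound; the unit test applies the same tolerance to the operator's 4x784 output.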


class TestUniformRandomTableOp(unittest.TestCase):
def get_places(self):
places = [core.CPUPlace()]
if core.is_compiled_with_cuda():
places.append(core.CUDAPlace(0))
return places

def test_check_output(self):
for place in self.get_places():
self.check_with_place(place)

def check_with_place(self, place):
scope = core.Scope()
out = scope.var("X").get_selected_rows()

op = Operator(
"uniform_random_table",
Out="X",
shape=[4, 784],
min=-5.0,
max=10.0,
seed=10,
shard_cnt=3,
shard_id=1,
max_id=10)
op.run(scope, place)
self.assertEqual(out.rows(), [1, 4, 7, 10])
self.assertEqual(out.height(), 10)
self.assertEqual(out.get_tensor().shape(), [4, 784])
hist, prob = output_hist(np.array(out.get_tensor()))
self.assertTrue(
np.allclose(
hist, prob, rtol=0, atol=0.01), "hist: " + str(hist))


if __name__ == "__main__":
unittest.main()