From 1436252c019dec9a92eb9c02ffe54d9c662d92bb Mon Sep 17 00:00:00 2001
From: Leran Wang <2428592483@qq.com>
Date: Tue, 5 Sep 2023 11:46:57 +0800
Subject: [PATCH 01/14] Add custom layer for int8-quantized LLM

---
 src/CMakeLists.txt       |  1 +
 src/layer/linearint8.cpp | 76 ++++++++++++++++++++++++++++++++++++++++
 src/layer/linearint8.h   | 37 +++++++++++++++++++
 3 files changed, 114 insertions(+)
 create mode 100644 src/layer/linearint8.cpp
 create mode 100644 src/layer/linearint8.h

diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 4a4ea24e636..4dd6812f5c0 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -165,6 +165,7 @@ ncnn_add_layer(CopyTo)
 ncnn_add_layer(Erf)
 ncnn_add_layer(Diag)
 ncnn_add_layer(CELU)
+ncnn_add_layer(LinearInt8)
 
 if(NCNN_VULKAN)
     ncnn_add_shader(${CMAKE_CURRENT_SOURCE_DIR}/convert_ycbcr.comp)
diff --git a/src/layer/linearint8.cpp b/src/layer/linearint8.cpp
new file mode 100644
index 00000000000..8be3c026020
--- /dev/null
+++ b/src/layer/linearint8.cpp
@@ -0,0 +1,76 @@
+// Tencent is pleased to support the open source community by making ncnn available.
+//
+// Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved.
+//
+// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
+//
+// https://opensource.org/licenses/BSD-3-Clause
+//
+// Unless required by applicable law or agreed to in writing, software distributed
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+#include "linearint8.h"
+
+namespace ncnn {
+
+LinearInt8::LinearInt8()
+{
+    one_blob_only = true;
+    support_inplace = false;
+}
+
+int LinearInt8::load_param(const ParamDict& pd)
+{
+    in_dim = pd.get(0, 0);
+    out_dim = pd.get(1, 0);
+    group_size = pd.get(2, 1);
+    return 0;
+}
+
+int LinearInt8::load_model(const ModelBin& mb)
+{
+    scales = mb.load(in_dim * out_dim / group_size, 1);
+    weight = mb.load(in_dim * out_dim, 0);
+    if (weight.elemsize != 1)
+        return -1;
+    return 0;
+}
+
+int LinearInt8::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const
+{
+    if (bottom_blob.dims != 2 || bottom_blob.w != in_dim)
+        return -1;
+
+    int w = bottom_blob.w;
+    int h = bottom_blob.h;
+    size_t elemsize = bottom_blob.elemsize;
+
+    top_blob.create(out_dim, h, elemsize, opt.blob_allocator);
+    if (top_blob.empty())
+        return -100;
+
+    for (int j = 0; j < h; j++)
+    {
+        const float* m = bottom_blob.row(j);
+        float* out = top_blob.row(j);
+
+#pragma omp parallel for num_threads(opt.num_threads)
+        for (int p = 0; p < out_dim; p++)
+        {
+            int base = w * p;
+            out[p] = 0;
+            for (int i = 0; i < w; i++)
+            {
+                int index = base + i;
+                out[p] += m[i] * ((const int8_t*)weight)[index] * scales[index / group_size];
+            }
+        }
+    }
+
+    return 0;
+}
+
+} // namespace ncnn
\ No newline at end of file
diff --git a/src/layer/linearint8.h b/src/layer/linearint8.h
new file mode 100644
index 00000000000..374a139eb1b
--- /dev/null
+++ b/src/layer/linearint8.h
@@ -0,0 +1,37 @@
+// Tencent is pleased to support the open source community by making ncnn available.
+//
+// Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved.
+//
+// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+// in compliance with the License. 
You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#include "layer.h" + +namespace ncnn { + +class LinearInt8 : public Layer +{ +public: + LinearInt8(); + + virtual int load_param(const ParamDict& pd); + + virtual int load_model(const ModelBin& mb); + + virtual int forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const; + + int in_dim; + int out_dim; + int group_size; + Mat weight; + Mat scales; +}; + +} // namespace ncnn \ No newline at end of file From 6e8d028b6e804639ac16072a3ffecc53380a46da Mon Sep 17 00:00:00 2001 From: Leran Wang <2428592483@qq.com> Date: Tue, 5 Sep 2023 11:48:41 +0800 Subject: [PATCH 02/14] Fix trailing newlines --- src/layer/linearint8.cpp | 2 +- src/layer/linearint8.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/layer/linearint8.cpp b/src/layer/linearint8.cpp index 8be3c026020..b58ba086cd5 100644 --- a/src/layer/linearint8.cpp +++ b/src/layer/linearint8.cpp @@ -73,4 +73,4 @@ int LinearInt8::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt return 0; } -} // namespace ncnn \ No newline at end of file +} // namespace ncnn diff --git a/src/layer/linearint8.h b/src/layer/linearint8.h index 374a139eb1b..a4287307d87 100644 --- a/src/layer/linearint8.h +++ b/src/layer/linearint8.h @@ -34,4 +34,4 @@ class LinearInt8 : public Layer Mat scales; }; -} // namespace ncnn \ No newline at end of file +} // namespace ncnn From 3b40f308aba7af575c19cc3458c0453ea398d8ae Mon Sep 17 00:00:00 2001 From: lrw04 Date: Tue, 5 Sep 2023 03:50:44 +0000 Subject: [PATCH 03/14] apply code-format changes --- src/layer/linearint8.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/layer/linearint8.cpp b/src/layer/linearint8.cpp index b58ba086cd5..30d87948f51 100644 --- a/src/layer/linearint8.cpp +++ b/src/layer/linearint8.cpp @@ -57,7 +57,7 @@ int LinearInt8::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt const float* m = bottom_blob.row(j); float* out = top_blob.row(j); -#pragma omp parallel for num_threads(opt.num_threads) + #pragma omp parallel for num_threads(opt.num_threads) for (int p = 0; p < out_dim; p++) { int base = w * p; From 2ecd5afc4af3bdc8f974e22455a0d3f91ce820dd Mon Sep 17 00:00:00 2001 From: Leran Wang <2428592483@qq.com> Date: Tue, 5 Sep 2023 17:04:19 +0800 Subject: [PATCH 04/14] Add more error checking --- src/layer/linearint8.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/layer/linearint8.cpp b/src/layer/linearint8.cpp index 30d87948f51..8b3d52cddfb 100644 --- a/src/layer/linearint8.cpp +++ b/src/layer/linearint8.cpp @@ -27,6 +27,8 @@ int LinearInt8::load_param(const ParamDict& pd) in_dim = pd.get(0, 0); out_dim = pd.get(1, 0); group_size = pd.get(2, 1); + if (in_dim * out_dim % group_size) + return -1; return 0; } From 5184b87fd23728d8ac89c38c11b90aa321edb7d7 Mon Sep 17 00:00:00 2001 From: Leran Wang <2428592483@qq.com> Date: Tue, 5 Sep 2023 21:37:28 +0800 Subject: [PATCH 05/14] Add include guard --- src/layer/linearint8.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/layer/linearint8.h b/src/layer/linearint8.h index a4287307d87..981177d1c8b 100644 --- a/src/layer/linearint8.h +++ 
b/src/layer/linearint8.h @@ -12,6 +12,9 @@ // CONDITIONS OF ANY KIND, either express or implied. See the License for the // specific language governing permissions and limitations under the License. +#ifndef LAYER_LINEARINT8_H +#define LAYER_LINEARINT8_H + #include "layer.h" namespace ncnn { @@ -35,3 +38,5 @@ class LinearInt8 : public Layer }; } // namespace ncnn + +#endif // LAYER_LINEARINT8_H From e53be3cac957011ca03da09ffe23668fe8973091 Mon Sep 17 00:00:00 2001 From: Leran Wang <2428592483@qq.com> Date: Wed, 6 Sep 2023 15:51:12 +0800 Subject: [PATCH 06/14] Initial commit for tests --- tests/CMakeLists.txt | 1 + tests/test_linearint8.cpp | 78 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 79 insertions(+) create mode 100644 tests/test_linearint8.cpp diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 21de08c6ff3..bdf0dee43d1 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -114,6 +114,7 @@ ncnn_add_layer_test(InnerProduct) ncnn_add_layer_test(InstanceNorm) ncnn_add_layer_test(Interp) ncnn_add_layer_test(LayerNorm) +ncnn_add_layer_test(LinearInt8) ncnn_add_layer_test(LRN) ncnn_add_layer_test(LSTM) ncnn_add_layer_test(MatMul) diff --git a/tests/test_linearint8.cpp b/tests/test_linearint8.cpp new file mode 100644 index 00000000000..d158cf45fe2 --- /dev/null +++ b/tests/test_linearint8.cpp @@ -0,0 +1,78 @@ +// TODO + +// Tencent is pleased to support the open source community by making ncnn available. +// +// Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+
+#include "layer/linearint8.h"
+#include "testutil.h"
+
+static int test_linearint8(const ncnn::Mat& a, int in_dim, int out_dim, int group_size)
+{
+    if (in_dim * out_dim % group_size)
+    {
+        fprintf(stderr, "malformed test case: in_dim=%d out_dim=%d group_size=%d\n", in_dim, out_dim, group_size);
+        return -1;
+    }
+    if (a.w != in_dim)
+    {
+        fprintf(stderr, "malformed test case: in_dim=%d out_dim=%d group_size=%d\n", in_dim, out_dim, group_size);
+        return -1;
+    }
+    ncnn::ParamDict pd;
+    pd.set(0, in_dim);
+    pd.set(1, out_dim);
+    pd.set(2, group_size);
+
+    std::vector<ncnn::Mat> weights(2);
+    weights[0] = RandomMat(in_dim * out_dim / group_size);
+    weights[1] = RandomS8Mat(in_dim * out_dim);
+
+    int ret = test_layer("LinearInt8", pd, weights, a);
+    if (ret != 0)
+    {
+        fprintf(stderr, "test_linearint8 failed a.dims=%d a=(%d, %d) in_dim=%d out_dim=%d group_size=%d\n", a.dims, a.h, a.w, in_dim, out_dim, group_size);
+    }
+
+    return ret;
+}
+
+static int test_lrn_0()
+{
+    ncnn::Mat a = RandomMat(10, 1);
+
+    return 0
+           || test_linearint8(a, 10, 6, 4)
+           || test_linearint8(a, 10, 8, 4)
+           || test_linearint8(a, 10, 10, 4);
+}
+
+static int test_lrn_1()
+{
+    ncnn::Mat a = RandomMat(16, 1);
+
+    return 0
+           || test_linearint8(a, 16, 6, 16)
+           || test_linearint8(a, 16, 6, 16)
+           || test_linearint8(a, 16, 6, 16)
+           || test_linearint8(a, 16, 6, 16);
+}
+
+int main()
+{
+    SRAND(7767517);
+
+    return 0
+           || test_lrn_0()
+           || test_lrn_1();
+}

From 01f30ecc00b6fe6d54291ac2fc65a4bebcf64702 Mon Sep 17 00:00:00 2001
From: Leran Wang <2428592483@qq.com>
Date: Wed, 6 Sep 2023 16:23:55 +0800
Subject: [PATCH 07/14] Change test function names

---
 tests/test_linearint8.cpp | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/tests/test_linearint8.cpp b/tests/test_linearint8.cpp
index d158cf45fe2..5f7af39af88 100644
--- a/tests/test_linearint8.cpp
+++ b/tests/test_linearint8.cpp
@@ -47,7 +47,7 @@ static int test_linearint8(const ncnn::Mat& a, int in_dim, int out_dim, int grou
     return ret;
 }
 
-static int test_lrn_0()
+static int test_linearint8_0()
 {
     ncnn::Mat a = RandomMat(10, 1);
 
@@ -57,14 +57,11 @@ static int test_lrn_0()
            || test_linearint8(a, 10, 10, 4);
 }
 
-static int test_lrn_1()
+static int test_linearint8_1()
 {
     ncnn::Mat a = RandomMat(16, 1);
 
     return 0
-           || test_linearint8(a, 16, 6, 16)
-           || test_linearint8(a, 16, 6, 16)
-           || test_linearint8(a, 16, 6, 16)
            || test_linearint8(a, 16, 6, 16);
 }
 
@@ -73,6 +70,6 @@ int main()
     SRAND(7767517);
 
     return 0
-           || test_lrn_0()
-           || test_lrn_1();
+           || test_linearint8_0()
+           || test_linearint8_1();
 }

From 52cbc590308f1bf54ef4e25eb56e5971d87cb5ea Mon Sep 17 00:00:00 2001
From: Leran Wang <2428592483@qq.com>
Date: Wed, 6 Sep 2023 17:09:46 +0800
Subject: [PATCH 08/14] Add general optimizations

---
 src/layer/arm/linearint8_arm.cpp | 91 ++++++++++++++++++++++++++++++++
 src/layer/arm/linearint8_arm.h   | 31 +++++++++++
 src/layer/linearint8.cpp         |  7 ++-
 3 files changed, 127 insertions(+), 2 deletions(-)
 create mode 100644 src/layer/arm/linearint8_arm.cpp
 create mode 100644 src/layer/arm/linearint8_arm.h

diff --git a/src/layer/arm/linearint8_arm.cpp b/src/layer/arm/linearint8_arm.cpp
new file mode 100644
index 00000000000..59369c2c754
--- /dev/null
+++ b/src/layer/arm/linearint8_arm.cpp
@@ -0,0 +1,91 @@
+#include "linearint8_arm.h"
+
+#if __ARM_NEON
+#include <arm_neon.h>
+#endif // __ARM_NEON
+
+namespace ncnn {
+
+int LinearInt8_arm::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const
+{
+    if (bottom_blob.dims != 2 || bottom_blob.w != in_dim)
+        return -1;
+
+    int w = 
bottom_blob.w; + int h = bottom_blob.h; + size_t elemsize = bottom_blob.elemsize; + + top_blob.create(out_dim, h, elemsize, opt.blob_allocator); + if (top_blob.empty()) + return -100; + + const int8_t* wt = (const int8_t*)weight; + +#if (__ARM_NEON && __aarch64__) + + float zero = 0.0f; + + if (!(w % group_size) && !(group_size % 8)) + { + for (int j = 0; j < h; j++) + { + const float* m = bottom_blob.row(j); + float* out = top_blob.row(j); + +#pragma omp parallel for num_threads(opt.num_threads) + for (int p = 0; p < out_dim; p++) + { + int base = w * p; + float32x4_t acc_p0 = vld1q_dup_f32(&zero), acc_p1 = vld1q_dup_f32(&zero); + for (int k = 0; k < w; k += group_size) + { + int scales_index = (base + k) / group_size; + int index = base + k; + const float* sc = (const float*)scales + scales_index; + for (int i = 0, ind = index; i < group_size; i += 8, ind += 8) + { + int8x8_t i8x8 = vld1_s8(wt + ind); + int16x8_t i16x8 = vmovl_s8(i8x8); + int32x4_t i32_0 = vmovl_s16(vget_low_s16(i16x8)); + int32x4_t i32_1 = vmovl_s16(vget_high_s16(i16x8)); + float32x4_t wt_p0 = vcvtq_f32_s32(i32_0); + float32x4_t wt_p1 = vcvtq_f32_s32(i32_1); + float32x4_t m_p0 = vld1q_f32(m + k + i); + float32x4_t m_p1 = vld1q_f32(m + k + i + 4); + float32x4_t sc_p = vld1q_dup_f32(sc); + float32x4_t acc_real0 = vmulq_f32(wt_p0, sc_p); + float32x4_t acc_real1 = vmulq_f32(wt_p1, sc_p); + acc_p0 = vmlaq_f32(acc_p0, m_p0, acc_real0); + acc_p1 = vmlaq_f32(acc_p1, m_p1, acc_real1); + } + } + out[p] = vaddvq_f32(acc_p0) + vaddvq_f32(acc_p1); + } + } + return 0; + } +#endif + + for (int j = 0; j < h; j++) + { + const float* m = bottom_blob.row(j); + float* out = top_blob.row(j); + +#pragma omp parallel for num_threads(opt.num_threads) + for (int p = 0; p < out_dim; p++) + { + int base = w * p; + float acc = 0.0f; + for (int i = 0, index = base, scales_index = index / group_size; i < w; i++, index++) + { + acc += m[i] * wt[index] * scales[scales_index]; + if (index % group_size == group_size - 1) scales_index++; + } + out[p] = acc; + } + } + + return 0; +} + +} // namespace ncnn \ No newline at end of file diff --git a/src/layer/arm/linearint8_arm.h b/src/layer/arm/linearint8_arm.h new file mode 100644 index 00000000000..76b2255f4fa --- /dev/null +++ b/src/layer/arm/linearint8_arm.h @@ -0,0 +1,31 @@ +// Tencent is pleased to support the open source community by making ncnn available. +// +// Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+
+#ifndef LAYER_LINEARINT8_ARM_H
+#define LAYER_LINEARINT8_ARM_H
+
+#include "net.h"
+#include "linearint8.h"
+
+namespace ncnn {
+
+class LinearInt8_arm : virtual public LinearInt8
+{
+public:
+    virtual int forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const;
+};
+
+} // namespace ncnn
+
+#endif // LAYER_LINEARINT8_ARM_H
diff --git a/src/layer/linearint8.cpp b/src/layer/linearint8.cpp
index 8b3d52cddfb..3967478992b 100644
--- a/src/layer/linearint8.cpp
+++ b/src/layer/linearint8.cpp
@@ -54,6 +54,8 @@ int LinearInt8::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt
     if (top_blob.empty())
         return -100;
 
+    const int8_t *wt = (const int8_t *)weight;
+
     for (int j = 0; j < h; j++)
     {
         const float* m = bottom_blob.row(j);
@@ -63,12 +65,13 @@ int LinearInt8::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt
         for (int p = 0; p < out_dim; p++)
         {
             int base = w * p;
-            out[p] = 0;
+            float acc = 0.0f;
             for (int i = 0; i < w; i++)
             {
                 int index = base + i;
-                out[p] += m[i] * ((const int8_t*)weight)[index] * scales[index / group_size];
+                acc += m[i] * wt[index] * scales[index / group_size];
             }
+            out[p] = acc;
         }
     }
 

From 9426487871e69cffaf2d359090f97cf15a60333f Mon Sep 17 00:00:00 2001
From: lrw04
Date: Wed, 6 Sep 2023 09:12:02 +0000
Subject: [PATCH 09/14] apply code-format changes

---
 src/layer/arm/linearint8_arm.cpp | 4 ++--
 src/layer/linearint8.cpp         | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/layer/arm/linearint8_arm.cpp b/src/layer/arm/linearint8_arm.cpp
index 59369c2c754..1608339b6fb 100644
--- a/src/layer/arm/linearint8_arm.cpp
+++ b/src/layer/arm/linearint8_arm.cpp
@@ -32,7 +32,7 @@ int LinearInt8_arm::forward(const Mat& bottom_blob, Mat& top_blob, const Option&
         const float* m = bottom_blob.row(j);
         float* out = top_blob.row(j);
 
-#pragma omp parallel for num_threads(opt.num_threads)
+        #pragma omp parallel for num_threads(opt.num_threads)
         for (int p = 0; p < out_dim; p++)
         {
             int base = w * p;
@@ -71,7 +71,7 @@ int LinearInt8_arm::forward(const Mat& bottom_blob, Mat& top_blob, const Option&
         const float* m = bottom_blob.row(j);
         float* out = top_blob.row(j);
 
-#pragma omp parallel for num_threads(opt.num_threads)
+        #pragma omp parallel for num_threads(opt.num_threads)
         for (int p = 0; p < out_dim; p++)
         {
             int base = w * p;
diff --git a/src/layer/linearint8.cpp b/src/layer/linearint8.cpp
index 3967478992b..6c3e2d7da10 100644
--- a/src/layer/linearint8.cpp
+++ b/src/layer/linearint8.cpp
@@ -54,7 +54,7 @@ int LinearInt8::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt
     if (top_blob.empty())
         return -100;
 
-    const int8_t *wt = (const int8_t *)weight;
+    const int8_t* wt = (const int8_t*)weight;
 
     for (int j = 0; j < h; j++)
     {

From d2ce74bb7fc2a4faf6d28d16c6c234a9f235bdf7 Mon Sep 17 00:00:00 2001
From: Leran Wang <2428592483@qq.com>
Date: Thu, 7 Sep 2023 14:29:52 +0800
Subject: [PATCH 10/14] Fix int8 weights being converted into fp16 or bf16

---
 tests/testutil.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/tests/testutil.h b/tests/testutil.h
index b879fa527fb..a826ca78955 100644
--- a/tests/testutil.h
+++ b/tests/testutil.h
@@ -1458,6 +1458,11 @@ int test_layer_opt(const char* layer_type, const ncnn::ParamDict& pd, const std:
         weights_fp16.resize(weights.size());
         for (size_t j = 0; j < weights.size(); j++)
         {
+            if (weights[j].elemsize != 4)
+            {
+                weights_fp16[j] = weights[j].clone();
+                continue;
+            }
             ncnn::Mat tmp;
             ncnn::cast_float32_to_bfloat16(weights[j], tmp, opt);
             ncnn::cast_bfloat16_to_float32(tmp, weights_fp16[j], opt);
@@ -1469,6 +1474,11 @@ int test_layer_opt(const char* layer_type, const ncnn::ParamDict& pd, const std:
         weights_fp16.resize(weights.size());
         for (size_t j = 0; j < weights.size(); j++)
         {
+            if (weights[j].elemsize != 4)
+            {
+                weights_fp16[j] = weights[j].clone();
+                continue;
+            }
             ncnn::Mat tmp;
             ncnn::cast_float32_to_float16(weights[j], tmp, opt);
             ncnn::cast_float16_to_float32(tmp, weights_fp16[j], opt);

From 61d6c8407f423427da29195dfa0e5a55f7c1a522 Mon Sep 17 00:00:00 2001
From: Leran Wang <2428592483@qq.com>
Date: Thu, 7 Sep 2023 22:18:45 +0800
Subject: [PATCH 11/14] [skip ci] Add a trailing newline?

---
 src/layer/arm/linearint8_arm.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/layer/arm/linearint8_arm.cpp b/src/layer/arm/linearint8_arm.cpp
index 1608339b6fb..b0cff23423b 100644
--- a/src/layer/arm/linearint8_arm.cpp
+++ b/src/layer/arm/linearint8_arm.cpp
@@ -88,4 +88,4 @@ int LinearInt8_arm::forward(const Mat& bottom_blob, Mat& top_blob, const Option&
     return 0;
 }
 
-} // namespace ncnn
\ No newline at end of file
+} // namespace ncnn

From f4e961a607ca49ff2f6bf8710833c4effce3e51e Mon Sep 17 00:00:00 2001
From: Leran Wang <2428592483@qq.com>
Date: Fri, 8 Sep 2023 10:44:01 +0800
Subject: [PATCH 12/14] Remove TODO banner

---
 tests/test_linearint8.cpp | 2 --
 1 file changed, 2 deletions(-)

diff --git a/tests/test_linearint8.cpp b/tests/test_linearint8.cpp
index 5f7af39af88..a4cea2a027b 100644
--- a/tests/test_linearint8.cpp
+++ b/tests/test_linearint8.cpp
@@ -1,5 +1,3 @@
-// TODO
-
 // Tencent is pleased to support the open source community by making ncnn available.
 //
 // Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.

From 6be81969d7d210c32c74fd9fa8d9522bd021b923 Mon Sep 17 00:00:00 2001
From: Leran Wang <2428592483@qq.com>
Date: Sun, 10 Sep 2023 18:50:46 +0800
Subject: [PATCH 13/14] Update documentation

---
 docs/developer-guide/operation-param-weight-table.md | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/docs/developer-guide/operation-param-weight-table.md b/docs/developer-guide/operation-param-weight-table.md
index aa5c99adf5e..fb77d5c250b 100644
--- a/docs/developer-guide/operation-param-weight-table.md
+++ b/docs/developer-guide/operation-param-weight-table.md
@@ -151,6 +151,9 @@
 ||2|width_scale|1.f|
 ||3|output_height|0|
 ||4|output_width|0|
+|LinearInt8|0|in_dim|0|scale weight|
+||1|out_dim|0|
+||2|group_size|1|
 |Log|0|base|-1.f|
 ||1|scale|1.f|
 ||2|shift|0.f|

From 68b4b2d16ac413175250d02fc07f219cda3b3204 Mon Sep 17 00:00:00 2001
From: Leran Wang <2428592483@qq.com>
Date: Sun, 10 Sep 2023 18:59:46 +0800
Subject: [PATCH 14/14] Update operators table

---
 docs/developer-guide/operators.md | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/docs/developer-guide/operators.md b/docs/developer-guide/operators.md
index 56e7516a36a..6db563b429f 100644
--- a/docs/developer-guide/operators.md
+++ b/docs/developer-guide/operators.md
@@ -45,6 +45,7 @@
 * [InstanceNorm](#instancenorm)
 * [Interp](#interp)
 * [LayerNorm](#layernorm)
+* [LinearInt8](#linearint8)
 * [Log](#log)
 * [LRN](#lrn)
 * [LSTM](#lstm)
 * [MatMul](#matmul)
@@ -1104,6 +1105,24 @@ y = x * gamma + beta by elementwise
 | gamma_data | float | [affine_size] |
 | beta_data | float | [affine_size] |
 
+# LinearInt8
+```
+y = x (WS)^T
+```
+
+* one_blob_only
+
+| param id | name | type | default | description |
+| --------- | ------------- | ----- | --------- | ----------------- |
+| 0 | in_dim | int | 0 | |
+| 1 | out_dim | int | 0 | |
+| 2 | group_size | int | 1 | |
+
+| weight | type | shape |
+| ------------- | ----- | --------------------- |
+| scale | float | [in_dim * out_dim / group_size] |
+| weight | int8 | [in_dim, out_dim] |
+
 # Log
 ```
 if base == -1 y = log(shift + x * scale)