Update CCT on PULP with Tiling
runwangdl committed Feb 12, 2025
1 parent c470af4 commit cd2ee51
Showing 26 changed files with 944 additions and 148 deletions.
75 changes: 74 additions & 1 deletion .github/workflows/CI.yml
@@ -74,7 +74,7 @@ jobs:
ICCT_ITA_8
miniMobileNet
miniMobileNetv2
CCT
CCT/CCT_16_16_8

### CortexM Tests ###
@@ -212,7 +212,14 @@ jobs:
testBacktracking
testFloatAdder
testFloatGEMM
testFloat2DConvolution
testFloatLayerNorm
testFloatRelu
testFloatMaxPool
testFloatMatmul
testFloatSoftmax
testFloatTranspose
testFloatMul
num-cores: 8

siracusa-models:
@@ -268,6 +275,38 @@ jobs:
{
"name": "testFloatGEMM",
"L1": [8000]
},
{
"name": "testFloat2DConvolution",
"L1": [2000]
},
{
"name": "testFloatLayerNorm",
"L1": [2000]
},
{
"name": "testFloatRelu",
"L1": [2000]
},
{
"name": "testFloatMaxPool",
"L1": [2000]
},
{
"name": "testFloatMatmul",
"L1": [2000]
},
{
"name": "testFloatSoftmax",
"L1": [4000]
},
{
"name": "testFloatTranspose",
"L1": [2000]
},
{
"name": "testFloatMul",
"L1": [2000]
}
]
num-cores: 8
@@ -312,6 +351,38 @@ jobs:
{
"name": "testFloatGEMM",
"L1": [8000]
},
{
"name": "testFloat2DConvolution",
"L1": [4000]
},
{
"name": "testFloatLayerNorm",
"L1": [2000]
},
{
"name": "testFloatRelu",
"L1": [2000]
},
{
"name": "testFloatMaxPool",
"L1": [2000]
},
{
"name": "testFloatMatmul",
"L1": [5000]
},
{
"name": "testFloatSoftmax",
"L1": [8000]
},
{
"name": "testFloatTranspose",
"L1": [2000]
},
{
"name": "testFloatMul",
"L1": [2000]
}
]
num-cores: 8
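The kernel entries added above extend the tiled kernel-test matrix; each "L1" value is presumably the per-test L1 memory budget, in bytes, handed to the tiler. A minimal sketch of how such a tests-config list could be consumed (the field semantics are an assumption, not taken from this commit):

# Sketch only: interpreting "L1" as a per-test L1 tiling budget in bytes is an assumption.
import json

tests_config = json.loads("""
[
    {"name": "testFloatSoftmax",   "L1": [8000]},
    {"name": "testFloatTranspose", "L1": [2000]}
]
""")

for test in tests_config:
    for l1_budget in test["L1"]:
        # One test run per (test, L1 budget) combination.
        print(f"run {test['name']} with an L1 budget of {l1_budget} bytes")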
@@ -342,6 +413,8 @@ jobs:
L1: [64000]
- name: "MLPerf/AnomalyDetection"
L1: [64000]
- name: "CCT/CCT_16_16_8"
L1: [64000]
num-cores:
- 8
uses: ./.github/workflows/TestRunnerTiledSiracusa.yml
@@ -25,13 +25,17 @@

from typing import Tuple

from Deeploy.DeeployTypes import CodeTransformationPass, ExecutionBlock, NetworkContext, NodeTemplate
from Deeploy.DeeployTypes import CodeGenVerbosity, CodeTransformationPass, ExecutionBlock, NetworkContext, \
NodeTemplate, _NoVerbosity


class ProfilingCodeGeneration(CodeTransformationPass):

def apply(self, ctxt: NetworkContext, executionBlock: ExecutionBlock,
name: str) -> Tuple[NetworkContext, ExecutionBlock]:
def apply(self,
ctxt: NetworkContext,
executionBlock: ExecutionBlock,
name: str,
verbose: CodeGenVerbosity = _NoVerbosity) -> Tuple[NetworkContext, ExecutionBlock]:
executionBlock.addLeft(NodeTemplate("""
uint32_t ${op}_cycles = getCycles();
"""), {"op": name})
24 changes: 23 additions & 1 deletion Deeploy/Targets/Generic/Layers.py
@@ -151,12 +151,13 @@ def __init__(self, maps: List[NodeMapper]):

def computeShapes(self, inputShapes: Shape, outputShapes: Shape, operatorRepresentation,
channels_first) -> Tuple[Shape, Shape]:
outputShapes = inputShapes.copy()

if len(inputShapes[0]) > len(inputShapes[1]):
inputShapes[1] = inputShapes[0]
else:
inputShapes[0] = inputShapes[1]

outputShapes = [inputShapes[0]]
return (inputShapes, outputShapes)

def computeOps(self):
@@ -172,6 +173,27 @@ def computeOps(self):
return 2 * self.mapper.parser.operatorRepresentation['M'] * self.mapper.parser.operatorRepresentation[
'N'] * self.mapper.parser.operatorRepresentation['O'] * self.mapper.parser.operatorRepresentation['batch']

def computeShapes(self, inputShapes: Tuple[Shape, Shape], outputShapes: Shape, operatorRepresentation,
channels_first) -> Tuple[Tuple[Shape, Shape], Shape]:

A_shape, B_shape = inputShapes
if len(A_shape) < 2:
A_shape = [1] * (2 - len(A_shape)) + A_shape

if len(B_shape) < 2:
B_shape = B_shape + [1] * (2 - len(B_shape))

if A_shape[-1] != B_shape[-2]:
raise ValueError(f"MatMul requires A.shape[-1] == B.shape[-2], but got {A_shape} and {B_shape}")

if len(A_shape) > len(B_shape):
B_shape = [1] * (len(A_shape) - len(B_shape)) + list(B_shape)

elif len(A_shape) < len(B_shape):
A_shape = [1] * (len(B_shape) - len(A_shape)) + list(A_shape)

return [A_shape, B_shape], outputShapes
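The new computeShapes override aligns the ranks of the two MatMul operands: vectors are promoted to matrices, and the shorter-rank operand is padded with leading singleton batch dimensions. A standalone illustration of the rule (the helper name and example shapes below are illustrative only):

# Standalone sketch mirroring the rank alignment above; not Deeploy API.
def _align_matmul_shapes(A_shape, B_shape):
    A_shape, B_shape = list(A_shape), list(B_shape)
    if len(A_shape) < 2:  # promote a vector A with a leading 1
        A_shape = [1] * (2 - len(A_shape)) + A_shape
    if len(B_shape) < 2:  # promote a vector B with a trailing 1
        B_shape = B_shape + [1] * (2 - len(B_shape))
    if A_shape[-1] != B_shape[-2]:
        raise ValueError(f"MatMul requires A.shape[-1] == B.shape[-2], got {A_shape} and {B_shape}")
    # Pad the shorter-rank operand with leading batch dimensions of size 1.
    if len(A_shape) > len(B_shape):
        B_shape = [1] * (len(A_shape) - len(B_shape)) + B_shape
    elif len(A_shape) < len(B_shape):
        A_shape = [1] * (len(B_shape) - len(A_shape)) + A_shape
    return A_shape, B_shape


assert _align_matmul_shapes([16, 64], [8, 64, 32]) == ([1, 16, 64], [8, 64, 32])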


class RQMatMulLayer(MatMulLayer):

2 changes: 1 addition & 1 deletion Deeploy/Targets/Generic/Templates/FloatMatMulTemplate.py
@@ -25,7 +25,7 @@
from Deeploy.DeeployTypes import NodeTemplate

referenceTemplate = NodeTemplate("""
// GEMM (Name: ${nodeName}, Op: ${nodeOp})
// Matmul (Name: ${nodeName}, Op: ${nodeOp})
BEGIN_SINGLE_CORE
${A_type.typeName} ref_${data_out}_${A} = ${A};
${B_type.typeName} ref_${data_out}_${B} = ${B};
67 changes: 58 additions & 9 deletions Deeploy/Targets/PULPOpen/Bindings.py
@@ -30,24 +30,28 @@

from Deeploy.AbstractDataTypes import PointerClass
from Deeploy.CommonExtensions.CodeTransformationPasses.Closure import ClosureGeneration, MemoryAwareClosureGeneration
from Deeploy.CommonExtensions.CodeTransformationPasses.CycleMeasurement import ProfilingCodeGeneration
from Deeploy.CommonExtensions.CodeTransformationPasses.MemoryAllocation import ArgumentStructGeneration, \
MemoryManagementGeneration
from Deeploy.CommonExtensions.DataTypes import IntegerDataTypes, SignedIntegerDataTypes, float32_t, int8_t, int32_t, \
uint8_t
from Deeploy.DeeployTypes import CodeTransformation, NodeBinding, NodeTemplate
from Deeploy.FutureExtension.Bindings.AutoFutureBinding import AutoFutureBinding
from Deeploy.FutureExtension.CodeTransformationPasses.FutureCodeTransformation import FutureGeneration
from Deeploy.Targets.Generic.Templates import ConcatTemplate, FloatGemmTemplate, RQSiGELUTemplate, iHardswishTemplate
from Deeploy.Targets.Generic.TypeCheckers import ConcatChecker, GELUChecker, GEMMChecker, HardswishChecker, \
LayerNormChecker, MatMulChecker, MulChecker, ReduceMeanChecker, RQAddChecker, RQHardswishChecker, SliceChecker, \
SoftmaxChecker, TransposeChecker
from Deeploy.Targets.Generic.Templates import ConcatTemplate, FloatGELUTemplate, FloatGemmTemplate, \
FloatLayernormTemplate, FloatMatMulTemplate, FloatMulTemplate, FloatReluTemplate, FloatSoftmaxTemplate, \
GatherTemplate, RQSiGELUTemplate, iHardswishTemplate
from Deeploy.Targets.Generic.TypeCheckers import ConcatChecker, ConvChecker, GatherChecker, GELUChecker, GEMMChecker, \
HardswishChecker, LayerNormChecker, MatMulChecker, MulChecker, ReduceMeanChecker, ReluChecker, RQAddChecker, \
RQHardswishChecker, SliceChecker, SoftmaxChecker, TransposeChecker
from Deeploy.Targets.PULPOpen.CodeTransformationPasses.PULPClusterSynch import PULPSynchCoresPass
from Deeploy.Targets.PULPOpen.CodeTransformationPasses.PULPClusterTiling import PULPClusterTiling
from Deeploy.Targets.PULPOpen.CodeTransformationPasses.PULPL3Tiling import PULPL3Tiling
from Deeploy.Targets.PULPOpen.DataTypes import PULPDMAFuture
from Deeploy.Targets.PULPOpen.Templates import ConvTemplate, GEMMTemplate, MatrixVectorTemplate, MaxPool2DTemplate, \
MulTemplate, ReduceMeanTemplate, RequantShiftTemplate, RQAddTemplate, RQSiHardswishTemplate, SliceTemplate, \
TallGEMMTemplate, TransposeTemplate, UniformRequantShiftTemplate, iRMSNormTemplate, iSoftmaxTemplate
from Deeploy.Targets.PULPOpen.Templates import ConvTemplate, FloatConvTemplate, FloatMaxPoolTemplate, GEMMTemplate, \
MatrixVectorTemplate, MaxPool2DTemplate, MulTemplate, ReduceMeanTemplate, RequantShiftTemplate, RQAddTemplate, \
RQSiHardswishTemplate, SliceTemplate, TallGEMMTemplate, TransposeTemplate, UniformRequantShiftTemplate, \
iRMSNormTemplate, iSoftmaxTemplate
from Deeploy.Targets.PULPOpen.TypeCheckers import PULPConvChecker, PULPLinearChecker, PULPMaxPoolChecker, \
PULPRequantShiftChecker
from Deeploy.TilingExtension.CodeTransformationPasses.TilingVariableReplacement import TilingVariableReplacement
@@ -120,6 +124,7 @@
MemoryManagementGeneration("L3.*"),
MemoryManagementGeneration("L2"),
MemoryManagementGeneration(),
ProfilingCodeGeneration()
])

ClusterTransformer = CodeTransformation([
@@ -136,12 +141,14 @@
MemoryManagementGeneration("L2"),
MemoryManagementGeneration("L3.*"),
MemoryManagementGeneration(),
ProfilingCodeGeneration()
])

SimpleTransformer = CodeTransformation([
MemoryManagementGeneration("L2"),
MemoryManagementGeneration("L3.*"),
MemoryManagementGeneration(),
ProfilingCodeGeneration()
])

PULPDMASliceBindings = [
@@ -204,6 +211,13 @@
ForkTransformer)
]

PULPFloatConv2DBindings = [
NodeBinding(
ConvChecker([PointerClass(float32_t), PointerClass(float32_t),
PointerClass(float32_t)], [PointerClass(float32_t)]), FloatConvTemplate.reference2DTemplate,
ForkTransformer)
]

PULPRQSMatrixVecBindings = [
NodeBinding(
PULPLinearChecker([PointerClass(type1),
@@ -227,6 +241,9 @@
PULPMaxPool2DBindings = [
NodeBinding(PULPMaxPoolChecker([PointerClass(type)], [PointerClass(type)]),
MaxPool2DTemplate.PULPMaxPool2D_8_Template, ForkTransformer) for type in [int8_t, uint8_t]
] + [
NodeBinding(PULPMaxPoolChecker([PointerClass(float32_t)], [PointerClass(float32_t)]),
FloatMaxPoolTemplate.referenceTemplate, ForkTransformer)
]

PULPConv1DBinding = NodeBinding(
@@ -241,8 +258,13 @@
PointerClass(int32_t),
PointerClass(int32_t)], [PointerClass(int8_t)]), ConvTemplate.PULPDWConv1D_8_Template, ForkTransformer)

PULPMatMulBinding = NodeBinding(MatMulChecker([PointerClass(int8_t), PointerClass(int8_t)], [PointerClass(int32_t)]),
GEMMTemplate.PULPMM_8_Template, ClusterTransformer)
PULPMatMulBindings = [
NodeBinding(MatMulChecker([PointerClass(int8_t), PointerClass(int8_t)], [PointerClass(int32_t)]),
GEMMTemplate.PULPMM_8_Template, ClusterTransformer)
] + [
NodeBinding(MatMulChecker([PointerClass(float32_t), PointerClass(float32_t)], [PointerClass(float32_t)]),
FloatMatMulTemplate.referenceTemplate, ClusterTransformer)
]

PULPReduceMeanBindings = [
NodeBinding(ReduceMeanChecker([PointerClass(type)], [PointerClass(type)]), ReduceMeanTemplate.referenceTemplate,
@@ -271,11 +293,17 @@
PULPSoftmaxBindings = [
NodeBinding(SoftmaxChecker([PointerClass(_type)], [PointerClass(uint8_t)]), iSoftmaxTemplate.referenceTemplate,
ForkTransformer) for _type in [int8_t, uint8_t]
] + [
NodeBinding(SoftmaxChecker([PointerClass(float32_t)], [PointerClass(float32_t)]),
FloatSoftmaxTemplate.referenceTemplate, ForkTransformer)
]

PULPTransposeBindings = [
NodeBinding(TransposeChecker([PointerClass(type)], [PointerClass(type)]), TransposeTemplate.referenceTemplate,
ForkTransformer) for type in IntegerDataTypes
] + [
NodeBinding(TransposeChecker([PointerClass(float32_t)], [PointerClass(float32_t)]),
TransposeTemplate.referenceTemplate, ForkTransformer)
]

PULPConcatBindings = [
@@ -314,4 +342,25 @@
NodeBinding(MulChecker([PointerClass(typeA), PointerClass(typeB)], [PointerClass(int32_t)]),
MulTemplate.referenceTemplate, ForkTransformer)
for typeA, typeB in itertools.product(SignedIntegerDataTypes, SignedIntegerDataTypes)
] + [
NodeBinding(MulChecker([PointerClass(float32_t), PointerClass(float32_t)], [PointerClass(float32_t)]),
FloatMulTemplate.referenceTemplate, ForkTransformer)
]

PULPReluBinding = NodeBinding(ReluChecker([PointerClass(float32_t)], [PointerClass(float32_t)]),
FloatReluTemplate.referenceTemplate, ForkTransformer)

PULPLayernormBinding = NodeBinding(
LayerNormChecker(
[PointerClass(float32_t), PointerClass(float32_t),
PointerClass(float32_t)], [PointerClass(float32_t)]), FloatLayernormTemplate.referenceTemplate,
ForkTransformer)

PULPFloatGELUBinding = NodeBinding(
GELUChecker([PointerClass(float32_t), PointerClass(float32_t)], [PointerClass(float32_t)]),
FloatGELUTemplate.referenceTemplate, ForkTransformer)

PULPGatherBindings = [
NodeBinding(GatherChecker([PointerClass(float32_t), PointerClass(type)], [PointerClass(float32_t)]),
GatherTemplate.referenceTemplate, ForkTransformer) for type in IntegerDataTypes
]
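All float32 bindings added above follow the same recipe: a generic type checker instantiated with float32_t pointers, a float reference template, and the Fork or Cluster transformer. A hypothetical binding for a further float op, sketched in the same style (AddChecker, FloatAddTemplate and the binding name are assumed/illustrative, not part of this commit):

# Hypothetical sketch following the pattern above; AddChecker, FloatAddTemplate and
# PULPFloatAddBindings are assumed names, not identifiers taken from this diff.
from Deeploy.AbstractDataTypes import PointerClass
from Deeploy.CommonExtensions.DataTypes import float32_t
from Deeploy.DeeployTypes import NodeBinding
from Deeploy.Targets.Generic.Templates import FloatAddTemplate  # assumed to exist
from Deeploy.Targets.Generic.TypeCheckers import AddChecker  # assumed to exist

PULPFloatAddBindings = [
    NodeBinding(
        AddChecker([PointerClass(float32_t), PointerClass(float32_t)], [PointerClass(float32_t)]),
        FloatAddTemplate.referenceTemplate,  # assumed generic float Add template
        ForkTransformer)  # ForkTransformer as defined earlier in Bindings.py
]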
51 changes: 50 additions & 1 deletion Deeploy/Targets/PULPOpen/Parsers.py
@@ -29,7 +29,8 @@
import onnx_graphsurgeon as gs

from Deeploy.DeeployTypes import NetworkContext
from Deeploy.Targets.Generic.Parsers import GEMMParser, RQSConv1DParser, RQSConv2DParser, RQSParserInterface
from Deeploy.Targets.Generic.Parsers import Conv2DParser, GEMMParser, RQSConv1DParser, RQSConv2DParser, \
RQSParserInterface


class PULPConv2DParser(RQSConv2DParser):
@@ -85,6 +86,54 @@ def parseNodeCtxt(self,
return ctxt, False


class PULPFPConv2DParser(Conv2DParser):

def __init__(self, noBiasHoisting = True):
super().__init__(noBiasHoisting)

def parseNode(self, node: gs.Node) -> (bool):

wellFormed = super().parseNode(node)
if wellFormed:
ret = all([
# Only support group == 1 and equal padding on all four sides
self.operatorRepresentation['group'] == 1,
self.operatorRepresentation['pads'][0] == self.operatorRepresentation['pads'][2],
self.operatorRepresentation['pads'][1] == self.operatorRepresentation['pads'][3],
self.operatorRepresentation['pads'][0] == self.operatorRepresentation['pads'][1],
#self.operatorRepresentation['pads'][0] == 0,
# Don't support dilations
#all([coeff == 1 for coeff in self.operatorRepresentation['dilations']]),
len(node.inputs) == 2
])

self.operatorRepresentation['dim_kernel_x'] = int(self.operatorRepresentation['kernel_shape'][0])
self.operatorRepresentation['dim_kernel_y'] = int(self.operatorRepresentation['kernel_shape'][1])
self.operatorRepresentation['dilation_x'] = int(self.operatorRepresentation['dilations'][0])
self.operatorRepresentation['dilation_y'] = int(self.operatorRepresentation['dilations'][1])
self.operatorRepresentation['padding_y_top'] = int(self.operatorRepresentation['pads'][0])
self.operatorRepresentation['padding_x_left'] = int(self.operatorRepresentation['pads'][1])
self.operatorRepresentation['padding_y_bottom'] = int(self.operatorRepresentation['pads'][2])
self.operatorRepresentation['padding_x_right'] = int(self.operatorRepresentation['pads'][3])
self.operatorRepresentation['stride_x'] = int(self.operatorRepresentation['strides'][0])
self.operatorRepresentation['stride_y'] = int(self.operatorRepresentation['strides'][1])

return ret
return False

def parseNodeCtxt(self,
ctxt: NetworkContext,
node: gs.Node,
channels_first: bool = True) -> Tuple[NetworkContext, bool]:

newCtxt, ret = super().parseNodeCtxt(ctxt, node, channels_first)

if ret:
return newCtxt, True

return ctxt, False
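PULPFPConv2DParser.parseNode flattens the ONNX Conv list attributes into the scalar fields consumed by the templates and the tiler. A worked example of that mapping, with illustrative attribute values:

# Illustrative only: how the Conv list attributes unpack into scalar fields.
opRep = {
    "kernel_shape": [3, 3],
    "dilations": [1, 1],
    "pads": [1, 1, 1, 1],  # [top, left, bottom, right]
    "strides": [2, 2],
}

flattened = {
    "dim_kernel_x": int(opRep["kernel_shape"][0]),
    "dim_kernel_y": int(opRep["kernel_shape"][1]),
    "dilation_x": int(opRep["dilations"][0]),
    "dilation_y": int(opRep["dilations"][1]),
    "padding_y_top": int(opRep["pads"][0]),
    "padding_x_left": int(opRep["pads"][1]),
    "padding_y_bottom": int(opRep["pads"][2]),
    "padding_x_right": int(opRep["pads"][3]),
    "stride_x": int(opRep["strides"][0]),
    "stride_y": int(opRep["strides"][1]),
}

assert flattened["padding_y_top"] == flattened["padding_x_right"] == 1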


class PULPDWConv1DParser(RQSConv1DParser):

def __init__(self, noBiasHoisting = True):
