From 6dece184bfa851fb740f87dfd01f78a9e7634282 Mon Sep 17 00:00:00 2001 From: Adam Straw Date: Fri, 11 Feb 2022 15:53:18 -0800 Subject: [PATCH] Adding support for Hexagon User DMA Engine (#10217) * initial hexagon user dma impl * Hexagon User DMA descriptor, instruction and register headers * Synchronous 1D DMA working * HexagonBuffer unit tests passing with memcpy * cleanup * comments and orgnanize code * format and lint * init function + other code review feedback * add ifdef hexagon around inline asm --- cmake/modules/Hexagon.cmake | 1 + src/runtime/hexagon/hexagon/hexagon_buffer.cc | 19 +- .../hexagon/hexagon/hexagon_user_dma.cc | 118 +++++++ .../hexagon/hexagon_user_dma_descriptors.h | 310 ++++++++++++++++++ .../hexagon/hexagon_user_dma_instructions.h | 79 +++++ .../hexagon/hexagon_user_dma_registers.h | 278 ++++++++++++++++ 6 files changed, 799 insertions(+), 6 deletions(-) create mode 100644 src/runtime/hexagon/hexagon/hexagon_user_dma.cc create mode 100644 src/runtime/hexagon/hexagon/hexagon_user_dma_descriptors.h create mode 100644 src/runtime/hexagon/hexagon/hexagon_user_dma_instructions.h create mode 100644 src/runtime/hexagon/hexagon/hexagon_user_dma_registers.h diff --git a/cmake/modules/Hexagon.cmake b/cmake/modules/Hexagon.cmake index a990101bdecf..b90528d0767d 100644 --- a/cmake/modules/Hexagon.cmake +++ b/cmake/modules/Hexagon.cmake @@ -76,6 +76,7 @@ if (NOT BUILD_FOR_HEXAGON AND NOT BUILD_FOR_ANDROID) # append select runtime sources for unit testing list(APPEND RUNTIME_SRCS src/runtime/hexagon/hexagon/hexagon_buffer.cc) list(APPEND RUNTIME_SRCS src/runtime/hexagon/hexagon/hexagon_common.cc) + list(APPEND RUNTIME_SRCS src/runtime/hexagon/hexagon/hexagon_user_dma.cc) return() elseif(NOT USE_HEXAGON_DEVICE STREQUAL "${PICK_SIM}" AND NOT USE_HEXAGON_DEVICE STREQUAL "${PICK_HW}") diff --git a/src/runtime/hexagon/hexagon/hexagon_buffer.cc b/src/runtime/hexagon/hexagon/hexagon_buffer.cc index dfb499c9a31a..752c5325af21 100644 --- a/src/runtime/hexagon/hexagon/hexagon_buffer.cc +++ b/src/runtime/hexagon/hexagon/hexagon_buffer.cc @@ -37,6 +37,8 @@ namespace tvm { namespace runtime { namespace hexagon { +int hexagon_user_dma_1d_sync(void* src, void* dst, uint32_t length); + struct Allocation { Allocation(size_t allocation_nbytes, size_t alignment) : allocation_nbytes_(allocation_nbytes), alignment_(alignment) {} @@ -198,8 +200,10 @@ void HexagonBuffer::CopyTo(void* data, size_t nbytes) const { size_t bytes_to_copy = std::min(nbytes - copied, managed_allocations_[i]->allocation_nbytes_); if (bytes_to_copy == 0) break; - memcpy(static_cast(data) + copied, - static_cast(managed_allocations_[i]->data_), bytes_to_copy); + void* data_plus_copied = static_cast((static_cast(data) + copied)); + int status = + hexagon_user_dma_1d_sync(data_plus_copied, managed_allocations_[i]->data_, bytes_to_copy); + CHECK_EQ(status, 0); copied += bytes_to_copy; } @@ -215,8 +219,10 @@ void HexagonBuffer::CopyFrom(void* data, size_t nbytes) { size_t bytes_to_copy = std::min(nbytes - copied, managed_allocations_[i]->allocation_nbytes_); if (bytes_to_copy == 0) break; - memcpy(static_cast(managed_allocations_[i]->data_), - static_cast(data) + copied, bytes_to_copy); + void* data_plus_copied = static_cast((static_cast(data) + copied)); + int status = + hexagon_user_dma_1d_sync(managed_allocations_[i]->data_, data_plus_copied, bytes_to_copy); + CHECK_EQ(status, 0); copied += bytes_to_copy; } @@ -239,8 +245,9 @@ void HexagonBuffer::CopyFrom(const HexagonBuffer& other, size_t nbytes) { CHECK_LE(other.managed_allocations_[i]->allocation_nbytes_, managed_allocations_[i]->allocation_nbytes_); - memcpy(static_cast(managed_allocations_[i]->data_), - static_cast(other.managed_allocations_[i]->data_), bytes_to_copy); + int status = hexagon_user_dma_1d_sync(managed_allocations_[i]->data_, + other.managed_allocations_[i]->data_, bytes_to_copy); + CHECK_EQ(status, 0); copied += bytes_to_copy; } diff --git a/src/runtime/hexagon/hexagon/hexagon_user_dma.cc b/src/runtime/hexagon/hexagon/hexagon_user_dma.cc new file mode 100644 index 000000000000..18da4af61090 --- /dev/null +++ b/src/runtime/hexagon/hexagon/hexagon_user_dma.cc @@ -0,0 +1,118 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include + +#include "hexagon_common.h" +#include "hexagon_user_dma_descriptors.h" +#include "hexagon_user_dma_instructions.h" +#include "hexagon_user_dma_registers.h" + +namespace tvm { +namespace runtime { +namespace hexagon { + +int init_hexagon_user_dma() { +#if defined(__hexagon__) + // reset DMA engine + unsigned int status = dmpause() & DM0_STATUS_MASK; + if (status != DM0_STATUS_IDLE) { + return DMA_FAILURE; + } +#endif + return DMA_SUCCESS; +} + +int hexagon_user_dma_1d_sync(void* dst, void* src, uint32_t length) { +#if defined(__hexagon__) + static int config_dma = init_hexagon_user_dma(); + if (config_dma != DMA_SUCCESS) { + return DMA_FAILURE; + } + + uint64_t src64 = reinterpret_cast(src); + // source address limited to 32 bits + if (src64 > DESC_SRC_MASK) { + return DMA_FAILURE; + } + + uint64_t dst64 = reinterpret_cast(dst); + // destination address limited to 32 bits + if (dst64 > DESC_DST_MASK) { + return DMA_FAILURE; + } + + // length limited to 24 bits + if (length > DESC_LENGTH_MASK) { + return DMA_FAILURE; + } + + uint32_t src32 = src64 & DESC_SRC_MASK; + uint32_t dst32 = dst64 & DESC_DST_MASK; + + void* dma_desc = nullptr; + +#ifdef _WIN32 + dma_desc = _aligned_malloc(DMA_DESC_2D_SIZE, DMA_DESC_2D_SIZE); +#else + int ret = posix_memalign(&dma_desc, DMA_DESC_2D_SIZE, DMA_DESC_2D_SIZE); + if (ret) { + return DMA_FAILURE; + } +#endif + + if (!dma_desc) { + return DMA_FAILURE; + } + + dma_desc_set_next(dma_desc, DMA_NULL_PTR); + dma_desc_set_length(dma_desc, length); + dma_desc_set_desctype(dma_desc, DESC_DESCTYPE_1D); + dma_desc_set_dstcomp(dma_desc, DESC_COMP_NONE); + dma_desc_set_srccomp(dma_desc, DESC_COMP_NONE); + dma_desc_set_bypassdst(dma_desc, DESC_BYPASS_OFF); + dma_desc_set_bypasssrc(dma_desc, DESC_BYPASS_OFF); + dma_desc_set_order(dma_desc, DESC_ORDER_ORDER); + dma_desc_set_dstate(dma_desc, DESC_DSTATE_INCOMPLETE); + dma_desc_set_src(dma_desc, src32); + dma_desc_set_dst(dma_desc, dst32); + + dmstart(dma_desc); + unsigned int status = dmwait() & DM0_STATUS_MASK; + unsigned int done = dma_desc_get_dstate(dma_desc); + +#ifdef _WIN32 + _aligned_free(dma_desc); +#else + free(dma_desc); +#endif + + if (status == DM0_STATUS_IDLE && done == DESC_DSTATE_COMPLETE) { + return DMA_SUCCESS; + } + return DMA_FAILURE; +#else + memcpy(dst, src, length); + return DMA_SUCCESS; +#endif +} + +} // namespace hexagon +} // namespace runtime +} // namespace tvm diff --git a/src/runtime/hexagon/hexagon/hexagon_user_dma_descriptors.h b/src/runtime/hexagon/hexagon/hexagon_user_dma_descriptors.h new file mode 100644 index 000000000000..a90c25e71199 --- /dev/null +++ b/src/runtime/hexagon/hexagon/hexagon_user_dma_descriptors.h @@ -0,0 +1,310 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#ifndef TVM_RUNTIME_HEXAGON_HEXAGON_HEXAGON_USER_DMA_DESCRIPTORS_H_ +#define TVM_RUNTIME_HEXAGON_HEXAGON_HEXAGON_USER_DMA_DESCRIPTORS_H_ + +namespace tvm { +namespace runtime { +namespace hexagon { + +// NOTE: Using 2D descriptor size even for 1D descriptors +#define DMA_DESC_2D_SIZE 32 + +// TODO(Straw): DMA Status [0][3:0]? + +// desc[0][31:4] +// Descriptors addresses must be (minimum) 16 byte aligned +// -> Lower 4 bits masked to clear DMA Status +// -> But, descriptor address is not shifted +#define DESC_NEXT_MASK 0xFFFFFFF0 +#define DESC_NEXT_SHIFT 0 + +// desc[1][23:0] +#define DESC_LENGTH_MASK 0x00FFFFFF +#define DESC_LENGTH_SHIFT 0 + +// desc[1][25:24] +#define DESC_DESCTYPE_MASK 0x03000000 +#define DESC_DESCTYPE_SHIFT 24 +#define DESC_DESCTYPE_1D 0 +#define DESC_DESCTYPE_2D 1 + +// TODO(Straw): Definition? Not in the spec. +// desc[1][26] +#define DESC_DSTCOMP_MASK 0x04000000 +#define DESC_DSTCOMP_SHIFT 26 +// desc[1][27] +#define DESC_SRCCOMP_MASK 0x08000000 +#define DESC_SRCCOMP_SHIFT 27 +#define DESC_COMP_NONE 0 +#define DESC_COMP_DLBC 1 + +// desc[1][28] +#define DESC_BYPASSDST_MASK 0x10000000 +#define DESC_BYPASSDST_SHIFT 28 +// desc[1][29] +#define DESC_BYPASSSRC_MASK 0x20000000 +#define DESC_BYPASSSRC_SHIFT 29 +#define DESC_BYPASS_OFF 0 +#define DESC_BYPASS_ON 1 + +// desc[1][30] +#define DESC_ORDER_MASK 0x40000000 +#define DESC_ORDER_SHIFT 30 +#define DESC_ORDER_NOORDER 0 +#define DESC_ORDER_ORDER 1 + +// desc[1][31] +#define DESC_DSTATE_MASK 0x80000000 +#define DESC_DSTATE_SHIFT 31 +#define DESC_DSTATE_INCOMPLETE 0 +#define DESC_DSTATE_COMPLETE 1 + +// desc[2] +#define DESC_SRC_MASK 0xFFFFFFFF +#define DESC_SRC_SHIFT 0 + +// desc[3] +#define DESC_DST_MASK 0xFFFFFFFF +#define DESC_DST_SHIFT 0 + +// desc[4][25:24] +#define DESC_CACHEALLOC_MASK 0x03000000 +#define DESC_CACHEALLOC_SHIFT 24 +#define DESC_CACHEALLOC_NONE 0 +#define DESC_CACHEALLOC_WRITEONLY 1 +#define DESC_CACHEALLOC_READONLY 2 +#define DESC_CACHEALLOC_READWRITE 3 + +// TODO(Straw): Definition? Not in the spec. +// desc[4][31:28] +#define DESC_PADDING_MASK 0xF0000000 +#define DESC_PADDING_SHIFT 28 + +// desc[5][15:0] +#define DESC_ROIWIDTH_MASK 0x0000FFFF +#define DESC_ROIWIDTH_SHIFT 0 + +// desc[5][31:16] +#define DESC_ROIHEIGHT_MASK 0xFFFF0000 +#define DESC_ROIHEIGHT_SHIFT 16 + +// desc[6][15:0] +#define DESC_SRCSTRIDE_MASK 0x0000FFFF +#define DESC_SRCSTRIDE_SHIFT 0 + +// desc[6][31:16] +#define DESC_DSTSTRIDE_MASK 0xFFFF0000 +#define DESC_DSTSTRIDE_SHIFT 16 + +// desc[7][15:0] +#define DESC_SRCWIDTHOFFSET_MASK 0x0000FFFF +#define DESC_SRCWIDTHOFFSET_SHIFT 0 + +// desc[7][31:16] +#define DESC_DSTWIDTHOFFSET_MASK 0xFFFF0000 +#define DESC_DSTWIDTHOFFSET_SHIFT 16 + +#define DMA_SUCCESS 0 +#define DMA_FAILURE -1 +#define DMA_NULL_PTR 0 + +/**************************/ +/* 1D (linear) descriptor */ +/**************************/ +struct dma_desc_1d_t { + unsigned int next; + unsigned int dstate_order_bypass_comp_desctype_length; + unsigned int src; + unsigned int dst; +}; + +/***********************/ +/* 2D (box) descriptor */ +/***********************/ +struct dma_desc_2d_t { + unsigned int next; + unsigned int dstate_order_bypass_comp_desctype_length; + unsigned int src; + unsigned int dst; + unsigned int allocation_padding; + unsigned int roiheight_roiwidth; + unsigned int dststride_srcstride; + unsigned int dstwidthoffset_srcwidthoffset; +}; + +// desc[0][31:4] +inline void dma_desc_set_next(void* dma_desc_ptr, unsigned int v) { + dma_desc_1d_t* dma_desc_1d_ptr = reinterpret_cast(dma_desc_ptr); + (dma_desc_1d_ptr->next) &= ~DESC_NEXT_MASK; + (dma_desc_1d_ptr->next) |= ((v << DESC_NEXT_SHIFT) & DESC_NEXT_MASK); +} + +// desc[1][23:0] +inline void dma_desc_set_length(void* dma_desc_ptr, unsigned int v) { + dma_desc_1d_t* dma_desc_1d_ptr = reinterpret_cast(dma_desc_ptr); + (dma_desc_1d_ptr->dstate_order_bypass_comp_desctype_length) &= ~DESC_LENGTH_MASK; + (dma_desc_1d_ptr->dstate_order_bypass_comp_desctype_length) |= + ((v << DESC_LENGTH_SHIFT) & DESC_LENGTH_MASK); +} + +// desc[1][25:24] +inline void dma_desc_set_desctype(void* dma_desc_ptr, unsigned int v) { + dma_desc_1d_t* dma_desc_1d_ptr = reinterpret_cast(dma_desc_ptr); + (dma_desc_1d_ptr->dstate_order_bypass_comp_desctype_length) &= ~DESC_DESCTYPE_MASK; + (dma_desc_1d_ptr->dstate_order_bypass_comp_desctype_length) |= + ((v << DESC_DESCTYPE_SHIFT) & DESC_DESCTYPE_MASK); +} + +// TODO(Straw): Definition? Not in the spec. +// desc[1][26] +inline void dma_desc_set_dstcomp(void* dma_desc_ptr, unsigned int v) { + dma_desc_1d_t* dma_desc_1d_ptr = reinterpret_cast(dma_desc_ptr); + (dma_desc_1d_ptr->dstate_order_bypass_comp_desctype_length) &= ~DESC_DSTCOMP_MASK; + (dma_desc_1d_ptr->dstate_order_bypass_comp_desctype_length) |= + ((v << DESC_DSTCOMP_SHIFT) & DESC_DSTCOMP_MASK); +} + +// TODO(Straw): Definition? Not in the spec. +// desc[1][27] +inline void dma_desc_set_srccomp(void* dma_desc_ptr, unsigned int v) { + dma_desc_1d_t* dma_desc_1d_ptr = reinterpret_cast(dma_desc_ptr); + (dma_desc_1d_ptr->dstate_order_bypass_comp_desctype_length) &= ~DESC_SRCCOMP_MASK; + (dma_desc_1d_ptr->dstate_order_bypass_comp_desctype_length) |= + ((v << DESC_SRCCOMP_SHIFT) & DESC_SRCCOMP_MASK); +} + +// desc[1][28] +inline void dma_desc_set_bypassdst(void* dma_desc_ptr, unsigned int v) { + dma_desc_1d_t* dma_desc_1d_ptr = reinterpret_cast(dma_desc_ptr); + (dma_desc_1d_ptr->dstate_order_bypass_comp_desctype_length) &= ~DESC_BYPASSDST_MASK; + (dma_desc_1d_ptr->dstate_order_bypass_comp_desctype_length) |= + ((v << DESC_BYPASSDST_SHIFT) & DESC_BYPASSDST_MASK); +} + +// desc[1][29] +inline void dma_desc_set_bypasssrc(void* dma_desc_ptr, unsigned int v) { + dma_desc_1d_t* dma_desc_1d_ptr = reinterpret_cast(dma_desc_ptr); + (dma_desc_1d_ptr->dstate_order_bypass_comp_desctype_length) &= ~DESC_BYPASSSRC_MASK; + (dma_desc_1d_ptr->dstate_order_bypass_comp_desctype_length) |= + ((v << DESC_BYPASSSRC_SHIFT) & DESC_BYPASSSRC_MASK); +} + +// desc[1][30] +inline void dma_desc_set_order(void* dma_desc_ptr, unsigned int v) { + dma_desc_1d_t* dma_desc_1d_ptr = reinterpret_cast(dma_desc_ptr); + (dma_desc_1d_ptr->dstate_order_bypass_comp_desctype_length) &= ~DESC_ORDER_MASK; + (dma_desc_1d_ptr->dstate_order_bypass_comp_desctype_length) |= + ((v << DESC_ORDER_SHIFT) & DESC_ORDER_MASK); +} + +// desc[1][31] +inline void dma_desc_set_dstate(void* dma_desc_ptr, unsigned int v) { + dma_desc_1d_t* dma_desc_1d_ptr = reinterpret_cast(dma_desc_ptr); + (dma_desc_1d_ptr->dstate_order_bypass_comp_desctype_length) &= ~DESC_DSTATE_MASK; + (dma_desc_1d_ptr->dstate_order_bypass_comp_desctype_length) |= + ((v << DESC_DSTATE_SHIFT) & DESC_DSTATE_MASK); +} + +// desc[1][31] +inline unsigned int dma_desc_get_dstate(void* dma_desc_ptr) { + dma_desc_1d_t* dma_desc_1d_ptr = reinterpret_cast(dma_desc_ptr); + return (((dma_desc_1d_ptr->dstate_order_bypass_comp_desctype_length) & DESC_DSTATE_MASK) >> + DESC_DSTATE_SHIFT); +} + +// desc[2] +inline void dma_desc_set_src(void* dma_desc_ptr, unsigned int v) { + dma_desc_1d_t* dma_desc_1d_ptr = reinterpret_cast(dma_desc_ptr); + (dma_desc_1d_ptr->src) &= ~DESC_SRC_MASK; + (dma_desc_1d_ptr->src) |= ((v << DESC_SRC_SHIFT) & DESC_SRC_MASK); +} + +// desc[3] +inline void dma_desc_set_dst(void* dma_desc_ptr, unsigned int v) { + dma_desc_1d_t* dma_desc_1d_ptr = reinterpret_cast(dma_desc_ptr); + (dma_desc_1d_ptr->dst) &= ~DESC_DST_MASK; + (dma_desc_1d_ptr->dst) |= ((v << DESC_DST_SHIFT) & DESC_DST_MASK); +} + +// desc[4][25:24] +inline void dma_desc_set_cachealloc(void* dma_desc_ptr, unsigned int v) { + dma_desc_2d_t* dma_desc_2d_ptr = reinterpret_cast(dma_desc_ptr); + (dma_desc_2d_ptr->allocation_padding) &= ~DESC_CACHEALLOC_MASK; + (dma_desc_2d_ptr->allocation_padding) |= ((v << DESC_CACHEALLOC_SHIFT) & DESC_CACHEALLOC_MASK); +} + +// TODO(Straw): Definition? Not in the spec. +// desc[4][31:28] +inline void dma_desc_set_padding(void* dma_desc_ptr, unsigned int v) { + dma_desc_2d_t* dma_desc_2d_ptr = reinterpret_cast(dma_desc_ptr); + (dma_desc_2d_ptr->allocation_padding) &= ~DESC_PADDING_MASK; + (dma_desc_2d_ptr->allocation_padding) |= ((v << DESC_PADDING_SHIFT) & DESC_PADDING_MASK); +} + +// desc[5][15:0] +inline void dma_desc_set_roiwidth(void* dma_desc_ptr, unsigned int v) { + dma_desc_2d_t* dma_desc_2d_ptr = reinterpret_cast(dma_desc_ptr); + (dma_desc_2d_ptr->roiheight_roiwidth) &= ~DESC_ROIWIDTH_MASK; + (dma_desc_2d_ptr->roiheight_roiwidth) |= ((v << DESC_ROIWIDTH_SHIFT) & DESC_ROIWIDTH_MASK); +} + +// desc[5][31:16] +inline void dma_desc_set_roiheight(void* dma_desc_ptr, unsigned int v) { + dma_desc_2d_t* dma_desc_2d_ptr = reinterpret_cast(dma_desc_ptr); + (dma_desc_2d_ptr->roiheight_roiwidth) &= ~DESC_ROIHEIGHT_MASK; + (dma_desc_2d_ptr->roiheight_roiwidth) |= ((v << DESC_ROIHEIGHT_SHIFT) & DESC_ROIHEIGHT_MASK); +} + +// desc[6][15:0] +inline void dma_desc_set_srcstride(void* dma_desc_ptr, unsigned int v) { + dma_desc_2d_t* dma_desc_2d_ptr = reinterpret_cast(dma_desc_ptr); + (dma_desc_2d_ptr->dststride_srcstride) &= ~DESC_SRCSTRIDE_MASK; + (dma_desc_2d_ptr->dststride_srcstride) |= ((v << DESC_SRCSTRIDE_SHIFT) & DESC_SRCSTRIDE_MASK); +} + +// desc[6][31:16] +inline void dma_desc_set_dststride(void* dma_desc_ptr, unsigned int v) { + dma_desc_2d_t* dma_desc_2d_ptr = reinterpret_cast(dma_desc_ptr); + (dma_desc_2d_ptr->dststride_srcstride) &= ~DESC_DSTSTRIDE_MASK; + (dma_desc_2d_ptr->dststride_srcstride) |= ((v << DESC_DSTSTRIDE_SHIFT) & DESC_DSTSTRIDE_MASK); +} + +// desc[7][15:0] +inline void dma_desc_set_srcwidthoffset(void* dma_desc_ptr, unsigned int v) { + dma_desc_2d_t* dma_desc_2d_ptr = reinterpret_cast(dma_desc_ptr); + (dma_desc_2d_ptr->dstwidthoffset_srcwidthoffset) &= ~DESC_SRCWIDTHOFFSET_MASK; + (dma_desc_2d_ptr->dstwidthoffset_srcwidthoffset) |= + ((v << DESC_SRCWIDTHOFFSET_SHIFT) & DESC_SRCWIDTHOFFSET_MASK); +} + +// desc[7][31:16] +inline void dma_desc_set_dstwidthoffset(void* dma_desc_ptr, unsigned int v) { + dma_desc_2d_t* dma_desc_2d_ptr = reinterpret_cast(dma_desc_ptr); + (dma_desc_2d_ptr->dstwidthoffset_srcwidthoffset) &= ~DESC_DSTWIDTHOFFSET_MASK; + (dma_desc_2d_ptr->dstwidthoffset_srcwidthoffset) |= + ((v << DESC_DSTWIDTHOFFSET_SHIFT) & DESC_DSTWIDTHOFFSET_MASK); +} + +} // namespace hexagon +} // namespace runtime +} // namespace tvm + +#endif // TVM_RUNTIME_HEXAGON_HEXAGON_HEXAGON_USER_DMA_DESCRIPTORS_H_ diff --git a/src/runtime/hexagon/hexagon/hexagon_user_dma_instructions.h b/src/runtime/hexagon/hexagon/hexagon_user_dma_instructions.h new file mode 100644 index 000000000000..8687e338d0bf --- /dev/null +++ b/src/runtime/hexagon/hexagon/hexagon_user_dma_instructions.h @@ -0,0 +1,79 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#ifndef TVM_RUNTIME_HEXAGON_HEXAGON_HEXAGON_USER_DMA_INSTRUCTIONS_H_ +#define TVM_RUNTIME_HEXAGON_HEXAGON_HEXAGON_USER_DMA_INSTRUCTIONS_H_ + +namespace tvm { +namespace runtime { +namespace hexagon { + +#if defined(__hexagon__) + +inline unsigned int dmpause() { + unsigned int dm0 = 0; + asm volatile(" %0 = dmpause" : "=r"(dm0)); + return dm0; +} + +inline void dmstart(void* next) { asm volatile(" dmstart(%0)" : : "r"(next)); } + +inline unsigned int dmpoll() { + unsigned int dm0 = 0; + asm volatile(" %0 = dmpoll" : "=r"(dm0)); + return dm0; +} + +inline unsigned int dmwait() { + unsigned int dm0 = 0; + asm volatile(" %0 = dmwait" : "=r"(dm0)); + return dm0; +} + +inline void dmresume(unsigned int dm0) { asm volatile(" dmresume(%0)" : : "r"(dm0)); } + +inline unsigned int dmsyncht() { + unsigned int dm0 = 0; + asm volatile(" %0 = dmsyncht" : "=r"(dm0)); + return dm0; +} + +inline unsigned int dmtlbsynch() { + unsigned int dm0 = 0; + asm volatile(" %0 = dmtlbsynch" : "=r"(dm0)); + return dm0; +} + +inline unsigned int dmcfgrd(unsigned int dmindex) { + unsigned int data = 0; + asm volatile(" %0 = dmcfgrd(%1)" : "=r"(data) : "r"(dmindex)); + return data; +} + +inline void dmcfgwr(unsigned int dmindex, unsigned int data) { + asm volatile(" dmcfgwr(%0, %1)" : : "r"(dmindex), "r"(data)); +} + +#endif + +} // namespace hexagon +} // namespace runtime +} // namespace tvm + +#endif // TVM_RUNTIME_HEXAGON_HEXAGON_HEXAGON_USER_DMA_INSTRUCTIONS_H_ diff --git a/src/runtime/hexagon/hexagon/hexagon_user_dma_registers.h b/src/runtime/hexagon/hexagon/hexagon_user_dma_registers.h new file mode 100644 index 000000000000..2463e3ba7ac9 --- /dev/null +++ b/src/runtime/hexagon/hexagon/hexagon_user_dma_registers.h @@ -0,0 +1,278 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#ifndef TVM_RUNTIME_HEXAGON_HEXAGON_HEXAGON_USER_DMA_REGISTERS_H_ +#define TVM_RUNTIME_HEXAGON_HEXAGON_HEXAGON_USER_DMA_REGISTERS_H_ + +namespace tvm { +namespace runtime { +namespace hexagon { + +/* Register offset */ +#define regDM0 0x0 // per engine configuration +#define regDM1 0x1 // reserved +#define regDM2 0x2 // global control shared by all DMA Engines +#define regDM3 0x3 // reserved +#define regDM4 0x4 // global error syndrome register shared by all DMA Engines +#define regDM5 0x5 // global error syndrome register shared by all DMA Engines + +// DM0[1:0] +#define DM0_STATUS_MASK 0x00000003 +#define DM0_STATUS_SHIFT 0 +#define DM0_STATUS_IDLE 0 +#define DM0_STATUS_RUN 1 +#define DM0_STATUS_ERROR 2 + +// DM0[31:4] +// Descriptors addresses must be (minimum) 16 byte aligned +// -> Lower 4 bits masked to clear DMA Status +// -> But, descriptor address is not shifted +#define DM0_DESC_ADDR_MASK 0xFFFFFFF0 +#define DM0_DESC_ADDR_SHIFT 0 + +// DM2[0] +#define DM2_GUEST_MODE_STALL_MASK 0x00000001 +#define DM2_GUEST_MODE_STALL_SHIFT 0 +#define DM2_GUEST_MODE_STALL_YES 0 +#define DM2_GUEST_MODE_STALL_NO 1 + +// DM2[1] +#define DM2_MONITOR_MODE_STALL_MASK 0x00000002 +#define DM2_MONITOR_MODE_STALL_SHIFT 1 +#define DM2_MONITOR_MODE_STALL_YES 0 +#define DM2_MONITOR_MODE_STALL_NO 1 + +// DM2[3] +#define DM2_EXCEPTION_MODE_CONTINUE_MASK 0x00000008 +#define DM2_EXCEPTION_MODE_CONTINUE_SHIFT 3 +#define DM2_EXCEPTION_MODE_CONTINUE_YES 0 +#define DM2_EXCEPTION_MODE_CONTINUE_NO 1 + +// DM2[4] +#define DM2_DEBUG_MODE_CONTINUE_MASK 0x00000010 +#define DM2_DEBUG_MODE_CONTINUE_SHIFT 4 +#define DM2_DEBUG_MODE_CONTINUE_NO 0 +#define DM2_DEBUG_MODE_CONTINUE_YES 1 + +// DM2[6:5] +#define DM2_TRAFFIC_PRIORITY_MASK 0x00000060 +#define DM2_TRAFFIC_PRIORITY_SHIFT 5 +#define DM2_TRAFFIC_PRIORITY_IDLE 0 +#define DM2_TRAFFIC_PRIORITY_LOW 1 +#define DM2_TRAFFIC_PRIORITY_INHERIT 2 +#define DM2_TRAFFIC_PRIORITY_HIGH 3 + +// DM2[7] +#define DM2_DLBC_ENABLE_MASK 0x00000080 +#define DM2_DLBC_ENABLE_SHIFT 7 +#define DM2_DLBC_DISABLE 0 +#define DM2_DLBC_ENABLE 1 + +// DM2[8] +#define DM2_OOO_WRITE_MASK 0x00000100 +#define DM2_OOO_WRITE_SHIFT 8 +#define DM2_OOO_WRITE_ENABLE 0 +#define DM2_OOO_WRITE_DISABLE 1 + +// DM2[9] +#define DM2_ERROR_EXCEPTION_MASK 0x00000200 +#define DM2_ERROR_EXCEPTION_SHIFT 9 +#define DM2_ERROR_EXCEPTION_GENERATE_NO 0 +#define DM2_ERROR_EXCEPTION_GENERATE_YES 1 + +// DM2[23:16] +#define DM2_OUTSTANDING_READ_MASK 0x00FF0000 +#define DM2_OUTSTANDING_READ_SHIFT 16 + +// DM2[31:24] +#define DM2_OUTSTANDING_WRITE_MASK 0xFF000000 +#define DM2_OUTSTANDING_WRITE_SHIFT 24 + +// DM4[0] +#define DM4_ERROR_MASK 0x00000001 +#define DM4_ERROR_SHIFT 0 +#define DM4_ERROR_NO 0 +#define DM4_ERROR_YES 1 + +// DM4[7:4] +#define DM4_THREAD_ID_MASK 0x000000F0 +#define DM4_THREAD_ID_SHIFT 4 + +// DM4[15:8] +#define DM4_SYNDRONE_CODE_MASK 0x0000FF00 +#define DM4_SYNDRONE_CODE_SHIFT 8 +#define DM4_SYNDRONE_CODE_DM_COMMAND_ERROR 0 +#define DM4_SYNDRONE_CODE_DESCRIPTOR_INVALID_ALIGNMENT 1 +#define DM4_SYNDRONE_CODE_DESCRIPTOR_INVALID_TYPE 2 +#define DM4_SYNDRONE_CODE_UNSUPPORTED_ADDRESS 3 +#define DM4_SYNDRONE_CODE_UNSUPPORTED_BYPASS_MODE 4 +#define DM4_SYNDRONE_CODE_UNSUPPORTED_COMP_FORMAT 5 +#define DM4_SYNDRONE_CODE_DESCRIPTOR_ROI_ERROR 6 +#define DM4_SYNDRONE_CODE_BUS_ERROR_DESCRIPTOR_RW 7 +#define DM4_SYNDRONE_CODE_BUS_ERROR_L2_READ 8 +#define DM4_SYNDRONE_CODE_BUS_ERROR_L2_WRITE 9 +// TODO(Straw): Bus Error (10) on Compression Metadata? +// TODO(Straw): Definition? Not in the spec. +#define DM4_SYNDRONE_CODE_INVALID_ACCESS_RIGHTS 102 +#define DM4_SYNDRONE_CODE_DATA_TIMEOUT 103 +#define DM4_SYNDRONE_CODE_DATA_ABORT 104 + +// DM5 +#define DM5_SYNDRONE_ADDR_MASK 0xFFFFFFFF +#define DM5_SYNDRONE_ADDR_SHIFT 0 + +// DM2[0] +static inline unsigned int dm2_get_guest_mode(unsigned int cfg) { + return (cfg & DM2_GUEST_MODE_STALL_MASK) >> DM2_GUEST_MODE_STALL_SHIFT; +} + +// DM2[0] +static inline void dm2_set_guest_mode(unsigned int* cfg, unsigned int v) { + *cfg &= ~DM2_GUEST_MODE_STALL_MASK; + *cfg |= ((v << DM2_GUEST_MODE_STALL_SHIFT) & DM2_GUEST_MODE_STALL_MASK); +} + +// DM2[1] +static inline unsigned int dm2_get_monitor_mode(unsigned int cfg) { + return (cfg & DM2_MONITOR_MODE_STALL_MASK) >> DM2_MONITOR_MODE_STALL_SHIFT; +} + +// DM2[1] +static inline void dm2_set_monitor_mode(unsigned int* cfg, unsigned int v) { + *cfg &= ~DM2_MONITOR_MODE_STALL_MASK; + *cfg |= ((v << DM2_MONITOR_MODE_STALL_SHIFT) & DM2_MONITOR_MODE_STALL_MASK); +} + +// DM2[3] +static inline unsigned int dm2_get_exception_mode(unsigned int cfg) { + return (cfg & DM2_EXCEPTION_MODE_CONTINUE_MASK) >> DM2_EXCEPTION_MODE_CONTINUE_SHIFT; +} + +// DM2[3] +static inline void dm2_set_exception_mode(unsigned int* cfg, unsigned int v) { + *cfg &= ~DM2_EXCEPTION_MODE_CONTINUE_MASK; + *cfg |= ((v << DM2_EXCEPTION_MODE_CONTINUE_SHIFT) & DM2_EXCEPTION_MODE_CONTINUE_MASK); +} + +// DM2[4] +static inline unsigned int dm2_get_debug_mode(unsigned int cfg) { + return (cfg & DM2_DEBUG_MODE_CONTINUE_MASK) >> DM2_DEBUG_MODE_CONTINUE_SHIFT; +} + +// DM2[4] +static inline void dm2_set_debug_mode(unsigned int* cfg, unsigned int v) { + *cfg &= ~DM2_DEBUG_MODE_CONTINUE_MASK; + *cfg |= ((v << DM2_DEBUG_MODE_CONTINUE_SHIFT) & DM2_DEBUG_MODE_CONTINUE_MASK); +} + +// DM2[6:5] +static inline unsigned int dm2_get_priority(unsigned int cfg) { + return (cfg & DM2_TRAFFIC_PRIORITY_MASK) >> DM2_TRAFFIC_PRIORITY_SHIFT; +} + +// DM2[6:5] +static inline void dm2_set_priority(unsigned int* cfg, unsigned int v) { + *cfg &= ~DM2_TRAFFIC_PRIORITY_MASK; + *cfg |= ((v << DM2_TRAFFIC_PRIORITY_SHIFT) & DM2_TRAFFIC_PRIORITY_MASK); +} + +// DM2[7] +static inline unsigned int dm2_get_dlbc_enable(unsigned int cfg) { + return (cfg & DM2_DLBC_ENABLE_MASK) >> DM2_DLBC_ENABLE_SHIFT; +} + +// DM2[7] +static inline void dm2_set_dlbc_enable(unsigned int* cfg, unsigned int v) { + *cfg &= ~DM2_DLBC_ENABLE_MASK; + *cfg |= ((v << DM2_DLBC_ENABLE_SHIFT) & DM2_DLBC_ENABLE_MASK); +} + +// DM2[8] +static inline unsigned int dm2_get_ooo_write_ctrl(unsigned int cfg) { + return (cfg & DM2_OOO_WRITE_MASK) >> DM2_OOO_WRITE_SHIFT; +} + +// DM2[8] +static inline void dm2_set_ooo_write_ctrl(unsigned int* cfg, unsigned int v) { + *cfg &= ~DM2_OOO_WRITE_MASK; + *cfg |= ((v << DM2_OOO_WRITE_SHIFT) & DM2_OOO_WRITE_MASK); +} + +// DM2[9] +static inline unsigned int dm2_get_error_exception_ctrl(unsigned int cfg) { + return (cfg & DM2_ERROR_EXCEPTION_MASK) >> DM2_ERROR_EXCEPTION_SHIFT; +} + +// DM2[9] +static inline void dm2_set_error_exception_ctrl(unsigned int* cfg, unsigned int v) { + *cfg &= ~DM2_ERROR_EXCEPTION_MASK; + *cfg |= ((v << DM2_ERROR_EXCEPTION_SHIFT) & DM2_ERROR_EXCEPTION_MASK); +} + +// DM2[23:16] +static inline unsigned int dm2_get_outstanding_transactions_read(unsigned int cfg) { + return (cfg & DM2_OUTSTANDING_READ_MASK) >> DM2_OUTSTANDING_READ_SHIFT; +} + +// DM2[23:16] +static inline void dm2_set_outstanding_transactions_read(unsigned int* cfg, unsigned int v) { + *cfg &= ~DM2_OUTSTANDING_READ_MASK; + *cfg |= ((v << DM2_OUTSTANDING_READ_SHIFT) & DM2_OUTSTANDING_READ_MASK); +} + +// DM2[31:24] +static inline unsigned int dm2_get_outstanding_transactions_write(unsigned int cfg) { + return (cfg & DM2_OUTSTANDING_WRITE_MASK) >> DM2_OUTSTANDING_WRITE_SHIFT; +} + +// DM2[31:24] +static inline void dm2_set_outstanding_transactions_write(unsigned int* cfg, unsigned int v) { + *cfg &= ~DM2_OUTSTANDING_WRITE_MASK; + *cfg |= ((v << DM2_OUTSTANDING_WRITE_SHIFT) & DM2_OUTSTANDING_WRITE_MASK); +} + +/*--------------------------------------------------------------------------*/ + +// DM4[0] +static inline unsigned int dm4_get_error(unsigned int cfg) { + return (cfg & DM4_ERROR_MASK) >> DM4_ERROR_SHIFT; +} + +// DM4[7:4] +static inline unsigned int dm4_get_engine_id(unsigned int cfg) { + return (cfg & DM4_THREAD_ID_MASK) >> DM4_THREAD_ID_SHIFT; +} + +// DM4[15:8] +static inline unsigned int dm4_get_syndrone_code(unsigned int cfg) { + return (cfg & DM4_SYNDRONE_CODE_MASK) >> DM4_SYNDRONE_CODE_SHIFT; +} + +/*--------------------------------------------------------------------------*/ + +// DM5 +static inline unsigned int dm5_get_syndrone_addr(unsigned int cfg) { + return (cfg & DM5_SYNDRONE_ADDR_MASK) >> DM5_SYNDRONE_ADDR_SHIFT; +} + +} // namespace hexagon +} // namespace runtime +} // namespace tvm + +#endif // TVM_RUNTIME_HEXAGON_HEXAGON_HEXAGON_USER_DMA_REGISTERS_H_