From 1220f73f32b201dd8f6003b5d1ee4bc44d712e0f Mon Sep 17 00:00:00 2001 From: Xiaoguang Xing Date: Mon, 22 Jan 2024 10:31:30 +0800 Subject: [PATCH 1/6] riscv: errata: Replace thead cache clean with flush Signed-off-by: Xiaoguang Xing --- arch/riscv/include/asm/errata_list.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/include/asm/errata_list.h b/arch/riscv/include/asm/errata_list.h index d3f3c237adad7..d8a2d56279ec6 100644 --- a/arch/riscv/include/asm/errata_list.h +++ b/arch/riscv/include/asm/errata_list.h @@ -128,7 +128,7 @@ asm volatile(ALTERNATIVE( \ * 0000000 11001 00000 000 00000 0001011 */ #define THEAD_inval_A0 ".long 0x0265000b" -#define THEAD_clean_A0 ".long 0x0255000b" +#define THEAD_clean_A0 ".long 0x0275000b" #define THEAD_flush_A0 ".long 0x0275000b" #define THEAD_SYNC_S ".long 0x0190000b" From 2e3f73b57974fd27f08111c636db399e4b75aab5 Mon Sep 17 00:00:00 2001 From: Icenowy Zheng Date: Wed, 4 Jan 2023 15:41:44 +0800 Subject: [PATCH 2/6] riscv: errata: cmo: add CMO macro variant with both VA and PA The standardized Zicbom extension supports only VA; however, there are some vendor extensions (e.g. XtheadCmo) that can handle cache management operations on PA directly, bypassing the TLB lookup. Add a CMO alternatives macro variant that comes with both VA and PA supplied, so that the code can be patched to use either the VA or the PA at runtime. In this case the codepath is now patched to use VA for Zicbom and PA for XtheadCmo.
Signed-off-by: Icenowy Zheng Reviewed-by: Guo Ren --- arch/riscv/include/asm/errata_list.h | 30 ++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/arch/riscv/include/asm/errata_list.h b/arch/riscv/include/asm/errata_list.h index d8a2d56279ec6..a29b67dd3c564 100644 --- a/arch/riscv/include/asm/errata_list.h +++ b/arch/riscv/include/asm/errata_list.h @@ -131,6 +131,9 @@ asm volatile(ALTERNATIVE( \ #define THEAD_clean_A0 ".long 0x0275000b" #define THEAD_flush_A0 ".long 0x0275000b" #define THEAD_SYNC_S ".long 0x0190000b" +#define THEAD_inval_PA_A0 ".long 0x02a5000b" +#define THEAD_clean_PA_A0 ".long 0x0295000b" +#define THEAD_flush_PA_A0 ".long 0x02b5000b" #define ALT_CMO_OP(_op, _start, _size, _cachesize) \ asm volatile(ALTERNATIVE_2( \ @@ -157,6 +160,33 @@ asm volatile(ALTERNATIVE_2( \ "r"((unsigned long)(_start) + (_size)) \ : "a0") +#define ALT_CMO_OP_VPA(_op, _vaddr, _paddr, _size, _cachesize) \ +asm volatile(ALTERNATIVE_2( \ + __nops(6), \ + "mv a0, %1\n\t" \ + "j 2f\n\t" \ + "3:\n\t" \ + "cbo." 
__stringify(_op) " (a0)\n\t" \ + "add a0, a0, %0\n\t" \ + "2:\n\t" \ + "bltu a0, %2, 3b\n\t" \ + "nop", 0, CPUFEATURE_ZICBOM, CONFIG_RISCV_ISA_ZICBOM, \ + "mv a0, %3\n\t" \ + "j 2f\n\t" \ + "3:\n\t" \ + THEAD_##_op##_PA_A0 "\n\t" \ + "add a0, a0, %0\n\t" \ + "2:\n\t" \ + "bltu a0, %4, 3b\n\t" \ + THEAD_SYNC_S, THEAD_VENDOR_ID, \ + ERRATA_THEAD_CMO, CONFIG_ERRATA_THEAD_CMO) \ + : : "r"(_cachesize), \ + "r"((unsigned long)(_vaddr) & ~((_cachesize) - 1UL)), \ + "r"((unsigned long)(_vaddr) + (_size)), \ + "r"((unsigned long)(_paddr) & ~((_cachesize) - 1UL)), \ + "r"((unsigned long)(_paddr) + (_size)) \ + : "a0") + #define THEAD_C9XX_RV_IRQ_PMU 17 #define THEAD_C9XX_CSR_SCOUNTEROF 0x5c5 From c182b4d545c52a36997216927da33380ddfda0dd Mon Sep 17 00:00:00 2001 From: Icenowy Zheng Date: Wed, 4 Jan 2023 15:41:45 +0800 Subject: [PATCH 3/6] riscv: use VA+PA variant of CMO macros for DMA synchronization DMA synchronization is done on PA and the VA is calculated from the PA. Use the alternative macro variant that takes both VA and PA as parameters, thus in case the ISA extension used supports PA directly, the overhead for re-converting VA to PA can be omitted.
Suggested-by: Guo Ren Signed-off-by: Icenowy Zheng Reviewed-by: Guo Ren --- arch/riscv/mm/dma-noncoherent.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/riscv/mm/dma-noncoherent.c b/arch/riscv/mm/dma-noncoherent.c index 341bd6706b4c5..22de7f222637f 100644 --- a/arch/riscv/mm/dma-noncoherent.c +++ b/arch/riscv/mm/dma-noncoherent.c @@ -25,7 +25,7 @@ static inline void arch_dma_cache_wback(phys_addr_t paddr, size_t size) return; } #endif - ALT_CMO_OP(clean, vaddr, size, riscv_cbom_block_size); + ALT_CMO_OP_VPA(clean, vaddr, paddr, size, riscv_cbom_block_size); } static inline void arch_dma_cache_inv(phys_addr_t paddr, size_t size) @@ -39,7 +39,7 @@ static inline void arch_dma_cache_inv(phys_addr_t paddr, size_t size) } #endif - ALT_CMO_OP(inval, vaddr, size, riscv_cbom_block_size); + ALT_CMO_OP_VPA(inval, vaddr, paddr, size, riscv_cbom_block_size); } static inline void arch_dma_cache_wback_inv(phys_addr_t paddr, size_t size) @@ -53,7 +53,7 @@ static inline void arch_dma_cache_wback_inv(phys_addr_t paddr, size_t size) } #endif - ALT_CMO_OP(flush, vaddr, size, riscv_cbom_block_size); + ALT_CMO_OP_VPA(flush, vaddr, paddr, size, riscv_cbom_block_size); } static inline bool arch_sync_dma_clean_before_fromdevice(void) From e2f997329c71ae0ea181f55d52685d03130ba3b6 Mon Sep 17 00:00:00 2001 From: Icenowy Zheng Date: Wed, 4 Jan 2023 15:41:46 +0800 Subject: [PATCH 4/6] riscv: use VA+PA variant of CMO macros for DMA page preparation When doing DMA page preparation, both the VA and the PA are easily accessible from struct page. Use the alternative macro variant that takes both VA and PA as parameters, thus in case the ISA extension used supports PA directly, the overhead for re-converting VA to PA can be omitted.
Suggested-by: Guo Ren Signed-off-by: Icenowy Zheng --- arch/riscv/mm/dma-noncoherent.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/riscv/mm/dma-noncoherent.c b/arch/riscv/mm/dma-noncoherent.c index 22de7f222637f..8932a93ec50cd 100644 --- a/arch/riscv/mm/dma-noncoherent.c +++ b/arch/riscv/mm/dma-noncoherent.c @@ -117,6 +117,7 @@ void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size, void arch_dma_prep_coherent(struct page *page, size_t size) { void *flush_addr = page_address(page); + phys_addr_t paddr = PFN_PHYS(page_to_pfn(page)); #ifdef CONFIG_RISCV_NONSTANDARD_CACHE_OPS if (unlikely(noncoherent_cache_ops.wback_inv)) { @@ -125,7 +126,7 @@ void arch_dma_prep_coherent(struct page *page, size_t size) } #endif - ALT_CMO_OP(flush, flush_addr, size, riscv_cbom_block_size); + ALT_CMO_OP_VPA(flush, flush_addr, paddr, size, riscv_cbom_block_size); } void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, From 94ca55e9ec41a14cc6ea906636e8987f0a40ebc7 Mon Sep 17 00:00:00 2001 From: Xiaoguang Xing Date: Thu, 19 Oct 2023 17:55:11 +0800 Subject: [PATCH 5/6] riscv: errata: thead: Make cache clean to flush Signed-off-by: Xiaoguang Xing --- arch/riscv/include/asm/errata_list.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/include/asm/errata_list.h b/arch/riscv/include/asm/errata_list.h index a29b67dd3c564..d415c50f22e84 100644 --- a/arch/riscv/include/asm/errata_list.h +++ b/arch/riscv/include/asm/errata_list.h @@ -132,7 +132,7 @@ asm volatile(ALTERNATIVE( \ #define THEAD_flush_A0 ".long 0x0275000b" #define THEAD_SYNC_S ".long 0x0190000b" #define THEAD_inval_PA_A0 ".long 0x02a5000b" -#define THEAD_clean_PA_A0 ".long 0x0295000b" +#define THEAD_clean_PA_A0 ".long 0x02b5000b" #define THEAD_flush_PA_A0 ".long 0x02b5000b" #define ALT_CMO_OP(_op, _start, _size, _cachesize) \ From dfbaf89f104465fdc33864f026553139a12d2c55 Mon Sep 17 00:00:00 2001 From: Icenowy Zheng Date: Fri, 24 Feb 2023 17:22:38 +0800 Subject: 
[PATCH 6/6] riscv: changing T-Head PBMT attributes Originally the T-Head PBMT implementation in the kernel is intended for D1, thus the Shareable bit is not set. In addition, the Bufferable bit is not set for writecombine situation. Set these bits in the T-Head PBMT attributes definition. Signed-off-by: Icenowy Zheng --- arch/riscv/include/asm/pgtable-64.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/riscv/include/asm/pgtable-64.h b/arch/riscv/include/asm/pgtable-64.h index 3272ca7a5270b..109e4808a23e5 100644 --- a/arch/riscv/include/asm/pgtable-64.h +++ b/arch/riscv/include/asm/pgtable-64.h @@ -127,13 +127,13 @@ enum napot_cont_order { /* * [63:59] T-Head Memory Type definitions: * - * 00000 - NC Weakly-ordered, Non-cacheable, Non-bufferable, Non-shareable, Non-trustable + * 00110 - NC Weakly-ordered, Non-cacheable, Bufferable, Shareable, Non-trustable * 01110 - PMA Weakly-ordered, Cacheable, Bufferable, Shareable, Non-trustable - * 10000 - IO Strongly-ordered, Non-cacheable, Non-bufferable, Non-shareable, Non-trustable + * 10010 - IO Strongly-ordered, Non-cacheable, Non-bufferable, Shareable, Non-trustable */ #define _PAGE_PMA_THEAD ((1UL << 62) | (1UL << 61) | (1UL << 60)) -#define _PAGE_NOCACHE_THEAD 0UL -#define _PAGE_IO_THEAD (1UL << 63) +#define _PAGE_NOCACHE_THEAD ((1UL << 61) | (1UL << 60)) +#define _PAGE_IO_THEAD ((1UL << 63) | (1UL << 60)) #define _PAGE_MTMASK_THEAD (_PAGE_PMA_THEAD | _PAGE_IO_THEAD | (1UL << 59)) static inline u64 riscv_page_mtmask(void)