From da4bb746518f7ec49a39ecc50aa45d486399d762 Mon Sep 17 00:00:00 2001 From: Leonard Hecker Date: Thu, 7 Jul 2022 23:26:52 +0200 Subject: [PATCH 1/5] AtlasEngine: Implement LRU invalidation for glyph tiles --- src/renderer/atlas/AtlasEngine.cpp | 102 +++------- src/renderer/atlas/AtlasEngine.h | 270 +++++++++++++++++++++++---- src/renderer/atlas/AtlasEngine.r.cpp | 61 +----- 3 files changed, 267 insertions(+), 166 deletions(-) diff --git a/src/renderer/atlas/AtlasEngine.cpp b/src/renderer/atlas/AtlasEngine.cpp index 6357b3b52ec..a93a3e18e66 100644 --- a/src/renderer/atlas/AtlasEngine.cpp +++ b/src/renderer/atlas/AtlasEngine.cpp @@ -25,20 +25,6 @@ using namespace Microsoft::Console::Render; -#pragma warning(push) -#pragma warning(disable : 26447) // The function is declared 'noexcept' but calls function 'operator()()' which may throw exceptions (f.6). -__declspec(noinline) static void showOOMWarning() noexcept -{ - [[maybe_unused]] static const auto once = []() { - std::thread t{ []() noexcept { - MessageBoxW(nullptr, L"This application is using a highly experimental text rendering engine and has run out of memory. Text rendering will start to behave irrationally and you should restart this process.", L"Out Of Memory", MB_ICONERROR | MB_OK); - } }; - t.detach(); - return false; - }(); -} -#pragma warning(pop) - struct TextAnalyzer final : IDWriteTextAnalysisSource, IDWriteTextAnalysisSink { constexpr TextAnalyzer(const std::vector& text, std::vector& results) noexcept : @@ -365,12 +351,14 @@ try } } - _api.dirtyRect = til::rect{ - 0, - _api.invalidatedRows.x, - _api.cellCount.x, - _api.invalidatedRows.y, - }; + if constexpr (debugGlyphGenerationPerformance) + { + _api.dirtyRect = til::rect{ 0, 0, _api.cellCount.x, _api.cellCount.y }; + } + else + { + _api.dirtyRect = til::rect{ 0, _api.invalidatedRows.x, _api.cellCount.x, _api.invalidatedRows.y }; + } return S_OK; } @@ -394,7 +382,7 @@ CATCH_RETURN() [[nodiscard]] bool AtlasEngine::RequiresContinuousRedraw() noexcept { - return continuousRedraw; + return debugGeneralPerformance; } void AtlasEngine::WaitUntilCanRender() noexcept @@ -559,9 +547,10 @@ try const auto point = options.coordCursor; // TODO: options.coordCursor can contain invalid out of bounds coordinates when // the window is being resized and the cursor is on the last line of the viewport. - const auto x = gsl::narrow_cast(clamp(point.X, 0, _r.cellCount.x - 1)); - const auto y = gsl::narrow_cast(clamp(point.Y, 0, _r.cellCount.y - 1)); - const auto right = gsl::narrow_cast(x + 1 + (options.fIsDoubleWidth & (options.cursorType != CursorType::VerticalBar))); + const auto x = gsl::narrow_cast(clamp(point.X, 0, _r.cellCount.x - 1)); + const auto y = gsl::narrow_cast(clamp(point.Y, 0, _r.cellCount.y - 1)); + const auto cursorWidth = 1 + (options.fIsDoubleWidth & (options.cursorType != CursorType::VerticalBar)); + const auto right = gsl::narrow_cast(clamp(x + cursorWidth, 0, _r.cellCount.x - 0)); const auto bottom = gsl::narrow_cast(y + 1); _setCellFlags({ x, y, right, bottom }, CellFlags::Cursor, CellFlags::Cursor); } @@ -775,7 +764,7 @@ void AtlasEngine::_createSwapChain() // D3D swap chain setup (the thing that allows us to present frames on the screen) { - const auto supportsFrameLatencyWaitableObject = IsWindows8Point1OrGreater(); + const auto supportsFrameLatencyWaitableObject = !debugGeneralPerformance && IsWindows8Point1OrGreater(); // With C++20 we'll finally have designated initializers. DXGI_SWAP_CHAIN_DESC1 desc{}; @@ -899,6 +888,7 @@ void AtlasEngine::_recreateSizeDependentResources() // (40x on AMD Zen1-3, which have a rep movsb performance issue. MSFT:33358259.) _r.cells = Buffer{ totalCellCount }; _r.cellCount = _api.cellCount; + _r.tileAllocator.setMaxArea(_api.sizeInPixel); // .clear() doesn't free the memory of these buffers. // This code allows them to shrink again. @@ -947,32 +937,14 @@ void AtlasEngine::_recreateFontDependentResources() // D3D { - // TODO: Consider using IDXGIAdapter3::QueryVideoMemoryInfo() and IDXGIAdapter3::RegisterVideoMemoryBudgetChangeNotificationEvent() - // That way we can make better to use of a user's available video memory. - - static constexpr size_t sizePerPixel = 4; - static constexpr size_t sizeLimit = D3D10_REQ_RESOURCE_SIZE_IN_MEGABYTES * 1024 * 1024; - const size_t dimensionLimit = _r.device->GetFeatureLevel() >= D3D_FEATURE_LEVEL_11_0 ? D3D11_REQ_TEXTURE2D_U_OR_V_DIMENSION : D3D10_REQ_TEXTURE2D_U_OR_V_DIMENSION; - const size_t csx = _api.fontMetrics.cellSize.x; - const size_t csy = _api.fontMetrics.cellSize.y; - const auto xLimit = (dimensionLimit / csx) * csx; - const auto pixelsPerCellRow = xLimit * csy; - const auto yLimitDueToDimension = (dimensionLimit / csy) * csy; - const auto yLimitDueToSize = ((sizeLimit / sizePerPixel) / pixelsPerCellRow) * csy; - const auto yLimit = std::min(yLimitDueToDimension, yLimitDueToSize); const auto scaling = GetScaling(); _r.cellSizeDIP.x = static_cast(_api.fontMetrics.cellSize.x) / scaling; _r.cellSizeDIP.y = static_cast(_api.fontMetrics.cellSize.y) / scaling; _r.cellSize = _api.fontMetrics.cellSize; _r.cellCount = _api.cellCount; - // x/yLimit are strictly smaller than dimensionLimit, which is smaller than a u16. - _r.atlasSizeInPixelLimit = u16x2{ gsl::narrow_cast(xLimit), gsl::narrow_cast(yLimit) }; _r.atlasSizeInPixel = { 0, 0 }; - // The first Cell at {0, 0} is always our cursor texture. - // --> The first glyph starts at {1, 0}. - _r.atlasPosition.x = _api.fontMetrics.cellSize.x; - _r.atlasPosition.y = 0; + _r.tileAllocator = TileAllocator{ _r.cellSize, _api.sizeInPixel }; _r.glyphs = {}; _r.glyphQueue = {}; @@ -1118,26 +1090,6 @@ void AtlasEngine::_setCellFlags(u16r coords, CellFlags mask, CellFlags bits) noe } } -AtlasEngine::u16x2 AtlasEngine::_allocateAtlasTile() noexcept -{ - const auto ret = _r.atlasPosition; - - _r.atlasPosition.x += _r.cellSize.x; - if (_r.atlasPosition.x >= _r.atlasSizeInPixelLimit.x) - { - _r.atlasPosition.x = 0; - _r.atlasPosition.y += _r.cellSize.y; - if (_r.atlasPosition.y >= _r.atlasSizeInPixelLimit.y) - { - _r.atlasPosition.x = _r.cellSize.x; - _r.atlasPosition.y = 0; - showOOMWarning(); - } - } - - return ret; -} - void AtlasEngine::_flushBufferLine() { if (_api.bufferLine.empty()) @@ -1449,11 +1401,10 @@ void AtlasEngine::_emplaceGlyph(IDWriteFontFace* fontFace, size_t bufferPos1, si auto attributes = _api.attributes; attributes.cellCount = cellCount; - const auto [it, inserted] = _r.glyphs.emplace(std::piecewise_construct, std::forward_as_tuple(attributes, gsl::narrow(charCount), chars), std::forward_as_tuple()); - const auto& key = it->first; - auto& value = it->second; + AtlasKey key{ attributes, gsl::narrow(charCount), chars }; + auto valueRef = _r.glyphs.find(key); - if (inserted) + if (!valueRef) { // Do fonts exist *in practice* which contain both colored and uncolored glyphs? I'm pretty sure... // However doing it properly means using either of: @@ -1481,17 +1432,24 @@ void AtlasEngine::_emplaceGlyph(IDWriteFontFace* fontFace, size_t bufferPos1, si WI_SetFlagIf(flags, CellFlags::ColoredGlyph, fontFace2 && fontFace2->IsColorFont()); } - const auto coords = value.initialize(flags, cellCount); + u16x2* coords; + AtlasValue value{ flags, cellCount, &coords }; for (u16 i = 0; i < cellCount; ++i) { - coords[i] = _allocateAtlasTile(); + coords[i] = _r.tileAllocator.allocate(_r.glyphs); } - _r.glyphQueue.push_back(AtlasQueueItem{ &key, &value }); + const auto it = _r.glyphs.insert(std::move(key), std::move(value)); + valueRef = &it->second; + _r.glyphQueue.emplace_back(&it->first, &it->second); _r.maxEncounteredCellCount = std::max(_r.maxEncounteredCellCount, cellCount); } - const auto valueData = value.data(); + // For some reason MSVC doesn't understand that valueRef is overwritten in the branch above, resulting in: + // C26430: Symbol 'valueRef' is not tested for nullness on all paths (f.23). + __assume(valueRef != nullptr); + + const auto valueData = valueRef->data(); const auto coords = &valueData->coords[0]; const auto data = _getCell(x1, _api.lastPaintBufferLineCoord.y); diff --git a/src/renderer/atlas/AtlasEngine.h b/src/renderer/atlas/AtlasEngine.h index 92a22c0a086..7fc20e4df22 100644 --- a/src/renderer/atlas/AtlasEngine.h +++ b/src/renderer/atlas/AtlasEngine.h @@ -106,12 +106,6 @@ namespace Microsoft::Console::Render T y{}; ATLAS_POD_OPS(vec2) - - constexpr vec2 operator/(const vec2& rhs) noexcept - { - assert(rhs.x != 0 && rhs.y != 0); - return { gsl::narrow_cast(x / rhs.x), gsl::narrow_cast(y / rhs.y) }; - } }; template @@ -243,6 +237,26 @@ namespace Microsoft::Console::Render return _size; } + T* begin() noexcept + { + return _data; + } + + T* begin() const noexcept + { + return _data; + } + + T* end() noexcept + { + return _data + _size; + } + + T* end() const noexcept + { + return _data + _size; + } + private: // These two functions don't need to use scoped objects or standard allocators, // since this class is in fact an scoped allocator object itself. @@ -302,23 +316,8 @@ namespace Microsoft::Console::Render constexpr SmallObjectOptimizer() = default; - SmallObjectOptimizer(const SmallObjectOptimizer& other) - { - const auto otherData = other.data(); - const auto otherSize = other.size(); - const auto data = initialize(otherSize); - memcpy(data, otherData, otherSize); - } - - SmallObjectOptimizer& operator=(const SmallObjectOptimizer& other) - { - if (this != &other) - { - delete this; - new (this) SmallObjectOptimizer(other); - } - return &this; - } + SmallObjectOptimizer(const SmallObjectOptimizer& other) = delete; + SmallObjectOptimizer& operator=(const SmallObjectOptimizer& other) = delete; SmallObjectOptimizer(SmallObjectOptimizer&& other) noexcept { @@ -484,14 +483,6 @@ namespace Microsoft::Console::Render } }; - struct AtlasKeyHasher - { - size_t operator()(const AtlasKey& key) const noexcept - { - return key.hash(); - } - }; - struct AtlasValueData { CellFlags flags = CellFlags::None; @@ -500,15 +491,14 @@ namespace Microsoft::Console::Render struct AtlasValue { - constexpr AtlasValue() = default; - - u16x2* initialize(CellFlags flags, u16 cellCount) + AtlasValue(CellFlags flags, u16 cellCount, u16x2** coords) { + __assume(coords != nullptr); const auto size = dataSize(cellCount); const auto data = _data.initialize(size); WI_SetFlagIf(flags, CellFlags::Inlined, _data.would_inline(size)); data->flags = flags; - return &data->coords[0]; + *coords = &data->coords[0]; } const AtlasValueData* data() const noexcept @@ -531,6 +521,209 @@ namespace Microsoft::Console::Render const AtlasValue* value; }; + struct AtlasKeyHasher + { + using is_transparent = int; + + size_t operator()(const AtlasKey& v) const noexcept + { + return v.hash(); + } + + size_t operator()(const std::list>::iterator& v) const noexcept + { + return operator()(v->first); + } + }; + + struct AtlasKeyEq + { + using is_transparent = int; + + bool operator()(const AtlasKey& a, const std::list>::iterator& b) const noexcept + { + return a == b->first; + } + + bool operator()(const std::list>::iterator& a, const std::list>::iterator& b) const noexcept + { + return operator()(a->first, b); + } + }; + + struct TileHashMap + { + TileHashMap() noexcept = default; + + AtlasValue* find(const AtlasKey& key) + { + const auto it = _map.find(key); + if (it != _map.end()) + { + _lru.splice(_lru.begin(), _lru, *it); + return &(*it)->second; + } + return nullptr; + } + + std::list>::iterator insert(AtlasKey&& key, AtlasValue&& value) + { + // && decays to & when passed as an argument to emplace(). + // What a fantastic language. + _lru.emplace_front(std::move(key), std::move(value)); + auto it = _lru.begin(); + _map.emplace(it); + return it; + } + + void popOldestTiles(std::vector& out) noexcept + { + Expects(!_lru.empty()); + const auto it = --_lru.end(); + + const auto key = it->first.data(); + const auto value = it->second.data(); + const auto beg = &value->coords[0]; + const auto cellCount = key->attributes.cellCount; + + const auto offset = out.size(); + out.resize(offset + cellCount); + std::copy_n(beg, cellCount, out.begin() + offset); + + _map.erase(it); + _lru.pop_back(); + } + + private: + // Please don't copy this code. It's a proof-of-concept. + // If you need a LRU hash-map, write a custom one with an intrusive + // prev/next linked list (it's easier than you might think!). + std::list> _lru; + std::unordered_set>::iterator, AtlasKeyHasher, AtlasKeyEq> _map; + }; + + struct TileAllocator + { + TileAllocator() = default; + + explicit TileAllocator(u16x2 tileSize, u16x2 windowSize) noexcept : + _tileSize{ tileSize } + { + const auto initialSize = std::max(u16{ _absoluteMinSize }, std::bit_ceil(std::max(tileSize.x, tileSize.y))); + _size = { initialSize, initialSize }; + _limit = { gsl::narrow_cast(initialSize - _tileSize.x), gsl::narrow_cast(initialSize - _tileSize.y) }; + setMaxArea(windowSize); + } + + u16x2 size() const noexcept + { + return _size; + } + + void setMaxArea(u16x2 windowSize) noexcept + { + // _generate() uses a quadratic growth factor for _size's area. + // Once it exceeds the _maxArea, it'll start snatching tiles back from the + // TileHashMap using it's LRU queue. Since _size will at least reach half + // of _maxSize (because otherwise it could still grow by a factor of 2) + // and by ensuring that _maxArea is at least twice the window size + // we make it impossible* for _generate() to return false before + // TileHashMap contains at least as many tiles as the window contains. + // If that wasn't the case we'd snatch and reuse tiles that are still in use. + // * lhecker's legal department: + // No responsibility is taken for the correctness of this information. + setMaxArea(static_cast(windowSize.x) * static_cast(windowSize.y) * 2); + } + + void setMaxArea(size_t max) noexcept + { + _maxArea = clamp(max, _absoluteMinArea, _absoluteMaxArea); + _updateCanGenerate(); + } + + u16x2 allocate(TileHashMap& map) noexcept + { + if (_generate()) + { + return _pos; + } + + if (_cache.empty()) + { + map.popOldestTiles(_cache); + } + + const auto pos = _cache.back(); + _cache.pop_back(); + return pos; + } + + private: + bool _generate() noexcept + { + if (!_canGenerate) + { + return false; + } + + _pos.x += _tileSize.x; + if (_pos.x <= _limit.x) + { + return true; + } + + _pos.y += _tileSize.y; + if (_pos.y <= _limit.y) + { + _pos.x = _originX; + return true; + } + + // This implements a quadratic growth factor for _size, by + // alternating between an 1:1 and 2:1 aspect ratio, like so: + // (64,64) -> (128,64) -> (128,128) -> (256,128) -> (256,256) + // This behavior is strictly dependent on setMaxArea(u16x2)'s + // behavior. See it's comment for an explanation. + if (_size.x == _size.y) + { + _size.x *= 2; + _pos.y = 0; + } + else + { + _size.y *= 2; + _pos.x = 0; + } + _limit = { gsl::narrow_cast(_size.x - _tileSize.x), gsl::narrow_cast(_size.y - _tileSize.y) }; + _originX = _pos.x; + + _updateCanGenerate(); + return _canGenerate; + } + + void _updateCanGenerate() noexcept + { + _canGenerate = static_cast(_size.x) * static_cast(_size.y) <= _maxArea; + } + + static constexpr u16 _absoluteMinSize = 256; + static constexpr size_t _absoluteMinArea = _absoluteMinSize * _absoluteMinSize; + // TODO: Consider using IDXGIAdapter3::QueryVideoMemoryInfo() and IDXGIAdapter3::RegisterVideoMemoryBudgetChangeNotificationEvent() + // That way we can make better to use of a user's available video memory. + static constexpr size_t _absoluteMaxArea = D3D10_REQ_TEXTURE2D_U_OR_V_DIMENSION * D3D10_REQ_TEXTURE2D_U_OR_V_DIMENSION; + + std::vector _cache; + size_t _maxArea = _absoluteMaxArea; + u16x2 _tileSize; + u16x2 _size; + u16x2 _limit; + // Since _pos starts at {0, 0}, it'll result in the first allocate()d tile to be at {_tileSize.x, 0}. + // Coincidentially that's exactly what we want as the cursor texture lives at {0, 0}. + u16x2 _pos; + u16 _originX = 0; + bool _canGenerate = true; + }; + struct CachedCursorOptions { u32 cursorColor = INVALID_COLOR; @@ -613,7 +806,6 @@ namespace Microsoft::Console::Render const Buffer& _getTextFormatAxis(bool bold, bool italic) const noexcept; Cell* _getCell(u16 x, u16 y) noexcept; void _setCellFlags(u16r coords, CellFlags mask, CellFlags bits) noexcept; - u16x2 _allocateAtlasTile() noexcept; void _flushBufferLine(); void _emplaceGlyph(IDWriteFontFace* fontFace, size_t bufferPos1, size_t bufferPos2); @@ -634,7 +826,6 @@ namespace Microsoft::Console::Render static constexpr bool debugGlyphGenerationPerformance = false; static constexpr bool debugGeneralPerformance = false || debugGlyphGenerationPerformance; - static constexpr bool continuousRedraw = false || debugGeneralPerformance; static constexpr u16 u16min = 0x0000; static constexpr u16 u16max = 0xffff; @@ -693,10 +884,9 @@ namespace Microsoft::Console::Render u16 dpi = USER_DEFAULT_SCREEN_DPI; // invalidated by ApiInvalidations::Font, caches _api.dpi u16 maxEncounteredCellCount = 0; u16 scratchpadCellWidth = 0; - u16x2 atlasSizeInPixelLimit; // invalidated by ApiInvalidations::Font u16x2 atlasSizeInPixel; // invalidated by ApiInvalidations::Font - u16x2 atlasPosition; - std::unordered_map glyphs; + TileHashMap glyphs; + TileAllocator tileAllocator; std::vector glyphQueue; f32 gamma = 0; diff --git a/src/renderer/atlas/AtlasEngine.r.cpp b/src/renderer/atlas/AtlasEngine.r.cpp index 804f2687b66..d36dd77d689 100644 --- a/src/renderer/atlas/AtlasEngine.r.cpp +++ b/src/renderer/atlas/AtlasEngine.r.cpp @@ -64,7 +64,7 @@ try // See documentation for IDXGISwapChain2::GetFrameLatencyWaitableObject method: // > For every frame it renders, the app should wait on this handle before starting any rendering operations. // > Note that this requirement includes the first frame the app renders with the swap chain. - assert(_r.frameLatencyWaitableObjectUsed); + assert(debugGeneralPerformance || _r.frameLatencyWaitableObjectUsed); // > IDXGISwapChain::Present: Partial Presentation (using a dirty rects or scroll) is not supported // > for SwapChains created with DXGI_SWAP_EFFECT_DISCARD or DXGI_SWAP_EFFECT_FLIP_DISCARD. @@ -139,65 +139,18 @@ void AtlasEngine::_updateConstantBuffer() const noexcept void AtlasEngine::_adjustAtlasSize() { - if (_r.atlasPosition.y < _r.atlasSizeInPixel.y && _r.atlasPosition.x < _r.atlasSizeInPixel.x) + const auto atlasSize = _r.tileAllocator.size(); + if (atlasSize.y <= _r.atlasSizeInPixel.y && atlasSize.x <= _r.atlasSizeInPixel.x) { return; } - const u32 limitX = _r.atlasSizeInPixelLimit.x; - const u32 limitY = _r.atlasSizeInPixelLimit.y; - const u32 posX = _r.atlasPosition.x; - const u32 posY = _r.atlasPosition.y; - const u32 cellX = _r.cellSize.x; - const u32 cellY = _r.cellSize.y; - const auto perCellArea = cellX * cellY; - - // The texture atlas is filled like this: - // x → - // y +--------------+ - // ↓ |XXXXXXXXXXXXXX| - // |XXXXXXXXXXXXXX| - // |XXXXX↖ | - // | | | - // +------|-------+ - // This is where _r.atlasPosition points at. - // - // Each X is a glyph texture tile that's occupied. - // We can compute the area of pixels consumed by adding the first - // two lines of X (rectangular) together with the last line of X. - const auto currentArea = posY * limitX + posX * cellY; - // minArea reserves enough room for 64 cells in all cases (mainly during startup). - const auto minArea = 64 * perCellArea; - auto newArea = std::max(minArea, currentArea); - - // I want the texture to grow exponentially similar to std::vector, as this - // ensures we don't need to resize the texture again right after having done. - // This rounds newArea up to the next power of 2. - unsigned long int index; - _BitScanReverse(&index, newArea); // newArea can't be 0 - newArea = u32{ 1 } << (index + 1); - - const auto pixelPerRow = limitX * cellY; - // newArea might be just large enough that it spans N full rows of cells and one additional row - // just barely. This algorithm rounds up newArea to the _next_ multiple of cellY. - const auto wantedHeight = (newArea + pixelPerRow - 1) / pixelPerRow * cellY; - // The atlas might either be a N rows of full width (xLimit) or just one - // row (where wantedHeight == cellY) that doesn't quite fill it's maximum width yet. - const auto wantedWidth = wantedHeight != cellY ? limitX : newArea / perCellArea * cellX; - - // We know that limitX/limitY were u16 originally, and thus it's safe to narrow_cast it back. - const auto height = gsl::narrow_cast(std::min(limitY, wantedHeight)); - const auto width = gsl::narrow_cast(std::min(limitX, wantedWidth)); - - assert(width != 0); - assert(height != 0); - wil::com_ptr atlasBuffer; wil::com_ptr atlasView; { D3D11_TEXTURE2D_DESC desc{}; - desc.Width = width; - desc.Height = height; + desc.Width = atlasSize.x; + desc.Height = atlasSize.y; desc.MipLevels = 1; desc.ArraySize = 1; desc.Format = DXGI_FORMAT_B8G8R8A8_UNORM; @@ -222,7 +175,7 @@ void AtlasEngine::_adjustAtlasSize() _r.deviceContext->CopySubresourceRegion1(atlasBuffer.get(), 0, 0, 0, 0, _r.atlasBuffer.get(), 0, &box, D3D11_COPY_NO_OVERWRITE); } - _r.atlasSizeInPixel = u16x2{ width, height }; + _r.atlasSizeInPixel = atlasSize; _r.atlasBuffer = std::move(atlasBuffer); _r.atlasView = std::move(atlasView); _setShaderResources(); @@ -346,7 +299,7 @@ void AtlasEngine::_drawGlyph(const AtlasQueueItem& item) const _r.d2dRenderTarget->DrawTextLayout({}, textLayout.get(), _r.brush.get(), options); THROW_IF_FAILED(_r.d2dRenderTarget->EndDraw()); - for (uint32_t i = 0; i < cells; ++i) + for (u32 i = 0; i < cells; ++i) { // Specifying NO_OVERWRITE means that the system can assume that existing references to the surface that // may be in flight on the GPU will not be affected by the update, so the copy can proceed immediately From 0641bb38e8c2fa29ce6c17ecee27ada235287324 Mon Sep 17 00:00:00 2001 From: Leonard Hecker Date: Fri, 8 Jul 2022 04:35:03 +0200 Subject: [PATCH 2/5] Add more comments --- src/renderer/atlas/AtlasEngine.cpp | 6 +++++- src/renderer/atlas/AtlasEngine.h | 30 ++++++++++++++++++++++++++---- 2 files changed, 31 insertions(+), 5 deletions(-) diff --git a/src/renderer/atlas/AtlasEngine.cpp b/src/renderer/atlas/AtlasEngine.cpp index a93a3e18e66..edeec1fb187 100644 --- a/src/renderer/atlas/AtlasEngine.cpp +++ b/src/renderer/atlas/AtlasEngine.cpp @@ -1402,7 +1402,7 @@ void AtlasEngine::_emplaceGlyph(IDWriteFontFace* fontFace, size_t bufferPos1, si attributes.cellCount = cellCount; AtlasKey key{ attributes, gsl::narrow(charCount), chars }; - auto valueRef = _r.glyphs.find(key); + const AtlasValue* valueRef = _r.glyphs.find(key); if (!valueRef) { @@ -1432,8 +1432,12 @@ void AtlasEngine::_emplaceGlyph(IDWriteFontFace* fontFace, size_t bufferPos1, si WI_SetFlagIf(flags, CellFlags::ColoredGlyph, fontFace2 && fontFace2->IsColorFont()); } + // The AtlasValue constructor fills the `coords` variable with a pointer to an array + // of at least `cellCount` elements. I did this so that I don't have to type out + // `value.data()->coords` again, despite the constructor having all the data necessary. u16x2* coords; AtlasValue value{ flags, cellCount, &coords }; + for (u16 i = 0; i < cellCount; ++i) { coords[i] = _r.tileAllocator.allocate(_r.glyphs); diff --git a/src/renderer/atlas/AtlasEngine.h b/src/renderer/atlas/AtlasEngine.h index 7fc20e4df22..1f25b63f992 100644 --- a/src/renderer/atlas/AtlasEngine.h +++ b/src/renderer/atlas/AtlasEngine.h @@ -560,6 +560,7 @@ namespace Microsoft::Console::Render const auto it = _map.find(key); if (it != _map.end()) { + // Move the key to the head of the LRU queue. _lru.splice(_lru.begin(), _lru, *it); return &(*it)->second; } @@ -568,8 +569,10 @@ namespace Microsoft::Console::Render std::list>::iterator insert(AtlasKey&& key, AtlasValue&& value) { - // && decays to & when passed as an argument to emplace(). - // What a fantastic language. + // Insert the key/value right at the head of the LRU queue, just like find(). + // + // && decays to & if the argument is named, because C++ is a simple language + // and so you have to std::move it again, because C++ is a simple language. _lru.emplace_front(std::move(key), std::move(value)); auto it = _lru.begin(); _map.emplace(it); @@ -602,6 +605,18 @@ namespace Microsoft::Console::Render std::unordered_set>::iterator, AtlasKeyHasher, AtlasKeyEq> _map; }; + // TileAllocator yields `tileSize`-sized tiles for our texture atlas. + // While doing so it'll grow the atlas size() by a factor of 2 if needed. + // Once the setMaxArea() is exceeded it'll stop growing and instead + // snatch tiles back from the oldest TileHashMap entries. + // + // The quadratic growth works by alternating the size() + // between an 1:1 and 2:1 aspect ratio, like so: + // (64,64) -> (128,64) -> (128,128) -> (256,128) -> (256,256) + // These initial tile positions allocate() returns are in a Z + // pattern over the available space in the atlas texture. + // You can log the `return _pos;` in allocate() using "Tracepoint"s + // in Visual Studio if you'd like to understand the Z pattern better. struct TileAllocator { TileAllocator() = default; @@ -624,7 +639,7 @@ namespace Microsoft::Console::Render { // _generate() uses a quadratic growth factor for _size's area. // Once it exceeds the _maxArea, it'll start snatching tiles back from the - // TileHashMap using it's LRU queue. Since _size will at least reach half + // TileHashMap using its LRU queue. Since _size will at least reach half // of _maxSize (because otherwise it could still grow by a factor of 2) // and by ensuring that _maxArea is at least twice the window size // we make it impossible* for _generate() to return false before @@ -659,6 +674,8 @@ namespace Microsoft::Console::Render } private: + // This method generates the Z pattern coordinates + // described above in the TileAllocator comment. bool _generate() noexcept { if (!_canGenerate) @@ -679,9 +696,12 @@ namespace Microsoft::Console::Render return true; } + // Same as for pos. + const auto size = _size; + // This implements a quadratic growth factor for _size, by // alternating between an 1:1 and 2:1 aspect ratio, like so: - // (64,64) -> (128,64) -> (128,128) -> (256,128) -> (256,256) + // (64,64) -> (128,64) -> (128,128) -> (256,128) -> (256,256) // This behavior is strictly dependent on setMaxArea(u16x2)'s // behavior. See it's comment for an explanation. if (_size.x == _size.y) @@ -721,6 +741,8 @@ namespace Microsoft::Console::Render // Coincidentially that's exactly what we want as the cursor texture lives at {0, 0}. u16x2 _pos; u16 _originX = 0; + // Indicates whether we've exhausted our Z pattern across the atlas texture. + // If this is false, we have to snatch tiles back from TileHashMap. bool _canGenerate = true; }; From b5763fae08cb8a7e106248b7784796670daa45e1 Mon Sep 17 00:00:00 2001 From: Leonard Hecker Date: Fri, 8 Jul 2022 04:35:30 +0200 Subject: [PATCH 3/5] Fix setMaxArea not accounting for the cursor --- src/renderer/atlas/AtlasEngine.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/renderer/atlas/AtlasEngine.h b/src/renderer/atlas/AtlasEngine.h index 1f25b63f992..f85b5ab185b 100644 --- a/src/renderer/atlas/AtlasEngine.h +++ b/src/renderer/atlas/AtlasEngine.h @@ -652,7 +652,10 @@ namespace Microsoft::Console::Render void setMaxArea(size_t max) noexcept { - _maxArea = clamp(max, _absoluteMinArea, _absoluteMaxArea); + // We need to reserve at least 1 extra `tileArea`, because the tile + // at position {0,0} is already reserved for the cursor texture. + const auto tileArea = static_cast(_tileSize.x) * static_cast(_tileSize.y); + _maxArea = clamp(max + tileArea, _absoluteMinArea, _absoluteMaxArea); _updateCanGenerate(); } From 4ba1f8dd6a49f639a49d4aeb5a56042fd0cd0678 Mon Sep 17 00:00:00 2001 From: Leonard Hecker Date: Fri, 8 Jul 2022 05:06:44 +0200 Subject: [PATCH 4/5] Fix TileAllocator not responding correctly to resizes --- src/renderer/atlas/AtlasEngine.h | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/src/renderer/atlas/AtlasEngine.h b/src/renderer/atlas/AtlasEngine.h index f85b5ab185b..f75fb093402 100644 --- a/src/renderer/atlas/AtlasEngine.h +++ b/src/renderer/atlas/AtlasEngine.h @@ -686,6 +686,11 @@ namespace Microsoft::Console::Render return false; } + // We need to backup _pos/_size in case our resize below exceeds _maxArea. + // In that case we have to restore _pos/_size so that if _maxArea is increased + // (window resize for instance), we can pick up were we previously left off. + const auto pos = _pos; + _pos.x += _tileSize.x; if (_pos.x <= _limit.x) { @@ -717,10 +722,19 @@ namespace Microsoft::Console::Render _size.y *= 2; _pos.x = 0; } - _limit = { gsl::narrow_cast(_size.x - _tileSize.x), gsl::narrow_cast(_size.y - _tileSize.y) }; - _originX = _pos.x; _updateCanGenerate(); + if (_canGenerate) + { + _limit = { gsl::narrow_cast(_size.x - _tileSize.x), gsl::narrow_cast(_size.y - _tileSize.y) }; + _originX = _pos.x; + } + else + { + _size = size; + _pos = pos; + } + return _canGenerate; } From 4c7738a0093be5dfd7e08e1fce1412bb515dcd0a Mon Sep 17 00:00:00 2001 From: Leonard Hecker Date: Fri, 8 Jul 2022 18:56:54 +0200 Subject: [PATCH 5/5] Address feedback --- src/renderer/atlas/AtlasEngine.h | 2 +- src/renderer/atlas/AtlasEngine.r.cpp | 14 +++++++++----- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/src/renderer/atlas/AtlasEngine.h b/src/renderer/atlas/AtlasEngine.h index f75fb093402..d1c3ac6c3fe 100644 --- a/src/renderer/atlas/AtlasEngine.h +++ b/src/renderer/atlas/AtlasEngine.h @@ -688,7 +688,7 @@ namespace Microsoft::Console::Render // We need to backup _pos/_size in case our resize below exceeds _maxArea. // In that case we have to restore _pos/_size so that if _maxArea is increased - // (window resize for instance), we can pick up were we previously left off. + // (window resize for instance), we can pick up where we previously left off. const auto pos = _pos; _pos.x += _tileSize.x; diff --git a/src/renderer/atlas/AtlasEngine.r.cpp b/src/renderer/atlas/AtlasEngine.r.cpp index d36dd77d689..da86488f758 100644 --- a/src/renderer/atlas/AtlasEngine.r.cpp +++ b/src/renderer/atlas/AtlasEngine.r.cpp @@ -139,8 +139,12 @@ void AtlasEngine::_updateConstantBuffer() const noexcept void AtlasEngine::_adjustAtlasSize() { - const auto atlasSize = _r.tileAllocator.size(); - if (atlasSize.y <= _r.atlasSizeInPixel.y && atlasSize.x <= _r.atlasSizeInPixel.x) + // Only grow the atlas texture if our tileAllocator needs it to be larger. + // We have no way of shrinking our tileAllocator at the moment, + // so technically a `requiredSize != _r.atlasSizeInPixel` + // comparison would be sufficient, but better safe than sorry. + const auto requiredSize = _r.tileAllocator.size(); + if (requiredSize.y <= _r.atlasSizeInPixel.y && requiredSize.x <= _r.atlasSizeInPixel.x) { return; } @@ -149,8 +153,8 @@ void AtlasEngine::_adjustAtlasSize() wil::com_ptr atlasView; { D3D11_TEXTURE2D_DESC desc{}; - desc.Width = atlasSize.x; - desc.Height = atlasSize.y; + desc.Width = requiredSize.x; + desc.Height = requiredSize.y; desc.MipLevels = 1; desc.ArraySize = 1; desc.Format = DXGI_FORMAT_B8G8R8A8_UNORM; @@ -175,7 +179,7 @@ void AtlasEngine::_adjustAtlasSize() _r.deviceContext->CopySubresourceRegion1(atlasBuffer.get(), 0, 0, 0, 0, _r.atlasBuffer.get(), 0, &box, D3D11_COPY_NO_OVERWRITE); } - _r.atlasSizeInPixel = atlasSize; + _r.atlasSizeInPixel = requiredSize; _r.atlasBuffer = std::move(atlasBuffer); _r.atlasView = std::move(atlasView); _setShaderResources();