diff --git a/Makefile b/Makefile index d66c2c105..79529bb38 100644 --- a/Makefile +++ b/Makefile @@ -9,6 +9,7 @@ SHELL = bash ALL_EXCLUDE = third_party .git env build docs/env INSTALL_DIR ?= +ENV_DIR ?= env # Skip this check if the ALLOW_ROOT var is defined # E.g. when running in GH action custom runners CI @@ -20,9 +21,9 @@ endif endif # Tools + Environment -IN_ENV = if [ -e env/bin/activate ]; then . env/bin/activate; fi; source utils/environment.python.sh; +IN_ENV = if [ -e $(ENV_DIR)/bin/activate ]; then . $(ENV_DIR)/bin/activate; fi; source utils/environment.python.sh; env: - python3 -mvenv env + python3 -mvenv $(ENV_DIR) # Install project dependencies $(IN_ENV) python -mpip install -r requirements.txt # Install project's documentation dependencies diff --git a/lib/include/prjxray/xilinx/configuration.h b/lib/include/prjxray/xilinx/configuration.h index 0a8835478..8cbcbf187 100644 --- a/lib/include/prjxray/xilinx/configuration.h +++ b/lib/include/prjxray/xilinx/configuration.h @@ -27,7 +27,16 @@ class Configuration { public: using FrameMap = std::map>; - using PacketData = std::vector; + + struct PacketData { + struct Frame { + typename ArchType::FrameAddress address; + std::vector repeats; + std::vector data; + }; + + std::vector frames; + }; // Returns a configuration, i.e. collection of frame addresses // and corresponding data from a collection of configuration packets. @@ -50,7 +59,8 @@ class Configuration { // which allows for bigger payload compared to type 1. static PacketData createType2ConfigurationPacketData( const typename Frames::Frames2Data& frames, - absl::optional& part); + absl::optional& part, + bool compressed = false); Configuration(const typename ArchType::Part& part, std::map typename Configuration::PacketData Configuration::createType2ConfigurationPacketData( const typename Frames::Frames2Data& frames, - absl::optional& part) { - PacketData packet_data; - // Certain configuration frames blocks are separated by Zero Frames, - // i.e. frames with words with all zeroes. For Series-7, US and US+ - // there zero frames separator consists of two frames. - static const int kZeroFramesSeparatorWords = - ArchType::words_per_frame * 2; - for (auto& frame : frames) { - std::copy(frame.second.begin(), frame.second.end(), - std::back_inserter(packet_data)); - - auto next_address = part->GetNextFrameAddress(frame.first); - if (next_address && - (next_address->block_type() != frame.first.block_type() || - next_address->is_bottom_half_rows() != - frame.first.is_bottom_half_rows() || - next_address->row() != frame.first.row())) { - packet_data.insert(packet_data.end(), - kZeroFramesSeparatorWords, 0); + absl::optional& part, + bool compressed) { + PacketData result; + if (!compressed) { + result.frames.push_back(typename PacketData::Frame{0U, {}, {}}); + std::vector& packet_data = result.frames.back().data; + // Certain configuration frames blocks are separated by Zero + // Frames, i.e. frames with words with all zeroes. For Series-7, + // US and US+ there zero frames separator consists of two + // frames. + static const int kZeroFramesSeparatorWords = + ArchType::words_per_frame * 2; + for (auto& frame : frames) { + std::copy(frame.second.begin(), frame.second.end(), + std::back_inserter(packet_data)); + + auto next_address = + part->GetNextFrameAddress(frame.first); + if (next_address && + (next_address->block_type() != + frame.first.block_type() || + next_address->is_bottom_half_rows() != + frame.first.is_bottom_half_rows() || + next_address->row() != frame.first.row())) { + packet_data.insert(packet_data.end(), + kZeroFramesSeparatorWords, + 0); + } + } + packet_data.insert(packet_data.end(), kZeroFramesSeparatorWords, + 0); + } else { + // First write takes priority. + // FDRI writes must be padded with a trailing zero-frame. + // FDRI writes followed by MFWRs must only write to a single + // frame. + // Frame writes can be joined, so long as the frame written + // to with the trailing zero-frame has already been written + // to, or is meant to be a zero-frame. + + using Frame = typename PacketData::Frame; + + auto similar_address = + [](const typename ArchType::FrameAddress& a, + const typename ArchType::FrameAddress& b) -> bool { + return a.block_type() == b.block_type() && + a.is_bottom_half_rows() == + b.is_bottom_half_rows() && + a.row() == b.row(); + }; + + for (const auto& frame : frames) { + result.frames.push_back( + Frame{frame.first, {}, frame.second}); + } + + auto dedup = [](auto begin, auto end, auto compare, + auto merge) { + while (begin != end) { + auto mid = std::stable_partition( + begin + 1, end, [&](const Frame& f) { + return !compare(*begin, f); + }); + for (auto it = mid; it != end; ++it) + merge(*begin, *it); + end = mid; + if (begin != end) + ++begin; + } + return begin; + }; + + auto can_merge = [&](const Frame& a, const Frame& b) -> bool { + return b.repeats.empty() && + similar_address(a.address, b.address) && + a.data == b.data; + }; + + auto merge = [](Frame& dst, Frame& src) { + dst.repeats.push_back(src.address); + }; + + result.frames.erase( + dedup(result.frames.begin(), result.frames.end(), can_merge, + merge), + result.frames.end()); + + std::set deduped_frames; + + auto zero_frames_between = + [&](const typename ArchType::FrameAddress& a, + const typename ArchType::FrameAddress& b, + size_t max) -> size_t { + if (a >= b) + return 0; + auto next = part->GetNextFrameAddress(a); + for (size_t result = 1; + result <= max && next && *next <= b && + deduped_frames.count(*next) > 0U; + ++result, + next = part->GetNextFrameAddress(*next)) { + if (*next == b) + return result; + } + return 0; + }; + + // Merge contiguous frames + Frame* previous = nullptr; + absl::optional + previous_next_address; + for (auto& frame : result.frames) { + if (!frame.repeats.empty()) { + if (previous) + deduped_frames.insert( + previous->repeats.begin(), + previous->repeats.end()); + previous = &frame; + } else { + if (previous_next_address) { + const size_t between = + zero_frames_between( + *previous_next_address, + frame.address, 2U); + if (between > 0U) { + previous->data.resize( + previous->data.size() + + (ArchType:: + words_per_frame * + between), + 0U); + previous_next_address = + frame.address; + } + } + if (previous_next_address && + *previous_next_address == frame.address) { + previous->data.insert( + previous->data.end(), + frame.data.begin(), + frame.data.end()); + frame.data.clear(); + } else { + if (previous) + deduped_frames.insert( + previous->repeats.begin(), + previous->repeats.end()); + previous = &frame; + } + } + if (previous) + previous_next_address = + part->GetNextFrameAddress(frame.address); + } + + result.frames.erase( + std::remove_if( + result.frames.begin(), result.frames.end(), + [](const Frame& frame) { return frame.data.empty(); }), + result.frames.end()); + + for (auto& frame : result.frames) { + if (frame.repeats.empty()) { + frame.data.resize(frame.data.size() + + ArchType::words_per_frame, + 0U); + } } } - packet_data.insert(packet_data.end(), kZeroFramesSeparatorWords, 0); - return packet_data; + return result; } template <> @@ -241,6 +398,8 @@ Configuration::InitWithPackets(const typename ArchType::Part& part, // Internal state machine for writes. bool start_new_write = false; + bool start_dup_write = false; + typename ArchType::FrameAddress last_write_frame_address = 0; typename ArchType::FrameAddress current_frame_address = 0; Configuration::FrameMap frames; @@ -272,6 +431,8 @@ Configuration::InitWithPackets(const typename ArchType::Part& part, // for the next FDIR. if (command_register == 0x1) { start_new_write = true; + } else if (command_register == 0x2) { + start_dup_write = true; } break; case ArchType::ConfRegType::IDCODE: @@ -293,31 +454,35 @@ Configuration::InitWithPackets(const typename ArchType::Part& part, // Per UG470, the command present in the CMD // register is executed each time the FAR - // register is laoded with a new value. As we - // only care about WCFG commands, just check - // that here. CTRL1 is completely undocumented - // but looking at generated bitstreams, bit 21 - // is used when per-frame CRC is enabled. - // Setting this bit seems to inhibit the - // re-execution of CMD during a FAR write. In - // practice, this is used so FAR writes can be - // added in the bitstream to show progress + // register is loaded with a new value. As we + // only care about WCFG and MFWR commands, just + // check that here. CTRL1 is completely + // undocumented but looking at generated + // bitstreams, bit 21 is used when per-frame CRC + // is enabled. Setting this bit seems to inhibit + // the re-execution of CMD during a FAR write. + // In practice, this is used so FAR writes can + // be added in the bitstream to show progress // markers without impacting the actual write // operation. - if (bit_field_get(ctl1_register, 21, 21) == 0 && - command_register == 0x1) { - start_new_write = true; + if (bit_field_get(ctl1_register, 21, 21) == 0) { + if (command_register == 0x1) { + start_new_write = true; + } else if (command_register == 0x2) { + start_dup_write = true; + } } break; case ArchType::ConfRegType::FDRI: { if (start_new_write) { - current_frame_address = - frame_address_register; + last_write_frame_address = + current_frame_address = + frame_address_register; start_new_write = false; } // Number of words in configuration frames - // depend on tje architecture. Writes to this + // depend on the architecture. Writes to this // register can be multiples of that number to // do auto-incrementing block writes. for (size_t ii = 0; ii < packet.data().size(); @@ -351,6 +516,15 @@ Configuration::InitWithPackets(const typename ArchType::Part& part, } break; } + case ArchType::ConfRegType::MFWR: { + if (start_dup_write) { + current_frame_address = + frame_address_register; + start_dup_write = false; + frames[current_frame_address] = + frames[last_write_frame_address]; + } + } break; default: break; } diff --git a/lib/xilinx/configuration.cc b/lib/xilinx/configuration.cc index 881d9cd25..d2b2c3b66 100644 --- a/lib/xilinx/configuration.cc +++ b/lib/xilinx/configuration.cc @@ -31,9 +31,12 @@ template <> Configuration::PacketData Configuration::createType2ConfigurationPacketData( const Frames::Frames2Data& frames, - absl::optional& part) { + absl::optional& part, + bool compressed) { // Generate a single type 2 packet that writes everything at once. - PacketData packet_data; + PacketData result; + result.frames.push_back(typename PacketData::Frame{0U, {}, {}}); + std::vector& packet_data = result.frames.back().data; for (auto& frame : frames) { std::copy(frame.second.begin(), frame.second.end(), std::back_inserter(packet_data)); @@ -44,7 +47,7 @@ Configuration::createType2ConfigurationPacketData( packet_data.insert(packet_data.begin(), packet_data_size & 0xFFFF); packet_data.insert(packet_data.begin(), (packet_data_size >> 16) & 0xFFFF); - return packet_data; + return result; } template <> @@ -219,7 +222,7 @@ void Configuration::createConfigurationPackage( // Frame data write out_packets.emplace_back(new ConfigurationPacket( TYPE2, ConfigurationPacket::Opcode::Write, - ConfigurationRegister::FDRI, {packet_data})); + ConfigurationRegister::FDRI, {packet_data.frames.back().data})); // NOP packets for (int i = 0; i < 24; i++) { @@ -391,24 +394,85 @@ void Configuration::createConfigurationPackage( out_packets.emplace_back(new NopPacket()); out_packets.emplace_back(new NopPacket()); out_packets.emplace_back(new NopPacket()); - out_packets.emplace_back( - new ConfigurationPacketWithPayload<1, ConfigurationRegister>( - ConfigurationPacket::Opcode::Write, - ConfigurationRegister::FAR, {0x0})); - out_packets.emplace_back( - new ConfigurationPacketWithPayload<1, ConfigurationRegister>( - ConfigurationPacket::Opcode::Write, - ConfigurationRegister::CMD, - {static_cast(xc7series::Command::WCFG)})); - out_packets.emplace_back(new NopPacket()); // Frame data write - out_packets.emplace_back(new ConfigurationPacket( - TYPE1, ConfigurationPacket::Opcode::Write, - ConfigurationRegister::FDRI, {})); - out_packets.emplace_back(new ConfigurationPacket( - TYPE2, ConfigurationPacket::Opcode::Write, - ConfigurationRegister::FDRI, packet_data)); + for (const auto& frame : packet_data.frames) { + out_packets.emplace_back(new ConfigurationPacketWithPayload< + 1, ConfigurationRegister>( + ConfigurationPacket::Opcode::Write, + ConfigurationRegister::CMD, + {static_cast(xc7series::Command::WCFG)})); + out_packets.emplace_back(new ConfigurationPacketWithPayload< + 1, ConfigurationRegister>( + ConfigurationPacket::Opcode::Write, + ConfigurationRegister::FAR, {frame.address})); + + out_packets.emplace_back( + new NopPacket()); + + if (frame.data.size() < 0b111'1111'1111) { + out_packets.emplace_back( + new ConfigurationPacket( + TYPE1, + ConfigurationPacket< + ConfigurationRegister>::Opcode::Write, + ConfigurationRegister::FDRI, frame.data)); + } else { + out_packets.emplace_back( + new ConfigurationPacket( + TYPE1, + ConfigurationPacket< + ConfigurationRegister>::Opcode::Write, + ConfigurationRegister::FDRI, {})); + out_packets.emplace_back( + new ConfigurationPacket( + TYPE2, + ConfigurationPacket< + ConfigurationRegister>::Opcode::Write, + ConfigurationRegister::FDRI, frame.data)); + } + + if (!frame.repeats.empty()) { + out_packets.emplace_back( + new ConfigurationPacketWithPayload< + 1, ConfigurationRegister>( + ConfigurationPacket< + ConfigurationRegister>::Opcode::Write, + ConfigurationRegister::CMD, + {static_cast( + xc7series::Command::MFW)})); + + for (size_t i = 0; i < 12; ++i) + out_packets.emplace_back( + new NopPacket()); + + out_packets.emplace_back( + new ConfigurationPacketWithPayload< + 8, ConfigurationRegister>( + ConfigurationPacket< + ConfigurationRegister>::Opcode::Write, + ConfigurationRegister::MFWR, + {0U, 0U, 0U, 0U, 0U, 0U, 0U, 0U})); + + for (const auto& addr : frame.repeats) { + out_packets.emplace_back( + new ConfigurationPacketWithPayload< + 1, ConfigurationRegister>( + ConfigurationPacket< + ConfigurationRegister>::Opcode:: + Write, + ConfigurationRegister::FAR, {addr})); + out_packets.emplace_back( + new ConfigurationPacketWithPayload< + 4, ConfigurationRegister>( + ConfigurationPacket< + ConfigurationRegister>::Opcode:: + Write, + ConfigurationRegister::MFWR, + {0U, 0U, 0U, 0U})); + } + } + } // Finalization sequence out_packets.emplace_back( @@ -569,7 +633,7 @@ void Configuration::createConfigurationPackage( ConfigurationRegister::FDRI, {})); out_packets.emplace_back(new ConfigurationPacket( TYPE2, ConfigurationPacket::Opcode::Write, - ConfigurationRegister::FDRI, packet_data)); + ConfigurationRegister::FDRI, packet_data.frames.back().data)); // Finalization sequence out_packets.emplace_back( @@ -730,7 +794,7 @@ void Configuration::createConfigurationPackage( ConfigurationRegister::FDRI, {})); out_packets.emplace_back(new ConfigurationPacket( TYPE2, ConfigurationPacket::Opcode::Write, - ConfigurationRegister::FDRI, packet_data)); + ConfigurationRegister::FDRI, packet_data.frames.back().data)); // Finalization sequence out_packets.emplace_back( diff --git a/lib/xilinx/tests/spartan6/configuration_test.cc b/lib/xilinx/tests/spartan6/configuration_test.cc index 522438195..82261da7c 100644 --- a/lib/xilinx/tests/spartan6/configuration_test.cc +++ b/lib/xilinx/tests/spartan6/configuration_test.cc @@ -153,7 +153,8 @@ TEST(ConfigurationTest, DISABLED_CheckForPaddingAfterIOBFrame) { frames.getFrames(), test_part); // createType2ConfigurationPacketData should add a 16-bit pad word after // after the IOB frame - EXPECT_EQ(packet_data.size(), 3 * 65 + 1); + EXPECT_EQ(packet_data.frames.size(), 1); + EXPECT_EQ(packet_data.frames[0].data.size(), 3 * 65 + 1); std::vector idcode{0x1234}; std::vector cmd{0x0001}; @@ -182,7 +183,7 @@ TEST(ConfigurationTest, DISABLED_CheckForPaddingAfterIOBFrame) { static_cast(0x1), ConfigurationPacket::Opcode::Write, Spartan6::ConfRegType::FDRI, - absl::MakeSpan(packet_data), + absl::MakeSpan(packet_data.frames[0].data), }, }; diff --git a/lib/xilinx/tests/xc7series/configuration_test.cc b/lib/xilinx/tests/xc7series/configuration_test.cc index 12917786b..11f054bc9 100644 --- a/lib/xilinx/tests/xc7series/configuration_test.cc +++ b/lib/xilinx/tests/xc7series/configuration_test.cc @@ -278,7 +278,8 @@ TEST(ConfigurationTest, CheckForPaddingFrames) { // extra padding frames are added at the end of the creation of the data // overall this gives us: 5(real frames) + 4*2 + 2 = 15 frames, which is // 15 * 101 = 1515 words - EXPECT_EQ(packet_data.size(), 15 * 101); + EXPECT_EQ(packet_data.frames.size(), 1); + EXPECT_EQ(packet_data.frames[0].data.size(), 15 * 101); std::vector idcode{0x1234}; std::vector cmd{0x0001}; @@ -307,7 +308,7 @@ TEST(ConfigurationTest, CheckForPaddingFrames) { static_cast(0x1), ConfigurationPacket::Opcode::Write, Series7::ConfRegType::FDRI, - absl::MakeSpan(packet_data), + absl::MakeSpan(packet_data.frames[0].data), }, }; diff --git a/tools/xc7frames2bit.cc b/tools/xc7frames2bit.cc index 788630190..8c407483f 100644 --- a/tools/xc7frames2bit.cc +++ b/tools/xc7frames2bit.cc @@ -14,6 +14,7 @@ #include #include +DEFINE_bool(compressed, false, "Attempt to deduplicate bitstream frames"); DEFINE_string(part_name, "", "Name of the 7-series part"); DEFINE_string(part_file, "", "Definition file for target 7-series part"); DEFINE_string( @@ -62,7 +63,7 @@ struct Frames2BitWriter { configuration_packet_data( xilinx::Configuration:: createType2ConfigurationPacketData( - frames.getFrames(), part)); + frames.getFrames(), part, FLAGS_compressed)); // Put together a configuration package typename ArchType::ConfigurationPackage configuration_package;