From ea4baeee462163c8f8289b8ec0d4b7522636f9fa Mon Sep 17 00:00:00 2001 From: Deep Patel Date: Mon, 4 Oct 2021 09:39:36 -0500 Subject: [PATCH] Doc updates and test fixes (#538) * Update DCV.md (#697) * Update and fix test failures (#698) * Fixed the non root test * Fixed documentation link failures by adding an updated ssl context * Update afi power doc (#699) * specify afi-power warning/violation triggers * Describe when afi-power-warning and afi-power-violation conditions trigger. * fix indentation of sh_cl_pwr_state Co-authored-by: kyyalama2 <82236989+kyyalama2@users.noreply.github.com> Co-authored-by: AWScsaralay <31708355+AWScsaralay@users.noreply.github.com> --- ERRATA.md | 8 +- developer_resources/DCV.md | 12 +- docs/examples/example_list.md | 1 - .../AWS_Shell_V1.4_Migration_Guidelines.md | 2 +- hdk/docs/Programmer_View.md | 4 +- hdk/docs/afi_power.md | 28 +++-- sdk/apps/virtual-ethernet/README.md | 2 +- sdk/userspace/add_udev_rules.sh | 117 ++++++++++-------- shared/bin/set_common_env_vars.sh | 2 +- shared/tests/bin/check_md_links.py | 4 +- 10 files changed, 98 insertions(+), 82 deletions(-) diff --git a/ERRATA.md b/ERRATA.md index 59d4fc43f..1485d8163 100644 --- a/ERRATA.md +++ b/ERRATA.md @@ -9,7 +9,7 @@ Shell errata is [documented here](./hdk/docs/AWS_Shell_ERRATA.md) * DRAM Data retention is not supported for CL designs with less than 4 DDRs enabled * Combinatorial loops in CL designs are not supported. * We will display a `UNKNOWN_BITSTREAM_GENERATE_ERROR` on detection of a combinatorial loop in the CL design and an AFI will not be generated. -* Connecting one of the clocks provided from the shell (clk_main_a0, clk_extra_a1, etc...) directly to a BUFG in the CL is not supported by the Xilinx tools and may result in a non-functional clock. To workaround this limitation, it is recommended to use an MMCM to feed the BUFG (clk_from_shell -> MMCM -> BUFG). 
Please refer to [Xilinx AR# 73360](https://www.xilinx.com/support/answers/73360.html) for further details. +* Connecting one of the clocks provided from the shell (clk_main_a0, clk_extra_a1, etc...) directly to a BUFG in the CL is not supported by the Xilinx tools and may result in a non-functional clock. To workaround this limitation, it is recommended to use an MMCM to feed the BUFG (clk_from_shell -> MMCM -> BUFG). Please refer to [Xilinx AR# 73360](https://support.xilinx.com/s/article/73360?language=en_US) for further details. ### flop_ccf.sv bug @@ -35,7 +35,7 @@ AWS customers may experience hardware failures including: post calibration data To detect if your build is impacted by this bug, AWS recommends all EC2 F1 customers utilizing the DDR4 IP in their designs should run a TCL script on the design checkpoint point (DCP) to check to determine if the design is susceptible to this issue. If the check passes, your design is safe to use as the hardware will function properly. If the check fails, the design is susceptible to the issue and will need to be regenerated using the same tool version with the AR 73068 patch. For designs under development, we recommend applying the patch to your on-premises tools or update to developer kit v1.4.15. -For additional details, please refer to the [Xilinx Answer Record #73068](https://www.xilinx.com/support/answers/73068.html) +For additional details, please refer to the [Xilinx Answer Record #73068](https://support.xilinx.com/s/article/73068?language=en_US) We recommend using [Developer Kit Release v1.4.15a](https://github.com/aws/aws-fpga/releases/tag/v1.4.15a) or newer to allow for patching and fixing the DDR4 IP timing exception by re-generating the IP. 
@@ -45,8 +45,8 @@ We recommend using [Developer Kit Release v1.4.15a](https://github.com/aws/aws-f | Library(verilog) | Simulator | Xilinx Answer Record | |---|---|---| -| `sync_ip` | Cadence IES | [AR72795](https://www.xilinx.com/support/answers/72795.html) | -| `hdmi_gt_controller_v1_0_0` | Synopsys VCS | [AR72601](https://www.xilinx.com/support/answers/72601.html) | +| `sync_ip` | Cadence IES | [AR72795](https://support.xilinx.com/s/article/72795?language=en_US) | +| `hdmi_gt_controller_v1_0_0` | Synopsys VCS | [AR72601](https://support.xilinx.com/s/article/72601?language=en_US) | ## SDK diff --git a/developer_resources/DCV.md b/developer_resources/DCV.md index 86b04ea77..76f6a0569 100644 --- a/developer_resources/DCV.md +++ b/developer_resources/DCV.md @@ -61,10 +61,10 @@ If you experience issues please refer to the [Official DCV documentation](https: ``` sudo rpm --import https://d1uj6qtbmh3dt5.cloudfront.net/NICE-GPG-KEY - wget https://d1uj6qtbmh3dt5.cloudfront.net/2021.1/Servers/nice-dcv-2021.1-10598-el7-x86_64.tgz - tar -xvzf nice-dcv-2021.1-10598-el7-x86_64.tgz && cd nice-dcv-2021.1-10598-el7-x86_64 - sudo yum install nice-dcv-server-2021.1.10598-1.el7.x86_64.rpm - sudo yum install nice-xdcv-2021.1.392-1.el7.x86_64.rpm + wget https://d1uj6qtbmh3dt5.cloudfront.net/2021.2/Servers/nice-dcv-2021.2-11048-el7-x86_64.tgz + tar -xvzf nice-dcv-2021.2-11048-el7-x86_64.tgz && cd nice-dcv-2021.2-11048-el7-x86_64 + sudo yum install nice-dcv-server-2021.2.11048-1.el7.x86_64.rpm + sudo yum install nice-xdcv-2021.2.406-1.el7.x86_64.rpm sudo systemctl enable dcvserver sudo systemctl start dcvserver @@ -100,7 +100,7 @@ If you experience issues please refer to the [Official DCV documentation](https: **NOTE: You will have to create a new session if you restart your instance.** ``` - dcv create-session --type virtual --user centos centos + dcv create-session --type virtual --user centos centos --owner centos ``` 1. 
Connect to the DCV Remote Desktop session @@ -125,4 +125,4 @@ If you experience issues please refer to the [Official DCV documentation](https: 1. Logging in should show you your new GUI Desktop: - ![DCV Desktop](images/dcv_desktop.png) \ No newline at end of file + ![DCV Desktop](images/dcv_desktop.png) diff --git a/docs/examples/example_list.md b/docs/examples/example_list.md index 5cff1e6d8..61ae69e52 100644 --- a/docs/examples/example_list.md +++ b/docs/examples/example_list.md @@ -10,7 +10,6 @@ | High Level Synthesis | [Digital Up-Converter - cl\_hls\_dds\_hlx](../../hdk/cl/examples/cl_hls_dds_hlx) | HLx - C-to-RTL | Demonstrates an example application written in C that is synthesized to RTL (Verilog) | | Custom Hardware with Software Defined Acceleration | [RTL Kernels](https://github.com/Xilinx/Vitis_Accel_Examples/tree/master/rtl_kernels) | Vitis - RTL (Verilog) + C/C++/OpenCL | These examples demonstrate developing new hardware designs (RTL) in a Software Defined workflow| | Vitis Compression Libraries | [File Compression using GZip](https://github.com/Xilinx/Vitis_Accel_Examples/tree/master/library_examples/gzip_app) | Vitis - C/C++/OpenCL | This example demonstrates how to use Vitis Libraries to speed up GZIP compression on an FPGA | -| Vitis BLAS libraries | [Matrix Transposer using BLAS](https://github.com/Xilinx/Vitis_Accel_Examples/tree/master/library_examples/transp) | Vitis - C/C++/OpenCL | This example shows how to use Vitis BLAS Libraries to create a Matrix Transposer on an FPGA | | Vitis Financial libraries | [Monte Carlo European Engine](https://github.com/Xilinx/Vitis_Accel_Examples/tree/master/library_examples/MCEuropeanEngine) | Vitis - C/C++/OpenCL | This example shows how to use Vitis Financial Libraries to accelerate MCEuropean Engine on an FPGA| ## Application Notes diff --git a/hdk/docs/AWS_Shell_V1.4_Migration_Guidelines.md b/hdk/docs/AWS_Shell_V1.4_Migration_Guidelines.md index f836fbfe8..8b1aae970 100644 --- 
a/hdk/docs/AWS_Shell_V1.4_Migration_Guidelines.md +++ b/hdk/docs/AWS_Shell_V1.4_Migration_Guidelines.md @@ -29,7 +29,7 @@ NOTE: Only INCR burst mode is supported on AXI-4 buses between CL/Shell interfac 5. Upgrade to latest for [SH_DDR IP](../common/shell_v04261818/design/sh_ddr) . -6. All Xilinx IP in your CL must to be upgraded to 2017.4 or later version. see [vivado 2017.4 release notes for recommended version](https://www.xilinx.com/support/answers/70386.html) +6. All Xilinx IP in your CL must to be upgraded to 2017.4 or later version. see [vivado 2017.4 release notes for recommended version](https://support.xilinx.com/s/article/70386?language=en_US) 7. [ILA cores](../common/shell_v04261818/design/ip/cl_debug_bridge) need to be upgraded for 2017.4 or later Please refer to the [cl_dram_dma](../cl/examples/cl_dram_dma/design) example for ILA hookup on PCIS interface. diff --git a/hdk/docs/Programmer_View.md b/hdk/docs/Programmer_View.md index afcb1fbbb..625e52914 100644 --- a/hdk/docs/Programmer_View.md +++ b/hdk/docs/Programmer_View.md @@ -12,7 +12,7 @@ There are two parts required to work with AWS FPGA: Management and Runtime, and **\[B\]** As a C-library called [FPGA Management Lib](../../sdk/userspace/fpga_libs/fpga_mgmt/) to be compiled with the developer's C/C++ application. - **\[C\]** Pre-integrated with [OpenCL runtime library](../../SDAccel) + **\[C\]** Pre-integrated with [OpenCL runtime library](../../Vitis) 2. **Runtime code**: is required for reading/writing from/to the Custom Logic, handling interrupts, and using the DMA. This is provided by: @@ -22,6 +22,6 @@ There are two parts required to work with AWS FPGA: Management and Runtime, and **\[F\]** A [Userspace Interrupt/Event notification](../../sdk/linux_kernel_drivers/xdma/user_defined_interrupts_README.md) using standard POSIX API like open() and poll(), to be used in any C/C++ application. 
This Interrupt/Event interface requires installing the [XDMA kernel driver](../../sdk/linux_kernel_drivers/xdma/xdma_install.md) - marked as item **\[G\]**. - **\[I\]** An [openCL ICD](https://wikipedia.org/wiki/OpenCL#Implementations) library that links with openCL runtime application, like the one generated by Xilinx' SDAccel. + **\[I\]** An openCL ICD library that links with openCL runtime application, like the one generated by Xilinx Vitis. diff --git a/hdk/docs/afi_power.md b/hdk/docs/afi_power.md index 3d4766c43..70a2133c6 100644 --- a/hdk/docs/afi_power.md +++ b/hdk/docs/afi_power.md @@ -4,7 +4,12 @@ There are 2 power related scenarios that need to be avoided: 1. Ramping too quickly between low power and high power states ## Exceeding Maximum FPGA power -The Xilinx UltraScale+ FPGA devices used on the F1 instances have a maximum power limit that must be maintained. If a loaded AFI consumes maximum power, the F1 instance will automatically gate the input clocks provided to the AFI in order to prevent errors within the FPGA. This is called an afi-power-violation. Specifically, when power (Vccint) is greater than 85 watts, the CL will have a power warning bit set. Above that level, the CL is in danger of being clock gated due to an afi-power-violation. +The Xilinx UltraScale+ FPGA devices used on the F1 instances impose a Maximum Power Consumption Limit of 85 Watts. The F1 Instance takes the following actions if the AFI causes power consumption to exceed the imposed limit: +1. The ```afi-power-warning``` is triggered when the AFI running on an F1 instance exceeds the power consumption limit of 85W. The clocks from Shell to the CL are not gated while this warning is set. This is an indication that Customer Logic should reduce power consumption. +2. If the high power consumption is not mitigated, then the Shell asserts ```afi-power-violation``` and gates the clocks to the CL. +3.
In addition, if the F1 Instance detects high power consumption at the time of AFI load, then ```afi-power-violation``` is set, clocks are disabled, and any transactions along the Shell-CL interface result in timeouts. + +For all practical purposes, we strongly recommend that customer designs act on ```afi-power-warning``` to reduce power consumption. ## Ramping too Quickly Between Low Power and High Power States Even though your design may have a max power which is lower than the previously described limit, you might see issues if you rapidly switch between low power and high power states. A common scenario is upon startup the design goes from a low power reset state to the max power state instantly. In failing cases the host will appear to lose contact with the FPGA card and can only recover with an instance stop/restart. To prevent this from happening care must be taken to sequence the design such that it slowly increases the power requirements to max power instead of instantaneously doing so. @@ -27,16 +32,16 @@ Power consumption (Vccint): Power consumption may drift slightly over time, and may vary from instance to instance. In order to prevent a power violation, it's important to take into account this natural variation, and design with margin accordingly. ## Lowering Power Based on High Power Events Reported by the Shell -In order to help developers avoid these overpower events, the F1 system indicates a afi-power-warning on the CL interface (sh_cl_pwr_state[1:0]) when the FPGA power levels are above 85 watts, and the CL is in danger of having it's clocks disabled. This should allow the CL to self-throttle, or reduce power-hungry optimizations, and avoid having its input clocks disabled. +In order to help developers avoid these overpower events, the F1 system indicates an ```afi-power-warning``` on the CL interface (```sh_cl_pwr_state[1:0]```) when the FPGA power levels are above 85 watts, and the CL is in danger of having its clocks disabled.
This should allow the CL to self-throttle, or reduce power-hungry optimizations, and avoid having its input clocks disabled. -Power state of the FPGA: sh_cl_pwr_state[1:0] -0x0 – OK -0x1 – UNUSED -0x2 – afi-power-warning -0x3 – afi-power-violation +Power state of the FPGA: ```sh_cl_pwr_state[1:0]```: +* 0x0 – OK +* 0x1 – UNUSED +* 0x2 – ```afi-power-warning```: Set when the AFI exceeds the power consumption limit of 85W. Shell-CL clocks are not gated. +* 0x3 – ```afi-power-violation```: Set when the AFI continues to exceed the power consumption limit; Shell-CL clocks are shut off. ## Detecting power-violation -The fpga-describe-local-image command will show that the AFI load has failed due to an afi-power-violation +The fpga-describe-local-image command will show that the AFI load has failed due to an ```afi-power-violation``` ``` # fpga-describe-local-image -S 0 @@ -44,7 +49,7 @@ The fpga-describe-local-image command will show that the AFI load has failed due AFIDEVICE 0 0x1d0f 0xf000 0000:00:1d.0 ``` -An afi-power-violation can occur either when the FPGA is first loaded, or while the FPGA is running a particularly power-intense workload. If the afi-power-violation occurs during a fpga-load-local-image, the load local image will itself fail with the afi-power-violation error. After a afi-power-violation, transactions to CL will trigger [timeouts on all Shell to CL interfaces](./HOWTO_detect_shell_timeout.md). +An ```afi-power-violation``` can occur either when the FPGA is first loaded, or while the FPGA is running a particularly power-intense workload. If the ```afi-power-violation``` occurs during a fpga-load-local-image, the load local image will itself fail with the ```afi-power-violation``` error. After an ```afi-power-violation```, transactions to CL will trigger [timeouts on all Shell to CL interfaces](./HOWTO_detect_shell_timeout.md).
## Analyze power reports from Vivado Once the AFI power has been identified on F1, we recommend using Vivado to analyze the design to help reduce power. First, open the DCP (Design check point) in the Vivado GUI. Then, run the tcl command within Vivado: @@ -70,7 +75,7 @@ https://www.xilinx.com/support/documentation/sw_manuals/xilinx2017_2/ug907-vivad Using a lower clock frequency from the [supported clock recipe](./clock_recipes.csv) will reduce the power consumed by the AFI. ## Recovering from clock gating -When an afi-power-violation occurs, the FPGA can still be loaded and cleared, but the clocks cannot be re-enabled without reloading the FPGA. Any AFI load or clear will restore full functionality to the FPGA. +When an ```afi-power-violation``` occurs, the FPGA can still be loaded and cleared, but the clocks cannot be re-enabled without reloading the FPGA. Any AFI load or clear will restore full functionality to the FPGA. # Power Savings Techniques Here are some low power design techniques that can be used to lower the overall power or minimize instantaneous power ramps. @@ -90,6 +95,3 @@ Power is consumed whenever a node in the design switches high or low. Reducing t **Architectural Power Savings**: A global power savings technique is to control power at the top-level Architectural Level. There is typically a block diagram of the overall design. By gating the clocks to top-level blocks and/or creating enables for the sequential elements in the design, these blocks can be put into low power modes when they aren't being used. It's critical to only enable the blocks that are required for the job. **Reducing Instantaneous Swings in Power**: Care must be taken to ensure there aren't large swings between low power and high power states. Sequencing the enables to the top-level architectural blocks will allow the design to slowly ramp to max power levels. 
- - - diff --git a/sdk/apps/virtual-ethernet/README.md b/sdk/apps/virtual-ethernet/README.md index 5515f44b7..f6feccc96 100644 --- a/sdk/apps/virtual-ethernet/README.md +++ b/sdk/apps/virtual-ethernet/README.md @@ -9,7 +9,7 @@ The Virtual Ethernet framework facilitates streaming Ethernet frames from a netw In order to get the most from this document, readers may want to familiarize themselves with the following: 1. The [F1 customer logic flow](../../../hdk/README.md#endtoend); creating and loading AFIs (Amazon FPGA Images) -2. [DPDK](http://dpdk.org) (Data Plane Development Kit), a framework for creating high performance network traffic handling tools in userspace by limiting context switches, locks, or other blocking logic. +2. [DPDK](https://dpdk.org) (Data Plane Development Kit), a framework for creating high performance network traffic handling tools in userspace by limiting context switches, locks, or other blocking logic. 3. [EC2 Networking Concepts](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/EC2_Network_and_Security.html) # Virtual Ethernet Architecture diff --git a/sdk/userspace/add_udev_rules.sh b/sdk/userspace/add_udev_rules.sh index f72bcac2e..9663e2ee6 100755 --- a/sdk/userspace/add_udev_rules.sh +++ b/sdk/userspace/add_udev_rules.sh @@ -22,13 +22,16 @@ rm -f /tmp/sdk_root_env.exp mkdir -p /opt/aws/bin +DBDFs=`lspci -Dn | grep -Ew "1d0f:1042|1d0f:1041" | awk '{print $1}' | sed ':x;N;$!bx;s/\n/ /g'` + + if [[ $AWS_FPGA_SDK_OTHERS ]]; then # Allow all users -# Make a script that will be run to change permissions everytime -# udev rule for the DBDF is matched -echo "Installing permission fix script for udev" -cat >/opt/aws/bin/change-fpga-perm.sh</opt/aws/bin/change-fpga-perm.sh</etc/udev/rules.d/9999-presistent-fpga.rules</dev/null 2>&1 -if [[ $? -eq 0 ]] ; then - if [ -z ${AWS_FPGA_SDK_OVERRIDE_GROUP} ] ; then - echo "Group ${AWS_FPGA_SDK_GROUP} already exists. 
Please export a non existent group name to AWS_FPGA_SDK_GROUP or export AWS_FPGA_SDK_OVERRIDE_GROUP=y" - exit 1 - fi - echo "${AWS_FPGA_SDK_GROUP} already exists, will grant FPGA resource access to this group" -else - groupadd ${AWS_FPGA_SDK_GROUP} - if [[ $? -ne 0 ]] ; then - echo "Could not group ${AWS_FPGA_SDK_GROUP}" - exit 1 - fi -fi + echo "Creating group ${AWS_FPGA_SDK_GROUP}" + getent group ${AWS_FPGA_SDK_GROUP} >/dev/null 2>&1 + if [[ $? -eq 0 ]] ; then + if [ -z ${AWS_FPGA_SDK_OVERRIDE_GROUP} ] ; then + echo "Group ${AWS_FPGA_SDK_GROUP} already exists. Please export a non existent group name to AWS_FPGA_SDK_GROUP or export AWS_FPGA_SDK_OVERRIDE_GROUP=y" + exit 1 + fi + echo "${AWS_FPGA_SDK_GROUP} already exists, will grant FPGA resource access to this group" + else + groupadd ${AWS_FPGA_SDK_GROUP} + if [[ $? -ne 0 ]] ; then + echo "Could not group ${AWS_FPGA_SDK_GROUP}" + exit 1 + fi + fi -echo "Adding user ${SDK_NON_ROOT_USER} into group ${AWS_FPGA_SDK_GROUP}" -getent group ${AWS_FPGA_SDK_GROUP} | grep -qw ${SDK_NON_ROOT_USER} -if [[ $? -eq 0 ]] ; then - echo "${SDK_NON_ROOT_USER} is already in group ${AWS_FPGA_SDK_GROUP}" -else - usermod -a -G ${AWS_FPGA_SDK_GROUP} ${SDK_NON_ROOT_USER} - if [[ $? -ne 0 ]] ; then - echo "Could not add user ${SDK_NON_ROOT_USER} to group ${AWS_FPGA_SDK_GROUP}" - exit 1 - fi -fi + echo "Adding user ${SDK_NON_ROOT_USER} into group ${AWS_FPGA_SDK_GROUP}" + getent group ${AWS_FPGA_SDK_GROUP} | grep -qw ${SDK_NON_ROOT_USER} + if [[ $? -eq 0 ]] ; then + echo "${SDK_NON_ROOT_USER} is already in group ${AWS_FPGA_SDK_GROUP}" + else + usermod -a -G ${AWS_FPGA_SDK_GROUP} ${SDK_NON_ROOT_USER} + if [[ $? 
-ne 0 ]] ; then + echo "Could not add user ${SDK_NON_ROOT_USER} to group ${AWS_FPGA_SDK_GROUP}" + exit 1 + fi + fi -# Fail on any unsucessful command -set -e -# Make a script that will be run to change permissions everytime -# udev rule for the DBDF is matched -echo "Installing permission fix script for udev" -cat >/opt/aws/bin/change-fpga-perm.sh</opt/aws/bin/change-fpga-perm.sh</etc/udev/rules.d/9999-presistent-fpga.rules<> /tmp/9999-presistent-fpga.rules + done + for d in $DBDFs ; do + echo "KERNEL==\"*${d}*\", ACTION==\"add\", RUN+=\"/opt/aws/bin/change-fpga-perm.sh '${d}'\"" >> /tmp/9999-presistent-fpga.rules + done + echo "Adding udev rule: 9999-presistent-fpga.rules" + cp /tmp/9999-presistent-fpga.rules /etc/udev/rules.d/9999-presistent-fpga.rules +fi + +chmod 544 /opt/aws/bin/change-fpga-perm.sh + ## Test the rules for any issues for d in $DBDFs ; do udevadm test --action="add" /sys/bus/pci/devices/${d} >/dev/null 2>&1 diff --git a/shared/bin/set_common_env_vars.sh b/shared/bin/set_common_env_vars.sh index 652de643a..e883997fb 100644 --- a/shared/bin/set_common_env_vars.sh +++ b/shared/bin/set_common_env_vars.sh @@ -36,7 +36,7 @@ unset HDK_SHELL_DESIGN_DIR export -f allow_non_root export -f allow_others -if allow_non_root && allow_others ; then +if allow_non_root || allow_others ; then export AWS_FPGA_SDK_GROUP=${AWS_FPGA_SDK_GROUP:-"fpgauser"} export SDK_NON_ROOT_USER=$(whoami) info_msg "Allowing group ${AWS_FPGA_SDK_GROUP} access to FPGA management tools and resources" diff --git a/shared/tests/bin/check_md_links.py b/shared/tests/bin/check_md_links.py index 8ec80eb96..91d6a168a 100755 --- a/shared/tests/bin/check_md_links.py +++ b/shared/tests/bin/check_md_links.py @@ -43,6 +43,7 @@ import logging import markdown import os +import ssl import os.path from os.path import dirname, realpath import re @@ -112,7 +113,8 @@ def check_link(url): if not urlparse.urlparse(url).netloc: return False - website = urlopen(url) + context = 
ssl._create_unverified_context() + website = urlopen(url, context=context) html = website.read() if website.code != 200: