diff --git a/.travis.yml b/.travis.yml index fc2b142c..f8e77edd 100644 --- a/.travis.yml +++ b/.travis.yml @@ -9,7 +9,7 @@ rvm: - 2.7.1 before_install: - - sudo apt-get install -y libpango1.0-dev ghostscript fonts-lyx jing + - sudo apt-get install -y libpango1.0-dev ghostscript fonts-lyx jing libavalon-framework-java libbatik-java python3-pyparsing - gem install asciidoctor -v 2.0.16 - gem install coderay -v 1.1.1 - gem install rouge -v 3.19.0 @@ -17,17 +17,19 @@ before_install: - gem install hexapdf -v 0.27.0 - gem install asciidoctor-pdf -v 1.5.0 - gem install asciidoctor-mathematical -v 0.3.5 + - pip install pyparsing script: - git describe --tags --dirty - - make -O -j 5 api c env ext cxx4opencl manhtmlpages - make -C xml validate + - python3 makeSpec -clean -spec core OUTDIR=out.core -j 5 api c env ext cxx4opencl + - python3 makeSpec -clean -spec khr OUTDIR=out.khr -j 12 html manhtmlpages deploy: provider: releases api_key: $GH_TOKEN file_glob: true - file: out/pdf/* + file: out.*/pdf/* skip_cleanup: true on: tags: true diff --git a/Makefile b/Makefile index 239a8e4e..8e171b81 100644 --- a/Makefile +++ b/Makefile @@ -1,48 +1,56 @@ -# Copyright (c) 2013-2024 The Khronos Group Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 +# Copyright 2013-2024 The Khronos Group Inc. +# SPDX-License-Identifier: Apache-2.0 + +# OpenCL Specifications Makefile # -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
+# To build the specifications / reference pages (refpages) with optional +# extensions included, set the $(EXTENSIONS) variable on the make +# command line to a space-separated list of extension names. +# $(EXTENSIONS) is converted into generator script +# arguments $(EXTOPTIONS) and into $(ATTRIBFILE) + +EXTS := $(sort $(EXTENSIONS)) +EXTOPTIONS := $(foreach ext,$(EXTS),-extension $(ext)) QUIET ?= +VERYQUIET ?= @ ASCIIDOCTOR ?= asciidoctor -XMLLINT ?= xmllint -DBLATEX ?= dblatex -DOS2UNIX ?= dos2unix RM = rm -f RMRF = rm -rf MKDIR = mkdir -p CP = cp GITHEAD = ./.git/logs/HEAD +# Where the repo root is +ROOTDIR = $(CURDIR) +# Where the spec files are +SPECDIR = $(CURDIR) + +# Path to scripts used in generation +SCRIPTS = $(ROOTDIR)/scripts +# Path to configs and asciidoc extensions used in generation +CONFIGS = $(ROOTDIR)/config + # Target directories for output files # HTMLDIR - 'html' target # PDFDIR - 'pdf' target # CHECKDIR - 'allchecks' target -OUTDIR := out -HTMLDIR := $(OUTDIR)/html -PDFDIR := $(OUTDIR)/pdf +OUTDIR = out +HTMLDIR = $(OUTDIR)/html +PDFDIR = $(OUTDIR)/pdf +PYAPIMAP = $(GENERATED)/apimap.py # PDF Equations are written to SVGs, this dictates the location to store those files (temporary) -PDFMATHDIR := $(OUTDIR)/equations_temp +PDFMATHDIR = $(OUTDIR)/equations_temp # Set VERBOSE to -v to see what asciidoc is doing. VERBOSE = # asciidoc attributes to set. 
# NOTEOPTS sets options controlling which NOTEs are generated -# ATTRIBOPTS sets the api revision and enables MathJax generation, and -# the path to generate include files +# ATTRIBOPTS sets the api revision and enables KaTeX generation # ADOCOPTS options for asciidoc->HTML5 output (book document type) -# ADOCMANOPTS options for asciidoc->HTML5 output (manpage document type) +# ADOCREFOPTS options for asciidoc->HTML5 output (manpage document type) # Currently unused in CL spec NOTEOPTS = -a editing-notes # Spell out RFC2822 format as not all date commands support -R @@ -79,31 +87,47 @@ CXX4OPENCL_DOCREVISION = DocRev2021.12 CXX4OPENCL_DOCREMARK = $(SPECREMARK) \ tag: $(SPECREVISION) +# Some of the attributes used in building spec documents: +# generated - absolute path to generated sources +# refprefix - controls which generated extension metafiles are +# included at build time. Must be empty for specification, +# 'refpage.' for refpages (see ADOCREFOPTS below). COMMONATTRIBOPTS = -a revdate="$(SPECDATE)" \ -a stem=latexmath \ -a generated=$(GENERATED) \ - -a sectnumlevels=5 + -a sectnumlevels=5 \ + -a refprefix ATTRIBOPTS = -a revnumber="$(SPECREVISION)" \ -a revremark="$(SPECREMARK)" \ $(COMMONATTRIBOPTS) -CXX4OPENCL_ATTRIBOPTS = -a revnumber="$(CXX4OPENCL_DOCREVISION)" \ +CXX4OPENCL_ATTRIBOPTS = -a revnumber="$(CXX4OPENCL_DOCREVISION)" \ -a revremark="$(CXX4OPENCL_DOCREMARK)" \ $(COMMONATTRIBOPTS) -ADOCEXTS = -r $(CURDIR)/config/sectnumoffset-treeprocessor.rb \ - -r $(CURDIR)/config/spec-macros.rb \ - -r $(CURDIR)/config/rouge_opencl.rb +ADOCEXTS = -r $(CONFIGS)/sectnumoffset-treeprocessor.rb \ + -r $(CONFIGS)/spec-macros.rb \ + -r $(CONFIGS)/rouge_opencl.rb CXX4OPENCL_ADOCOPTS = -d book $(CXX4OPENCL_ATTRIBOPTS) $(NOTEOPTS) $(VERBOSE) $(ADOCEXTS) ADOCCOMMONOPTS = -a apispec="$(CURDIR)/api" \ - -a config="$(CURDIR)/config" \ + -a config="$(CONFIGS)" \ -a cspec="$(CURDIR)/c" \ -a images="$(CURDIR)/images" \ $(ATTRIBOPTS) $(NOTEOPTS) $(VERBOSE) $(ADOCEXTS) 
ADOCOPTS = -d book $(ADOCCOMMONOPTS) -ADOCMANOPTS = -d manpage $(ADOCCOMMONOPTS) + +# Asciidoctor options to build refpages +# +# ADOCREFOPTS *must* be placed after ADOCOPTS in the command line, so +# that it can override spec attribute values. +# +# cross-file-links makes custom macros link to other refpages +# refprefix includes the refpage (not spec) extension metadata. +# isrefpage is for refpage-specific content +ADOCREFOPTS = -a cross-file-links -a refprefix='refpage.' \ + -a isrefpage -d manpage # ADOCHTMLOPTS relies on the relative runtime path from the output HTML # file to the katex scripts being set with KATEXDIR. This is overridden @@ -111,9 +135,9 @@ ADOCMANOPTS = -d manpage $(ADOCCOMMONOPTS) # ADOCHTMLOPTS also relies on the absolute build-time path to the # 'stylesdir' containing our custom CSS. KATEXDIR = ../katex -ADOCHTMLEXTS = -r $(CURDIR)/config/katex_replace.rb +ADOCHTMLEXTS = -r $(CONFIGS)/katex_replace.rb ADOCHTMLOPTS = $(ADOCHTMLEXTS) -a katexpath=$(KATEXDIR) \ - -a stylesheet=khronos.css -a stylesdir=$(CURDIR)/config \ + -a stylesheet=khronos.css -a stylesdir=$(CONFIGS) \ -a sectanchors ADOCPDFEXTS = -r asciidoctor-pdf -r asciidoctor-mathematical --trace @@ -121,14 +145,21 @@ ADOCPDFOPTS = $(ADOCPDFEXTS) -a mathematical-format=svg \ -a imagesoutdir=$(PDFMATHDIR) # Where to put dynamically generated dependencies of the spec and other -# targets, from API XML. GENERATED and APIINCDIR specify the location of +# targets, from API XML. GENERATED and APIPATH specify the location of # the API interface includes. -# GENDEPENDS could have multiple dependencies. 
GENERATED = $(CURDIR)/generated REFPATH = $(GENERATED)/refpage -APIINCDIR = $(GENERATED)/api -VERSIONDIR = $(APIINCDIR)/version-notes -GENDEPENDS = $(APIINCDIR)/timeMarker +APIPATH = $(GENERATED)/api +METAPATH = $(GENERATED)/meta +VERSIONDIR = $(APIPATH)/version-notes +ATTRIBFILE = $(GENERATED)/specattribs.adoc + +# timeMarker is a proxy target created when many generated files are +# made at once +APIDEPEND = $(APIPATH)/timeMarker +METADEPEND = $(METAPATH)/timeMarker +# All generated dependencies +GENDEPENDS = $(APIDEPEND) $(METADEPEND) $(ATTRIBFILE) .PHONY: directories @@ -173,16 +204,22 @@ pdf: apipdf envpdf extpdf extensionspdf cxxpdf cpdf icdinstpdf # 'html' causing specs to *always* be regenerated. src: - @echo APISPECSRC = $(APISPECSRC) - @echo ENVSPECSRC = $(ENVSPECSRC) - @echo EXTSPECSRC = $(EXTSPECSRC) + @echo APISPECSRC = $(APISPECSRC) + @echo CSPECSRC = $(CSPECSRC) + @echo ENVSPECSRC = $(ENVSPECSRC) + @echo EXTSPECSRC = $(EXTSPECSRC) + @echo CEXTDOCSRC = $(CEXTDOCSRC) + @echo CXX4OPENCLDOCSRC = $(CXX4OPENCLDOCSRC) + @echo CXXSPECSRC = $(CXXSPECSRC) + @echo EXTENSIONSSPECSRC = $(EXTENSIONSSPECSRC) + @echo ICDINSTSPECSRC = $(ICDINSTSPECSRC) # API spec # Top-level spec source file APISPEC = OpenCL_API APISPECSRC = $(APISPEC).txt $(GENDEPENDS) \ - $(shell grep ^include:: $(APISPEC).txt | sed -e 's/^include:://' -e 's/\[\]/ /' | xargs echo) + $(shell scripts/find_adoc_deps $(APISPEC).txt $(GENERATED)) apihtml: $(HTMLDIR)/$(APISPEC).html $(APISPECSRC) @@ -202,7 +239,7 @@ $(PDFDIR)/$(APISPEC).pdf: $(APISPECSRC) # Top-level spec source file ENVSPEC = OpenCL_Env ENVSPECSRC = $(ENVSPEC).txt $(GENDEPENDS) \ - $(shell grep ^include:: $(ENVSPEC).txt | sed -e 's/^include:://' -e 's/\[\]/ /' | xargs echo) + $(shell scripts/find_adoc_deps $(ENVSPEC).txt $(GENERATED)) envhtml: $(HTMLDIR)/$(ENVSPEC).html $(ENVSPECSRC) @@ -220,7 +257,7 @@ $(PDFDIR)/$(ENVSPEC).pdf: $(ENVSPECSRC) # Extensions spec EXTSPEC = OpenCL_Ext EXTSPECSRC = $(EXTSPEC).txt $(GENDEPENDS) \ - $(shell 
grep ^include:: $(EXTSPEC).txt | sed -e 's/^include:://' -e 's/\[\]/ /' | xargs echo) + $(shell scripts/find_adoc_deps $(EXTSPEC).txt $(GENERATED)) exthtml: $(HTMLDIR)/$(EXTSPEC).html $(EXTSPECSRC) @@ -239,12 +276,12 @@ $(PDFDIR)/$(EXTSPEC).pdf: $(EXTSPECSRC) EXTDIR = extensions EXTENSIONSSPEC = extensions EXTENSIONSSPECSRC = $(EXTDIR)/$(EXTENSIONSSPEC).txt ${GENDEPENDS} \ - $(shell grep ^include:: $(EXTDIR)/$(EXTENSIONSSPEC).txt | sed -e 's/^include:://' -e 's/\[\]/ /' | xargs echo) + $(shell scripts/find_adoc_deps $(EXTDIR)/$(EXTENSIONSSPEC).txt $(GENERATED)) # Included extension documents -EXTENSIONS := $(notdir $(wildcard $(EXTDIR)/[A-Za-z]*.asciidoc)) -EXTENSIONS_HTML = $(patsubst %.asciidoc,$(HTMLDIR)/%.html,$(EXTENSIONS)) -EXTENSIONS_PDF = $(patsubst %.asciidoc,$(PDFDIR)/%.pdf,$(EXTENSIONS)) +EXTDOCS := $(notdir $(wildcard $(EXTDIR)/[A-Za-z]*.asciidoc)) +EXTENSIONS_HTML = $(patsubst %.asciidoc,$(HTMLDIR)/%.html,$(EXTDOCS)) +EXTENSIONS_PDF = $(patsubst %.asciidoc,$(PDFDIR)/%.pdf,$(EXTDOCS)) extensionshtml: $(HTMLDIR)/$(EXTENSIONSSPEC).html $(EXTENSIONSSPECSRC) $(EXTENSIONS_HTML) @@ -270,7 +307,7 @@ $(PDFDIR)/$(EXTENSIONSSPEC).pdf: $(EXTENSIONSSPECSRC) $(GENDEPENDS) # Language Extensions spec CEXTDOC = OpenCL_LangExt CEXTDOCSRC = $(CEXTDOC).txt $(GENDEPENDS) \ - $(shell grep ^include:: $(CEXTDOC).txt | sed -e 's/^include:://' -e 's/\[\]/ /' | xargs echo) + $(shell scripts/find_adoc_deps $(CEXTDOC).txt $(GENERATED)) cexthtml: $(HTMLDIR)/$(CEXTDOC).html $(CEXTDOCSRC) @@ -288,7 +325,7 @@ $(PDFDIR)/$(CEXTDOC).pdf: $(CEXTDOCSRC) # C++ (cxx) spec CXXSPEC = OpenCL_Cxx CXXSPECSRC = $(CXXSPEC).txt $(GENDEPENDS) \ - $(shell grep ^include:: $(CXXSPEC).txt | sed -e 's/^include:://' -e 's/\[\]/ /' | xargs echo) + $(shell scripts/find_adoc_deps $(CXXSPEC).txt $(GENERATED)) cxxhtml: $(HTMLDIR)/$(CXXSPEC).html $(CXXSPECSRC) @@ -306,7 +343,7 @@ $(PDFDIR)/$(CXXSPEC).pdf: $(CXXSPECSRC) # C spec CSPEC = OpenCL_C CSPECSRC = $(CSPEC).txt $(GENDEPENDS) \ - $(shell grep ^include:: 
$(CSPEC).txt | sed -e 's/^include:://' -e 's/\[\]/ /' | xargs echo) + $(shell scripts/find_adoc_deps $(CSPEC).txt $(GENERATED)) chtml: $(HTMLDIR)/$(CSPEC).html $(CSPECSRC) @@ -324,7 +361,7 @@ $(PDFDIR)/$(CSPEC).pdf: $(CSPECSRC) # C++ for OpenCL doc CXX4OPENCLDOC = CXX_for_OpenCL CXX4OPENCLDOCSRC = $(CXX4OPENCLDOC).txt $(GENDEPENDS) \ - $(shell grep ^include:: $(CXX4OPENCLDOC).txt | sed -e 's/^include:://' -e 's/\[\]/ /' | xargs echo) + $(shell scripts/find_adoc_deps $(CXX4OPENCLDOC).txt $(GENERATED)) cxx4openclhtml: $(HTMLDIR)/$(CXX4OPENCLDOC).html $(CXX4OPENCLDOCSRC) @@ -342,7 +379,7 @@ $(PDFDIR)/$(CXX4OPENCLDOC).pdf: $(CXX4OPENCLDOCSRC) # ICD installation guidelines ICDINSTSPEC = OpenCL_ICD_Installation ICDINSTSPECSRC = $(ICDINSTSPEC).txt \ - $(shell grep ^include:: $(ICDINSTSPEC).txt | sed -e 's/^include:://' -e 's/\[\]/ /' | xargs echo) + $(shell scripts/find_adoc_deps $(ICDINSTSPEC).txt $(GENERATED)) icdinsthtml: $(HTMLDIR)/$(ICDINSTSPEC).html $(ICDINSTSPECSRC) @@ -359,18 +396,29 @@ $(PDFDIR)/$(ICDINSTSPEC).pdf: $(ICDINSTSPECSRC) # Clean generated and output files -clean: clean_html clean_pdf clean_generated +clean: clean_html clean_pdf clean_man clean_generated clean_html: - $(QUIET)$(RMRF) $(HTMLDIR) $(MANHTMLDIR) $(OUTDIR)/katex + $(QUIET)$(RMRF) $(HTMLDIR) $(OUTDIR)/katex clean_pdf: $(QUIET)$(RMRF) $(PDFDIR) $(PDFMATHDIR) +clean_man: + $(QUIET)$(RMRF) $(MANHTMLDIR) + +# Generated directories and files to remove +CLEAN_GEN_PATHS = \ + $(APIPATH) \ + $(METAPATH) \ + $(REFPATH) \ + $(GENERATED)/__pycache__ \ + $(PDFMATHDIR) \ + $(PYAPIMAP) \ + $(ATTRIBFILE) + clean_generated: - $(QUIET)$(RMRF) $(APIINCDIR)/* $(GENERATED)/api.py $($(REFPATH)/ - $(QUIET)$(RMRF) $(PDFMATHDIR) - $(QUIET)$(RMRF) $(GENERATED)/__pycache__ + $(QUIET)$(RMRF) $(CLEAN_GEN_PATHS) # Ref page targets for individual pages MANDIR := man @@ -396,22 +444,22 @@ MANSOURCES = $(filter-out $(REFPATH)/apispec.txt $(REFPATH)/footer.txt $(wildc # Should pass in $(EXTOPTIONS) to determine which pages 
to generate. # For now, all core and extension ref pages are extracted by genRef.py. ## Temporary - eventually should be all spec asciidoctor source files -SPECFILES = $(wildcard api/*.asciidoc) OpenCL_API.txt OpenCL_C.txt -SCRIPTS = scripts +SPECFILES = $(wildcard api/[A-Za-z]*.asciidoc) $(wildcard c/[A-Za-z]*.asciidoc) OpenCL_API.txt OpenCL_C.txt GENREF = $(SCRIPTS)/genRef.py LOGFILE = $(REFPATH)/refpage.log refpages: $(REFPATH)/apispec.txt -$(REFPATH)/apispec.txt: $(SPECFILES) $(GENREF) $(SCRIPTS)/reflib.py $(GENERATED)/api.py +$(REFPATH)/apispec.txt: $(SPECFILES) $(GENREF) $(SCRIPTS)/reflib.py $(PYAPIMAP) $(QUIET)$(MKDIR) $(REFPATH) $(PYTHON) $(GENREF) -genpath $(GENERATED) -basedir $(REFPATH) \ -rewrite $(REFPATH)/rewritebody -toc $(REFPATH)/tocbody \ - -log $(LOGFILE) $(SPECFILES) + -log $(LOGFILE) -extpath $(CURDIR)/api \ + $(EXTOPTIONS) $(SPECFILES) cat $(MANDIR)/tochead $(REFPATH)/tocbody $(MANDIR)/toctail > $(REFPATH)/toc.html (cat $(MANDIR)/rewritehead ; \ echo ; echo "# Aliases hard-coded in refpage markup" ; \ sort < $(REFPATH)/rewritebody) > $(REFPATH)/.htaccess - $(CP) $(MANDIR)/static/*.txt $(REFPATH) + echo $(CP) $(MANDIR)/static/*.txt $(REFPATH) # These targets are HTML5 ref pages # @@ -419,8 +467,15 @@ $(REFPATH)/apispec.txt: $(SPECFILES) $(GENREF) $(SCRIPTS)/reflib.py $(GENERATED) # actual list of man page sources isn't known until after # $(REFPATH)/apispec.txt is generated. $(GENDEPENDS) is generated before # running the recursive make, so it doesn't trigger twice +# $(SUBMAKEOPTIONS) suppresses the redundant "Entering / leaving" +# messages make normally prints out, similarly to suppressing make +# command output logging in the individual refpage actions below. 
+SUBMAKEOPTIONS = --no-print-directory manhtmlpages: $(REFPATH)/apispec.txt $(GENDEPENDS) - $(MAKE) -e buildmanpages + $(QUIET) echo "manhtmlpages: building HTML refpages with these options:" + $(QUIET) echo $(ASCIIDOCTOR) -b html5 $(ADOCOPTS) $(ADOCHTMLOPTS) \ + $(ADOCREFOPTS) -o REFPAGE.html REFPAGE.adoc + $(MAKE) $(SUBMAKEOPTIONS) -e buildmanpages $(CP) $(MANDIR)/*.html $(MANDIR)/*.css $(MANDIR)/*.gif $(MANHTMLDIR) $(CP) $(REFPATH)/.htaccess $(REFPATH)/*.html $(MANHTMLDIR) @@ -429,26 +484,31 @@ MANHTML = $(MANSOURCES:$(REFPATH)/%.txt=$(MANHTMLDIR)/%.html) buildmanpages: $(MANHTML) +# The refpage build process normally generates far too much output, so +# use VERYQUIET instead of QUIET $(MANHTMLDIR)/%.html: KATEXDIR = ../../katex $(MANHTMLDIR)/%.html: $(REFPATH)/%.txt $(MANCOPYRIGHT) $(GENDEPENDS) $(KATEXINST) - $(QUIET)$(MKDIR) $(MANHTMLDIR) - $(QUIET)$(ASCIIDOCTOR) -b html5 -a cross-file-links \ - $(ADOCMANOPTS) $(ADOCHTMLOPTS) -o $@ $< + $(VERYQUIET)echo "Building $@ from $< using default options" + $(VERYQUIET)$(MKDIR) $(MANHTMLDIR) + $(VERYQUIET)$(ASCIIDOCTOR) -b html5 $(ADOCOPTS) $(ADOCHTMLOPTS) \ + $(ADOCREFOPTS) -o $@ $< $(MANHTMLDIR)/intro.html: $(REFPATH)/intro.txt $(MANCOPYRIGHT) - $(QUIET)$(MKDIR) $(MANHTMLDIR) - $(QUIET)$(ASCIIDOCTOR) -b html5 -a cross-file-links \ - $(ADOCOPTS) $(ADOCHTMLOPTS) -o $@ $< + $(VERYQUIET)echo "Building $@ from $< using default options" + $(VERYQUIET)$(MKDIR) $(MANHTMLDIR) + $(VERYQUIET)$(ASCIIDOCTOR) -b html5 $(ADOCOPTS) $(ADOCHTMLOPTS) \ + $(ADOCREFOPTS) -o $@ $< # Targets generated from the XML and registry processing scripts -# api.py - Python encoding of the registry -# $(APIINCDIR)/timeMarker - proxy for 'apiinc' - generate API interfaces +# apimap.py - Python encoding of the registry +# apiinc / proxy $(APIDEPEND) - API interface include files in $(APIPATH) +# extinc / proxy $(METADEPEND) - extension appendix metadata include files in $(METAPATH) # # $(GENSCRIPTEXTRA) are extra options that can be passed to the # 
generation script, such as # '-diag diag' -REGISTRY = xml +REGISTRY = $(ROOTDIR)/xml APIXML = $(REGISTRY)/cl.xml GENSCRIPT = $(SCRIPTS)/gencl.py DICTSCRIPT = $(SCRIPTS)/gen_dictionaries.py @@ -456,14 +516,37 @@ VERSIONSCRIPT = $(SCRIPTS)/gen_version_notes.py GENSCRIPTOPTS = $(VERSIONOPTIONS) $(EXTOPTIONS) $(GENSCRIPTEXTRA) -registry $(APIXML) GENSCRIPTEXTRA = -$(GENERATED)/api.py: $(APIXML) $(GENSCRIPT) - $(QUIET)$(PYTHON) $(GENSCRIPT) $(GENSCRIPTOPTS) -o $(GENERATED) api.py +PYAPIMAP = $(GENERATED)/apimap.py + +scriptapi: pyapi -apiinc: $(APIINCDIR)/timeMarker +pyapi $(PYAPIMAP): $(APIXML) $(GENSCRIPT) + $(QUIET)$(MKDIR) $(GENERATED) + $(QUIET)$(PYTHON) $(GENSCRIPT) $(GENSCRIPTOPTS) -o $(GENERATED) apimap.py -$(APIINCDIR)/timeMarker: $(APIXML) $(DICTSCRIPT) $(GENSCRIPT) $(VERSIONSCRIPT) - $(QUIET)$(MKDIR) $(APIINCDIR) - $(QUIET)$(PYTHON) $(DICTSCRIPT) -registry $(APIXML) -o $(APIINCDIR) +apiinc: $(APIDEPEND) + +$(APIDEPEND): $(APIXML) $(DICTSCRIPT) $(GENSCRIPT) $(VERSIONSCRIPT) + $(QUIET)$(MKDIR) $(APIPATH) + $(QUIET)$(PYTHON) $(DICTSCRIPT) -registry $(APIXML) -o $(APIPATH) $(QUIET)$(MKDIR) $(VERSIONDIR) $(QUIET)$(PYTHON) $(VERSIONSCRIPT) -registry $(APIXML) -o $(VERSIONDIR) - $(QUIET)$(PYTHON) $(GENSCRIPT) $(GENSCRIPTOPTS) -o $(APIINCDIR) apiinc + $(QUIET)$(PYTHON) $(GENSCRIPT) $(GENSCRIPTOPTS) -o $(APIPATH) apiinc + +extinc: $(METADEPEND) + +$(METADEPEND): $(APIXML) $(GENSCRIPT) + $(QUIET)$(MKDIR) $(METAPATH) + $(QUIET)$(PYTHON) $(GENSCRIPT) $(GENSCRIPTOPTS) -o $(METAPATH) extinc + +# This generates a single file containing asciidoc attributes for each +# extension in the spec being built. 
+attribs: $(ATTRIBFILE) + +$(ATTRIBFILE): + $(MKDIR) $(@D) && for attrib in $(EXTS) ; do \ + echo ":$${attrib}:" ; \ + done > $@ + +# Debugging aid - generate all files from registry XML +generated: $(PYAPIMAP) $(GENDEPENDS) diff --git a/OpenCL_API.txt b/OpenCL_API.txt index 0df01b79..2be31d8e 100644 --- a/OpenCL_API.txt +++ b/OpenCL_API.txt @@ -1,6 +1,9 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ +// Copyright 2017-2024 The Khronos Group +// SPDX-License-Identifier: CC-BY-4.0 + +// Extensions to enable +// Must be included before the header and attribs.txt +include::{generated}/specattribs.adoc[] = The OpenCL^(TM)^ Specification :R: pass:q,r[^(R)^] @@ -73,6 +76,7 @@ include::api/appendix_e.asciidoc[] include::api/appendix_f.asciidoc[] include::api/appendix_g.asciidoc[] include::api/appendix_h.asciidoc[] +include::api/appendix_extensions.asciidoc[] <<< diff --git a/OpenCL_C.txt b/OpenCL_C.txt index ab33bdf1..5e7776c9 100644 --- a/OpenCL_C.txt +++ b/OpenCL_C.txt @@ -1,6 +1,9 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ +// Copyright 2017-2024 The Khronos Group. +// SPDX-License-Identifier: CC-BY-4.0 + +// Extensions to enable +// Must be included before the header and attribs.txt +include::{generated}/specattribs.adoc[] = The OpenCL^(TM)^ C Specification :R: pass:q,r[^(R)^] @@ -135,8 +138,7 @@ Feature macro identifiers are used as names of features in this document. .Optional features in OpenCL C 3.0 or newer and their predefined macros. [cols="1,1",options="header",] |==== -| *Feature Macro/Name* -| *Brief Description* +| Feature Macro/Name | Brief Description | {opencl_c_3d_image_writes} | The OpenCL C compiler supports built-in functions for writing to 3D image @@ -214,6 +216,19 @@ of work-items. 
| The OpenCL C compiler supports built-in functions that perform collective operations across a work-group. +ifdef::cl_khr_integer_dot_product[] +| {opencl_c_integer_dot_product_input_4x8bit_packed} + + (when the {cl_khr_integer_dot_product} extension macro is defined) + +| The OpenCL C compiler supports built-in functions that perform dot +products on 4x8 bit packed integer vectors + +| {opencl_c_integer_dot_product_input_4x8bit} + + (when the {cl_khr_integer_dot_product} extension macro is defined) +| The OpenCL C compiler supports built-in functions that perform dot +products on 4x8 bit integer vectors +endif::cl_khr_integer_dot_product[] + |==== In OpenCL C 3.0 or newer, feature macros must expand to the value `1` if the @@ -232,6 +247,7 @@ feature test macros because there is no guarantee that feature test macros will be defined and that if defined they will indicate the presence of the corresponding optional functionality. + [[extensions]] === Extensions @@ -251,6 +267,442 @@ can still be used as an extension, i.e. the same predefined extension macros are still valid in OpenCL C 3.0 or newer, however the use of feature macros is preferred whenever possible. + +ifdef::cl_khr_3d_image_writes[] +[[cl_khr_3d_image_writes,cl_khr_3d_image_writes]] +==== 3D Image Writes + +The `cl_khr_3d_image_writes` extension was promoted to OpenCL 2.0, and to +OpenCL 3.0 as the {opencl_c_3d_image_writes} feature. +The extension adds <> that allow a kernel to write to 3D image objects in addition to +2D image objects. +endif::cl_khr_3d_image_writes[] + + +ifdef::cl_khr_async_work_group_copy_fence[] +[[cl_khr_async_work_group_copy_fence,cl_khr_async_work_group_copy_fence]] +==== Async Work-group Copy Fence + +The `cl_khr_async_work_group_copy_fence` extension supports establishing a +memory synchronization ordering of asynchronous copies. 
+The extension provides the `async_work_group_copy_fence` function, as +described in the <> table. +endif::cl_khr_async_work_group_copy_fence[] + + +ifdef::cl_khr_byte_addressable_store[] +[[cl_khr_byte_addressable_store,cl_khr_byte_addressable_store]] +==== Byte-Addressable Storage + +The `cl_khr_byte_addressable_store` extension was promoted to OpenCL C 1.1. +The extension relaxes <> on pointers to `char`, `uchar`, +`char2`, `uchar2`, `short`, `ushort` and `half`, allowing applications to +read from and write to pointers to these types. +endif::cl_khr_byte_addressable_store[] + + +ifdef::cl_khr_depth_images[] +[[cl_khr_depth_images,cl_khr_depth_images]] +==== Depth Images + +The `cl_khr_depth_images` extension was promoted to OpenCL 2.0. +The extension provides new <>, as well as <>, +<>, +<>, and <> operating on those types. +endif::cl_khr_depth_images[] + + +ifdef::cl_khr_device_enqueue_local_arg_types[] +[[cl_khr_device_enqueue_local_arg_types,cl_khr_device_enqueue_local_arg_types]] +==== Device Enqueue Local Argument Types + +The `cl_khr_device_enqueue_local_arg_types` extension allows arguments to +blocks that are passed to the <> and to the <> to be pointers to any type (built-in or +user-defined) in local memory, instead of requiring arguments to blocks to +be pointers to `void` in local memory. +endif::cl_khr_device_enqueue_local_arg_types[] + + +ifdef::cl_khr_extended_async_copies[] +[[cl_khr_extended_async_copies,cl_khr_extended_async_copies]] +==== Extended Async Copy Functions + +The `cl_khr_extended_async_copies` extension provides additional +<> which interpret the +source and destination as 2D or 3D images. +endif::cl_khr_extended_async_copies[] + + +ifdef::cl_khr_extended_bit_ops[] +[[cl_khr_extended_bit_ops,cl_khr_extended_bit_ops]] +==== Extended Bit Operations + +The `cl_khr_extended_bit_ops` extension provides additional +<> including bitfield +insert, bitfield extract, and bit reverse. 
+endif::cl_khr_extended_bit_ops[] + + +ifdef::cl_khr_fp16[] +[[cl_khr_fp16,cl_khr_fp16]] +==== Half-Precision Floating-Point + +The `cl_khr_fp16` extension was promoted to OpenCL C 1.2 as an optional +feature, and to OpenCL 3.0 as the optional {cl_khr_fp16} feature. +The extension provides 16-bit precision scalar and vector floating-point +data types and extends many functions to accept these types. +endif::cl_khr_fp16[] + + +ifdef::cl_khr_fp64[] +[[cl_khr_fp64,cl_khr_fp64]] +==== Double-Precision Floating-Point + +The `cl_khr_fp64` extension was promoted to OpenCL C 1.2 as an optional +feature, and to OpenCL 3.0 as the optional {opencl_c_fp64} feature. +The extension provides double-precision scalar and vector floating-point +data types and extends many functions to accept these types. +endif::cl_khr_fp64[] + + +ifdef::cl_khr_gl_msaa_sharing[] +[[cl_khr_gl_msaa_sharing,cl_khr_gl_msaa_sharing]] +==== Multi-Sample Shared OpenCL/OpenGL Images + +The `cl_khr_gl_msaa_sharing` extension adds support for multi-sample images +shared with OpenGL multi-sample textures. +The extension provides new <>, as well as <> and <> operating on those +types. +endif::cl_khr_gl_msaa_sharing[] + + +ifdef::cl_khr_global_int32_base_atomics[] +[[cl_khr_global_int32_base_atomics,cl_khr_global_int32_base_atomics]] +==== Global 32-Bit Base Atomics + +The `cl_khr_global_int32_base_atomics` extension was promoted to OpenCL C +1.1, with the supported functions renamed to use the **atomic_** prefix +rather than the **atom_** prefix. +The extension provides base atomic functions for {global} variables, as +described in the <> table. 
+ +endif::cl_khr_global_int32_base_atomics[] + + +ifdef::cl_khr_global_int32_extended_atomics[] +[[cl_khr_global_int32_extended_atomics,cl_khr_global_int32_extended_atomics]] +==== Global 32-Bit Extended Atomics + +The `cl_khr_global_int32_extended_atomics` extension was promoted to OpenCL +C 1.1, with the supported functions renamed to use the **atomic_** prefix +rather than the **atom_** prefix. +The extension provides extended atomic functions for {global} variables, as +described in the <> table. + +endif::cl_khr_global_int32_extended_atomics[] + + +ifdef::cl_khr_initialize_memory[] +[[cl_khr_initialize_memory,cl_khr_initialize_memory]] +==== Initializing Memory + +The `cl_khr_initialize_memory` extension allows creating a context which +initializes specified types (local or private) of memory prior to the start +of kernel execution. + +There is one <> on the timing +of this initialization discussed in this document, although most of the +extension is defined by the OpenCL 3.0 API Specification. +endif::cl_khr_initialize_memory[] + + +ifdef::cl_khr_int64_base_atomics[] +[[cl_khr_int64_base_atomics,cl_khr_int64_base_atomics]] +==== 64-Bit Base Atomics + +The `cl_khr_int64_base_atomics` extension provides base atomic functions for +{global} and {local} 64-bit signed and unsigned integer variables, as +described in the <> table. +endif::cl_khr_int64_base_atomics[] + + +ifdef::cl_khr_int64_extended_atomics[] +[[cl_khr_int64_extended_atomics,cl_khr_int64_extended_atomics]] +==== 64-Bit Extended Atomics + +The `cl_khr_int64_extended_atomics` extension provides extended atomic functions for +{global} and {local} 64-bit signed and unsigned integer variables, as +described in the <> table. 
+endif::cl_khr_int64_extended_atomics[] + + +ifdef::cl_khr_integer_dot_product[] +[[cl_khr_integer_dot_product,cl_khr_integer_dot_product]] +==== Integer Dot Product + +The `cl_khr_integer_dot_product` extension adds support for SPIR-V +instructions and OpenCL C built-in functions to compute the dot product of +vectors of integers. +The extension provides new <> operating on these types. +endif::cl_khr_integer_dot_product[] + + +ifdef::cl_khr_local_int32_base_atomics[] +[[cl_khr_local_int32_base_atomics,cl_khr_local_int32_base_atomics]] +==== Local 32-Bit Base Atomics + +The `cl_khr_local_int32_base_atomics` extension was promoted to OpenCL C +1.1, with the supported functions renamed to use the **atomic_** prefix +rather than the **atom_** prefix. +The extension provides base atomic functions for {local} variables, as +described in the <> table. + +endif::cl_khr_local_int32_base_atomics[] + + +ifdef::cl_khr_local_int32_extended_atomics[] +[[cl_khr_local_int32_extended_atomics,cl_khr_local_int32_extended_atomics]] +==== Local 32-Bit Extended Atomics + +The `cl_khr_local_int32_extended_atomics` extension was promoted to OpenCL +C 1.1, with the supported functions renamed to use the **atomic_** prefix +rather than the **atom_** prefix. +The extension provides extended atomic functions for {local} variables, as +described in the <> table. + +endif::cl_khr_local_int32_extended_atomics[] + + +ifdef::cl_khr_mipmap_image[] +[[cl_khr_mipmap_image,cl_khr_mipmap_image]] +==== Mipmapped Image Reads and Queries + +The `cl_khr_mipmap_image` extension adds support for mipmap images. +The extension provides built-in <> and <> functions +operating on these images. +endif::cl_khr_mipmap_image[] + + +ifdef::cl_khr_mipmap_image_writes[] +[[cl_khr_mipmap_image_writes,cl_khr_mipmap_image_writes]] +==== Mipmapped Image Writes + +The `cl_khr_mipmap_image_writes` extension adds support for writing to +mipmap images, and requires support for the `<>` +extension macro. 
+The extension provides built-in <> functions operating on these images. +endif::cl_khr_mipmap_image_writes[] + + +ifdef::cl_khr_select_fprounding_mode[] +[[cl_khr_select_fprounding_mode,cl_khr_select_fprounding_mode]] +==== Select Floating-Point Rounding Mode + +The `cl_khr_select_fprounding_mode` extension allows <> for an instruction or group of +instructions in the program source by use of a *#pragma*. + +The extension was deprecated in OpenCL 1.1 and its use is not recommended. +endif::cl_khr_select_fprounding_mode[] + + +ifdef::cl_khr_srgb_image_writes[] +[[cl_khr_srgb_image_writes,cl_khr_srgb_image_writes]] +==== sRGB Image Write Functions + +The `cl_khr_srgb_image_writes` extension adds support for writing to sRGB +images using the <> +functions. Color space conversion is performed by the function. +endif::cl_khr_srgb_image_writes[] + + +ifdef::cl_khr_subgroups[] +[[cl_khr_subgroups,cl_khr_subgroups]] +==== Sub-Groups + +The `cl_khr_subgroups` extension was promoted to OpenCL C 2.1 as the +{opencl_c_subgroups} feature. +The extension provides the following functions: + + * <> + * <> + * <> + * <> + * <> + * The <> type and + <> +endif::cl_khr_subgroups[] + + +ifdef::cl_khr_subgroup_ballot[] +[[cl_khr_subgroup_ballot,cl_khr_subgroup_ballot]] +==== Sub-Group Ballots + +The `cl_khr_subgroup_ballot` extension adds the ability to collect and +operate on ballots from work items in a sub-group. +The extension provides the following functions: + + * <> + +endif::cl_khr_subgroup_ballot[] + + +ifdef::cl_khr_subgroup_clustered_reduce[] +[[cl_khr_subgroup_clustered_reduce,cl_khr_subgroup_clustered_reduce]] +==== Clustered Reductions + +The `cl_khr_subgroup_clustered_reduce` extension adds support for clustered +reductions that operate on a subset of work items in the sub-group. 
+The extension provides the following functions: + + * <> + * <> + * <> + +endif::cl_khr_subgroup_clustered_reduce[] + + +ifdef::cl_khr_subgroup_extended_types[] +[[cl_khr_subgroup_extended_types,cl_khr_subgroup_extended_types]] +==== Sub-Group Extended Types + +The `cl_khr_subgroup_extended_types` extension adds <> to the existing +<>. + +endif::cl_khr_subgroup_extended_types[] + + +ifdef::cl_khr_subgroup_non_uniform_arithmetic[] +[[cl_khr_subgroup_non_uniform_arithmetic,cl_khr_subgroup_non_uniform_arithmetic]] +==== Built-in Non-Uniform Arithmetic Functions for Sub-Groups + +The `cl_khr_subgroup_non_uniform_arithmetic` extension adds the ability to +use some sub-group functions within non-uniform flow control, including +additional scan and reduction operators. + +The extension provides the following functions: + + * <> + * <> + * <> + +endif::cl_khr_subgroup_non_uniform_arithmetic[] + + +ifdef::cl_khr_subgroup_non_uniform_vote[] +[[cl_khr_subgroup_non_uniform_vote,cl_khr_subgroup_non_uniform_vote]] +==== Built-in Non-Uniform Vote and Election Functions for Sub-Groups + +The `cl_khr_subgroup_non_uniform_vote` extension adds the ability to elect a +single work item from a sub-group to perform a task and to hold votes among +work items in a sub-group. + +The extension provides the following functions: + + * <> + +endif::cl_khr_subgroup_non_uniform_vote[] + + +ifdef::cl_khr_subgroup_rotate[] +[[cl_khr_subgroup_rotate,cl_khr_subgroup_rotate]] +==== Sub-Group Rotation + +The `cl_khr_subgroup_rotate` extension adds support for a new sub-group data +exchange operation that makes it possible to rotate values through the work +items in a sub-group. 
+ +The extension provides the following functions: + + * <> + +endif::cl_khr_subgroup_rotate[] + + +ifdef::cl_khr_subgroup_shuffle[] +[[cl_khr_subgroup_shuffle,cl_khr_subgroup_shuffle]] +==== General Purpose Shuffles + +The `cl_khr_subgroup_shuffle` extension adds additional ways to exchange +data among work items in a sub-group. + +The extension provides the following functions: + + * <> + +endif::cl_khr_subgroup_shuffle[] + + +ifdef::cl_khr_subgroup_shuffle_relative[] +[[cl_khr_subgroup_shuffle_relative,cl_khr_subgroup_shuffle_relative]] +==== Relative Shuffles + +The `cl_khr_subgroup_shuffle_relative` extension adds specialized ways to +exchange data among work items in a sub-group that may perform better on +some implementations. + +The extension provides the following functions: + + * <> + +endif::cl_khr_subgroup_shuffle_relative[] + + +ifdef::cl_khr_work_group_uniform_arithmetic[] +[[cl_khr_work_group_uniform_arithmetic,cl_khr_work_group_uniform_arithmetic]] +==== Work-group Collective Uniform Arithmetic Functions + +The `cl_khr_work_group_uniform_arithmetic` extension adds additional +work-group collective functions, including work-group scans and reductions +for the following operators: + + * Logical operations (`and`, `or`, and `xor`). + * Bitwise operations (`and`, `or`, and `xor`). + * Integer multiplication (`mul`). + * Floating-point multiplication (`mul`). + +The extension provides the following functions: + + * <> + * <> + * <> +endif::cl_khr_work_group_uniform_arithmetic[] + + [[supported-data-types]] == Supported Data Types @@ -267,9 +719,9 @@ The following table describes the list of built-in scalar data types. [[table-builtin-scalar-types]] .Built-in Scalar Data Types -[cols=",",] +[cols=",",options="header",] |==== -| *Type* | *Description* +| Type | Description | `bool` footnote:[{fn-bool}] | A conditional data type which is either _true_ or _false_. 
The value _true_ expands to the integer constant 1 and the value @@ -296,15 +748,14 @@ The following table describes the list of built-in scalar data types. storage format. | `double` footnote:[{fn-double}] | A 64-bit floating-point number. - The `double` data type must conform to the IEEE 754 double precision + The `double` data type must conform to the IEEE 754 double-precision storage format. - <> support for OpenCL C 1.2 or newer. In - OpenCL C 3.0 it requires support of the {opencl_c_fp64} feature. - Also see extension *cl_khr_fp64*. + <> support for <>. | `half` | A 16-bit floating-point number. - The `half` data type must conform to the IEEE 754-2008 half precision + The `half` data type must conform to the IEEE 754-2008 half-precision storage format. | `size_t` footnote:size_t[{fn-size_t}] | The unsigned integer type of the result of the `sizeof` operator. @@ -325,21 +776,15 @@ The following table describes the list of built-in scalar data types. type that cannot be completed. |==== -If the double-precision floating-point extension *cl_khr_fp64* or the -{opencl_c_fp64} feature is not supported, implementations may -implicitly cast double-precision floating-point literals to -single-precision literals. The use of double-precision literals without -double-precision support should result in a diagnostic. - Most built-in scalar data types are also declared as appropriate types in the OpenCL API (and header files) that can be used by an application. 
The following table describes the built-in scalar data type in the OpenCL C programming language and the corresponding data type available to the application: -[cols=",",] +[cols=",",options="header",] |==== -| *Type in OpenCL Language* | *API type for application* +| Type in OpenCL Language | API type for application | `bool` | n/a | `char` | `cl_char` | `unsigned char`, `uchar` | `cl_uchar` @@ -361,6 +806,20 @@ application: -- +[[double-precision-support]] +==== Double-Precision Floating-Point Support + +Double-precision floating-point is supported if +ifdef::cl_khr_fp64[the `<>` extension macro is supported, or if] +OpenCL 1.2 or newer is supported. +In OpenCL 3.0, it also requires support for the {opencl_c_fp64} feature. + +If double-precision is not supported, implementations may +implicitly cast double-precision floating-point literals to +single-precision literals. The use of double-precision literals without +double-precision support should result in a diagnostic. + + [[the-half-data-type]] ==== The `half` Data Type @@ -381,6 +840,8 @@ Conversions from `float` to `half` correctly round the mantissa to 11 bits of precision. Conversions from `half` to `float` are lossless; all `half` numbers are exactly representable as `float` values. +Conversions from `double` to `half` are correctly rounded. +Conversions from `half` to `double` are lossless. The `half` data type can only be used to declare a pointer to a buffer that contains `half` values. @@ -449,9 +910,9 @@ The following table describes the list of built-in vector data types. [[table-builtin-vector-types]] .Built-in Vector Data Types -[cols=",",] +[cols=",",options="header",] |==== -| *Type* | *Description* +| Type | Description | `char__n__` | A vector of _n_ 8-bit signed two's complement integer values. | `uchar__n__` @@ -468,14 +929,17 @@ The following table describes the list of built-in vector data types. | A vector of _n_ 64-bit signed two's complement integer values.
| `ulong__n__` footnote:long-vec[] | A vector of _n_ 64-bit unsigned integer values. +ifdef::cl_khr_fp16[] +| `half__n__` footnote:[{fn-half-supported}] + | A vector of _n_ 16-bit floating-point values. +endif::cl_khr_fp16[] | `float__n__` | A vector of _n_ 32-bit floating-point values. | `double__n__` footnote:[{fn-double-vec}] | A vector of _n_ 64-bit floating-point values. - <> support for OpenCL C 1.2 or newer. In - OpenCL C 3.0 it requires support of the {opencl_c_fp64} feature. - Also see extension *cl_khr_fp64*. + <> support for <>. |==== The built-in vector data types are also declared as appropriate types in the @@ -484,9 +948,9 @@ The following table describes the built-in vector data type in the OpenCL C programming language and the corresponding data type available to the application: -[cols=",",] +[cols=",",options="header",] |==== -| *Type in OpenCL Language* | *API type for application* +| Type in OpenCL Language | API type for application | `char__n__` | `cl_char__n__` | `uchar__n__` | `cl_uchar__n__` | `short__n__` | `cl_short__n__` @@ -495,6 +959,9 @@ application: | `uint__n__` | `cl_uint__n__` | `long__n__` | `cl_long__n__` | `ulong__n__` | `cl_ulong__n__` +ifdef::cl_khr_fp16[] +| `half__n__` | `cl_half__n__` +endif::cl_khr_fp16[] | `float__n__` | `cl_float__n__` | `double__n__` | `cl_double__n__` |==== @@ -512,9 +979,9 @@ OpenCL. [[table-other-builtin-types]] .Other Built-in Data Types -[cols=",",] +[cols=",",options="header",] |==== -| *Type* | *Description* +| Type | Description | `image2d_t` footnote:image-functions[{fn-image-functions}] | A 2D image. | `image3d_t` footnote:image-functions[] @@ -538,13 +1005,13 @@ OpenCL. | `image2d_depth_t` footnote:image-functions[] | A 2D depth image. - <> support for OpenCL C 2.0 or newer, also see - `cl_khr_depth_images` extension. + <> support for OpenCL C 2.0 or newer, or for + the `<>` extension macro. | `image2d_array_depth_t` footnote:image-functions[] | A 2D depth image array. 
- <> support for OpenCL C 2.0 or newer, also see - `cl_khr_depth_images` extension. + <> support for OpenCL C 2.0 or newer, or for + the `<>` extension macro. | `sampler_t` footnote:image-functions[] | A sampler type. | `queue_t` @@ -586,6 +1053,40 @@ OpenCL. These flags are described in detail in the <> section. +ifdef::cl_khr_gl_msaa_sharing[] +| `image2d_msaa_t` + | A 2D multi-sample color image. + Refer to the <> section for a detailed description + of the built-in functions that use this type. + + <> support for the + `<>` extension macro. +| `image2d_array_msaa_t` + | A 2D multi-sample color image array. + Refer to the <> section for a detailed description + of the built-in functions that use this type. + + <> support for the + `<>` extension macro. +| `image2d_msaa_depth_t` + | A 2D multi-sample depth image. + Refer to the <> section for a detailed description + of the built-in functions that use this type. + + <> support for the + `<>` extension macro. +| `image2d_array_msaa_depth_t` + | A 2D multi-sample depth image array. + Refer to the <> section for a detailed description + of the built-in functions that use this type. + + <> support for the + `<>` extension macro. +endif::cl_khr_gl_msaa_sharing[] |==== [NOTE] @@ -609,9 +1110,9 @@ The following tables describe the other built-in data types in OpenCL described in <> and the corresponding data type available to the application: -[cols=",",] +[cols=",",options="header",] |==== -| *Type in OpenCL C* | *API type for application* +| Type in OpenCL C | API type for application | `image2d_t` | `cl_mem` | `image3d_t` | `cl_mem` | `image2d_array_t` | `cl_mem` @@ -645,9 +1146,9 @@ are also reserved. [[table-reserved-types]] .Reserved Data Types -[cols=",",] +[cols=",",options="header",] |==== -| *Type* | *Description* +| Type | Description | `bool__n__` | A boolean vector. | `half__n__` @@ -674,7 +1175,7 @@ are also reserved. 
| An _n_ {times} _m_ matrix of single precision floating-point values stored in column-major order. | `double__n__x__m__` - | An _n_ {times} _m_ matrix of double precision floating-point values + | An _n_ {times} _m_ matrix of double-precision floating-point values stored in column-major order. | `long double`, `long double__n__` | A floating-point scalar and vector type with at least as much @@ -876,7 +1377,7 @@ The numeric indices that can be used are given in the table below: .Numeric indices for built-in vector data types [width="100%",cols="<34%,<66%",options="header"] |==== -| *Vector Components* | *Numeric indices that can be used* +| Vector Components | Numeric indices that can be used | 2-component | 0, 1 | 3-component | 0, 1, 2 | 4-component | 0, 1, 2, 3 @@ -1229,9 +1730,9 @@ following table. [[table-rounding-mode]] .Rounding Modes -[cols=",",] +[cols=",",options="header",] |==== -| *Modifier* | *Rounding Mode Description* +| Modifier | Rounding Mode Description | `_rte` | Round to nearest even | `_rtz` | Round toward zero | `_rtp` | Round toward positive infinity @@ -1248,6 +1749,7 @@ footnote:[{fn-float-conversion-rounding}] use the default rounding mode. The only default floating-point rounding mode supported is round to nearest even i.e the default rounding mode will be `_rte` for floating-point types. + [[out-of-range-behavior]] ==== Out-of-Range Behavior and Saturated Conversions @@ -1328,7 +1830,7 @@ float4 f = convert_float4_rtp( i ); [[reinterpreting-data-as-another-type]] -=== Reinterpreting Data As Another Type +=== Reinterpreting Data as Another Type It is frequently necessary to reinterpret bits in a data type as another data type in OpenCL. @@ -1355,7 +1857,7 @@ Examples: [source,opencl_c] ---------- -// d only if double precision is supported +// d only if double-precision is supported union { float f; uint u; double d; } u; u.u = 1; // u.f contains 2**-149. 
u.d is undefined -- @@ -1433,7 +1935,7 @@ short8 j = as_short8(i); float4 f; // Error. Result and operand have different sizes -double4 g = as_double4(f); // Only if double precision is supported. +double4 g = as_double4(f); // Only if double-precision is supported. float4 f; // Legal. g.xyz will have same values as f.xyz. g.w is undefined @@ -1647,16 +2149,21 @@ The result is a scalar signed integer of type `int` if the source operands are scalar and a vector signed integer type of the same size as the source operands if the source operands are vector types. Vector source operands of type `char__n__` and `uchar__n__` return a -`char__n__` result; vector source operands of type `short__n__` and +`char__n__` result; vector source operands of type +ifdef::cl_khr_fp16[] +`half__n__` footnote:[{fn-half-supported}], +endif::cl_khr_fp16[] +`short__n__` and `ushort__n__` return a `short__n__` result; vector source operands of type `int__n__`, `uint__n__` and `float__n__` return an `int__n__` result; vector source operands of type `long__n__`, `ulong__n__` and `double__n__` return a `long__n__` result. + For scalar types, the relational operators shall return 0 if the specified -relation is _false_ and 1 if the specified relation is _true_. +relation is _false_ and return 1 if the specified relation is _true_. For vector types, the relational operators shall return 0 if the specified -relation is _false_ and -1 (i.e. all bits set) if the specified relation is -_true_. +relation is _false_ and return -1 (i.e. all bits set) if the specified +relation is _true_. The relational operators always return 0 if either argument is not a number (NaN). -- @@ -1690,17 +2197,21 @@ The result is a scalar signed integer of type `int` if the source operands are scalar and a vector signed integer type of the same size as the source operands if the source operands are vector types.
Vector source operands of type `char__n__` and `uchar__n__` return a -`char__n__` result; vector source operands of type `short__n__` and +`char__n__` result; vector source operands of type +ifdef::cl_khr_fp16[] +`half__n__` footnote:[{fn-half-supported}], +endif::cl_khr_fp16[] +`short__n__` and `ushort__n__` return a `short__n__` result; vector source operands of type `int__n__`, `uint__n__` and `float__n__` return an `int__n__` result; vector source operands of type `long__n__`, `ulong__n__` and `double__n__` return a `long__n__` result. -For scalar types, the equality operators return 0 if the specified relation -is _false_ and return 1 if the specified relation is _true_. +For scalar types, the equality operators shall return 0 if the specified +relation is _false_ and return 1 if the specified relation is _true_. For vector types, the equality operators shall return 0 if the specified -relation is _false_ and -1 (i.e. all bits set) if the specified relation is -_true_. +relation is _false_ and return -1 (i.e. all bits set) if the specified +relation is _true_. The equality operator equal (*==*) returns 0 if one or both arguments are not a number (NaN). The equality operator not equal (*!=*) returns 1 (for scalar source @@ -1724,6 +2235,10 @@ vector operand. The scalar type is then widened to a vector that has the same number of components as the vector operand. The operation is done component-wise resulting in the same size vector. +ifdef::cl_khr_fp16[] +Vector source operands of type `half__n__` footnote:[{fn-half-supported}] +return a `short__n__` result. +endif::cl_khr_fp16[] -- @@ -1753,16 +2268,20 @@ The result is a scalar signed integer of type `int` if the source operands are scalar and a vector signed integer type of the same size as the source operands if the source operands are vector types.
Vector source operands of type `char__n__` and `uchar__n__` return a -`char__n__` result; vector source operands of type `short__n__` and +`char__n__` result; vector source operands of type +ifdef::cl_khr_fp16[] +`half__n__` footnote:[{fn-half-supported}], +endif::cl_khr_fp16[] +`short__n__` and `ushort__n__` return a `short__n__` result; vector source operands of type `int__n__`, `uint__n__` and `float__n__` return an `int__n__` result; vector source operands of type `long__n__`, `ulong__n__` and `double__n__` return a `long__n__` result. For scalar types, the logical operators shall return 0 if the result of the -operation is _false_ and 1 if the result is _true_. +operation is _false_ and return 1 if the result is _true_. For vector types, the logical operators shall return 0 if the result of the -operation is _false_ and -1 (i.e. all bits set) if the result is _true_. +operation is _false_ and return -1 (i.e. all bits set) if the result is _true_. -- @@ -1779,18 +2298,22 @@ The result is a scalar signed integer of type `int` if the source operands are scalar and a vector signed integer type of the same size as the source operands if the source operands are vector types. Vector source operands of type `char__n__` and `uchar__n__` return a -`char__n__` result; vector source operands of type `short__n__` and +`char__n__` result; vector source operands of type +ifdef::cl_khr_fp16[] +`half__n__` footnote:[{fn-half-supported}], +endif::cl_khr_fp16[] +`short__n__` and `ushort__n__` return a `short__n__` result; vector source operands of type `int__n__`, `uint__n__` and `float__n__` return an `int__n__` result; vector source operands of type `long__n__`, `ulong__n__` and `double__n__` return a `long__n__` result.
-For scalar types, the result of the logical unary operator is 0 if the value -of its operand compares unequal to 0, and 1 if the value of its operand +For scalar types, the logical unary operator shall return 0 if the value of +its operand compares unequal to 0, and return 1 if the value of its operand compares equal to 0. -For vector types, the unary operator shall return a 0 if the value of its -operand compares unequal to 0, and -1 (i.e. all bits set) if the value of -its operand compares equal to 0. +For vector types, the unary operator shall return 0 if the value of its +operand compares unequal to 0, and return -1 (i.e. all bits set) if the +value of its operand compares equal to 0. -- @@ -2304,7 +2827,7 @@ kernel void foo(int a) -- -=== Usage for declaration scopes and variable types +=== Usage for Declaration Scopes and Variable Types -- This section describes use of address space qualifiers with respect to declaration scopes or variable types. @@ -2512,10 +3035,7 @@ Qualifiers must be explicitly specified for: .Address space behavior [width="100%",cols="1,2,2,2",options="header"] |==== -| *Address Space* - | *Supported Usage* - | *Initialization* - | *Inference* +| Address Space | Supported Usage | Initialization | Inference | `{global}` | Program scope variables, for OpenCL C 2.0 or @@ -2578,7 +3098,7 @@ Qualifiers must be explicitly specified for: -- [[addr-spaces-conversions]] -=== Address space conversions +=== Address Space Conversions -- @@ -3064,7 +3584,7 @@ foo (read_only image2d_t imageA, } ---------- -imageA is a read-only 2D image object, and image is a write-only 2D image +`imageA` is a read-only 2D image object, and `imageB` is a write-only 2D image object. The sampler-less read image and write image built-ins can be used with image @@ -3179,7 +3699,7 @@ Advanced Vector Instructions (Intel^{reg}^ AVX) which implements a work-items to one thread, running a second work-item in the high half of the 256-bit AVX register. 
-As another example, a Power4 machine has two scalar double precision +As another example, a Power4 machine has two scalar double-precision floating-point units with an 6-cycle deep pipe. An autovectorizer for the Power4 machine might choose to interleave six kernels declared with the `+__attribute__(( vec_type_hint (double2)))+` @@ -3337,8 +3857,12 @@ address space qualifiers. floating-point arithmetic can be performed. . Whether or not irreducible control flow is illegal is implementation defined. - . The following restriction only applies to OpenCL C 1.0, also see the - *cl_khr_byte_addressable_store* extension. + . The following restriction only applies to +ifndef::cl_khr_byte_addressable_store[OpenCL C 1.0: +] +ifdef::cl_khr_byte_addressable_store[] + OpenCL C 1.0, and only if the `<>` + extension macro is not supported: + +endif::cl_khr_byte_addressable_store[] Built-in types that are less than 32-bits in size, i.e. `char`, `uchar`, `char2`, `uchar2`, `short`, `ushort`, and `half`, have the following restriction: @@ -3400,6 +3924,17 @@ Program scope variables can be declared with `{constant}` address space qualifiers or if {opencl_c_program_scope_global_variables} feature is supported with `{global}` address space qualifier. -- +ifdef::cl_khr_initialize_memory[] + . [[restrictions-initialize-memory]] The following restriction only + applies if the `<>` extension is supported: + + If the context is created with `CL_CONTEXT_MEMORY_INITIALIZE_KHR`, + appropriate memory locations as specified by the bit-field are + initialized with zeroes, prior to the start of execution of any kernel. + The driver chooses when, prior to kernel execution, the initialization of + local and/or private memory is performed. + The only requirement is there should be no values set from outside the + context, which can be read during a kernel execution. 
+endif::cl_khr_initialize_memory[] [[preprocessor-directives-and-macros]] @@ -3489,7 +4024,7 @@ The following predefined macro names are available. Used to determine the current rounding mode and is set to rte. Only affects the rounding mode of conversions to a float type. <> OpenCL C 1.1, along with the - *cl_khr_select_fprounding_mode* extension. + `<>` extension. `+__ENDIAN_LITTLE__+` :: Used to determine if the OpenCL device is a little endian architecture @@ -3897,7 +4432,7 @@ defined. [[specifying-attribute-for-unrolling-loops]] -=== Specifying Attribute For Unrolling Loops +=== Specifying Attribute for Unrolling Loops [open,refpage='attributes-loopUnroll',desc='Specifying Attribute For Unrolling Loops',type='freeform',spec='clang',anchor='specifying-attribute-for-unrolling-loops'] -- @@ -4376,7 +4911,6 @@ that operate on mixed scalar and vector types, however. [open,refpage='workItemFunctions',desc='Work-Item Functions',type='freeform',spec='clang',anchor='work-item-functions',xrefs='',alias='get_enqueued_local_size get_global_id get_global_linear_id get_global_offset get_global_size get_group_id get_local_id get_local_linear_id get_local_size get_num_groups get_work_dim'] -- - The following table describes the list of built-in work-item functions that can be used to query the number of dimensions, the global and local work size specified to *clEnqueueNDRangeKernel*, and the global and local @@ -4384,9 +4918,9 @@ identifier of each work-item when this kernel is being executed on a device. [[table-work-item-functions]] .Built-in Work-Item Functions -[cols=",",] +[cols=",",options="header",] |==== -| *Function* | *Description* +| Function | Description | uint *get_work_dim*() | Returns the number of dimensions in use. This is the value given to the _work_dim_ argument specified in @@ -4503,19 +5037,20 @@ identifier of each work-item when this kernel is being executed on a device. 
|==== NOTE: The functionality described in the following table <> support for OpenCL C 3.0 or newer and the {opencl_c_subgroups} -feature. +requires>> support for +ifdef::cl_khr_subgroups[the `<>` extension macro; or for] +OpenCL C 3.0 or newer and the {opencl_c_subgroups} feature. The following table describes the list of built-in work-item functions that can be used to query the size of a sub-group, number of sub-groups per work-group, and identifier of the sub-group within a work-group and work-item within a sub-group when this kernel is being executed on a device. -.Built-in Work-Item Functions for Sub-groups +[[table-subgroup-work-item-functions]] +.Built-in Work-Item Functions for Sub-Groups [cols="a,",options="header",] |==== -| *Function* -| *Description* +| Function | Description | uint *get_sub_group_size*() | Returns the number of work-items in the sub-group. @@ -4568,7 +5103,6 @@ sub-group when this kernel is being executed on a device. [open,refpage='mathFunctions',desc='Math Functions',type='freeform',spec='clang',anchor='math-functions',xrefs='commonFunctions integerFunctions',alias='acos acosh acospi asin asinh asinpi atan atan2 atan2pi atanh atanpi cbrt ceil copysign cos cosh cospi divide erf erfc exp exp10 exp2 expm1 fabs fdim floor fma fmax fmin fmod fract frexp half_cos half_divide half_exp half_exp10 half_exp2 half_log half_log10 half_log2 half_powr half_recip half_rsqrt half_sin half_sqrt half_tan hypot ilogb ldexp lgamma lgamma_r log log10 log1p log2 logb mad maxmag minmag modf nan native_cos native_divide native_exp native_exp10 native_exp2 native_log native_log10 native_log2 native_powr native_recip native_rsqrt native_sin native_sqrt native_tan nextafter pow pown powr recip remainder remquo rint rootn round rsqrt sin sincos sinh sinpi sqrt tan tanh tanpi tgamma trunc'] -- - The built-in math functions are categorized into the following: * A list of built-in functions that have scalar or vector argument @@ -4582,27 +5116,56 @@ The built-in 
math functions are not affected by the prevailing rounding mode in the calling environment, and always return the same value as they would if called with the round to nearest even rounding mode. -The <> -table describes the list of built-in math functions that can take scalar or -vector arguments. -We use the generic type name `gentype` to indicate that the function can take -`float`, `float2`, `float3`, `float4`, `float8`, `float16`, `double` -footnote:double-supported[{fn-double-supported}], `double2`, -`double3`, `double4`, `double8` or `double16` as the type for the arguments. -We use the generic type name `gentypef` to indicate that the function can -take `float`, `float2`, `float3`, `float4`, `float8`, or `float16` as the -type for the arguments. -We use the generic type name `gentyped` footnote:double-supported[] to -indicate that the function can take `double`, `double2`, `double3`, `double4`, -`double8` or `double16` as the type for the arguments. -For any specific use of a function, the actual type has to be the same for -all arguments and the return type, unless otherwise specified. +The <> table describes the list of built-in math functions that can +take scalar or vector arguments. + +The generic type name `gentype` indicates that the function can take any of + + * `float`, `float2`, `float3`, `float4`, `float8`, or `float16` + * `double` footnote:double-supported[{fn-double-supported}], `double2`, + `double3`, `double4`, `double8` or `double16` +ifdef::cl_khr_fp16[] + * `half` footnote:[{fn-half-supported}], `half2`, `half3`, `half4`, + `half8` or `half16` +endif::cl_khr_fp16[] + +as the type for the arguments. + +The generic type name `gentypef` indicates that the function can take any of + + * `float`, `float2`, `float3`, `float4`, `float8`, or `float16` + +as the type for the arguments. 
+ +The generic type name `gentyped` footnote:[{fn-double-supported}] indicates +that the function can take any of + + * `double`, `double2`, `double3`, `double4`, `double8` or `double16` + +as the type for the arguments. + +ifdef::cl_khr_fp16[] +The generic type name `gentypeh` footnote:[{fn-half-supported}] indicates +that the function can take any of + + * `half`, `half2`, `half3`, `half4`, `half8` or `half16` + +as the type for the arguments. + +NOTE: All functions taking or returning `half` types are supported only when +the `<>` extension macro is supported. +endif::cl_khr_fp16[] + +For any specific use of a function with `gentype*` arguments the actual type +has to be the same for all arguments and the return type, unless they are +explicitly specified as an actual type. [[table-builtin-math]] .Built-in Scalar and Vector Argument Math Functions -[cols=",",] +[cols=",",options="header",] |==== -| *Function* |*Description* +| Function | Description | gentype *acos*(gentype) | Arc cosine function. Returns an angle in radians. | gentype *acosh*(gentype) @@ -4642,7 +5205,7 @@ all arguments and the return type, unless otherwise specified. | Complementary error function. | gentype *erf*(gentype) | Error function encountered in integrating the - http://mathworld.wolfram.com/NormalDistribution.html[_normal + https://mathworld.wolfram.com/NormalDistribution.html[_normal distribution_]. | gentype *exp*(gentype _x_) | Compute the base-_e_ exponential of _x_. @@ -4655,7 +5218,7 @@ all arguments and the return type, unless otherwise specified. | gentype *fabs*(gentype) | Compute absolute value of a floating-point number. | gentype *fdim*(gentype _x_, gentype _y_) - | _x_ - _y_ if _x_ > _y_, +0 if _x_ is less than or equal to y. + | _x_ - _y_ if _x_ > _y_, +0 if _x_ is less than or equal to _y_. | gentype *floor*(gentype) | Round to integral value using the round to negative infinity rounding mode. 
@@ -4667,12 +5230,16 @@ all arguments and the return type, unless otherwise specified. | gentype *fmax*(gentype _x_, gentype _y_) + gentypef *fmax*(gentypef _x_, float _y_) + gentyped *fmax*(gentyped _x_, double _y_) + +ifdef::cl_khr_fp16[gentypeh *fmax*(gentypeh _x_, half _y_)] | Returns _y_ if _x_ < _y_, otherwise it returns _x_. If one argument is a NaN, *fmax*() returns the other argument. If both arguments are NaNs, *fmax*() returns a NaN. | gentype *fmin*(gentype _x_, gentype _y_) + gentypef *fmin*(gentypef _x_, float _y_) + gentyped *fmin*(gentyped _x_, double _y_) + +ifdef::cl_khr_fp16[gentypeh *fmin*(gentypeh _x_, half _y_)] | Returns _y_ if _y_ < _x_, otherwise it returns _x_. If one argument is a NaN, *fmin*() returns the other argument. If both arguments are NaNs, *fmin*() returns a NaN. @@ -4682,26 +5249,48 @@ all arguments and the return type, unless otherwise specified. Returns _x_ - _y_ * *trunc*(_x_/_y_). | gentype *fract*(gentype _x_, {global} gentype _*iptr_) + gentype *fract*(gentype _x_, {local} gentype _*iptr_) + - gentype *fract*(gentype _x_, {private} gentype _*iptr_) + + gentype *fract*(gentype _x_, {private} gentype _*iptr_) For OpenCL C 2.0, or OpenCL C 3.0 or newer with the - {opencl_c_generic_address_space} feature: + + {opencl_c_generic_address_space} feature: gentype *fract*(gentype _x_, gentype _*iptr_) +// TODO The fp16 extension uses the constant `0x1.ffcp-1f` below - unclear +// why, see the OpenCL-Docs issue. | Returns *fmin*(_x_ - *floor*(_x_), `0x1.fffffep-1f`). *floor*(x) is returned in _iptr_. 
footnote:[{fn-fract-min}] +ifdef::cl_khr_fp16[] +| half__n__ **frexp**(half__n__ _x_, {global} int__n__ *exp) + + half **frexp**(half _x_, {global} int *exp) + + half__n__ **frexp**(half__n__ _x_, {local} int__n__ *exp) + + half **frexp**(half _x_, {local} int *exp) + + half__n__ **frexp**(half__n__ _x_, {private} int__n__ *exp) + + half **frexp**(half _x_, {private} int *exp) + + For OpenCL C 2.0, or OpenCL C 3.0 or newer with the + {opencl_c_generic_address_space} feature: + + half__n__ **frexp**(half__n__ _x_, int__n__ *exp) + + half **frexp**(half _x_, int *exp) + | Extract mantissa and exponent from _x_. + For each component the mantissa returned is a `half` with magnitude in + the interval [1/2, 1) or 0. + Each component of _x_ equals mantissa returned * 2__^exp^__. +endif::cl_khr_fp16[] | float__n__ **frexp**(float__n__ _x_, {global} int__n__ *exp) + - float **frexp**(float _x_, {global} int *exp) + + float **frexp**(float _x_, {global} int *exp) float__n__ **frexp**(float__n__ _x_, {local} int__n__ *exp) + - float **frexp**(float _x_, {local} int *exp) + + float **frexp**(float _x_, {local} int *exp) float__n__ **frexp**(float__n__ _x_, {private} int__n__ *exp) + - float **frexp**(float _x_, {private} int *exp) + + float **frexp**(float _x_, {private} int *exp) For OpenCL C 2.0, or OpenCL C 3.0 or newer with the - {opencl_c_generic_address_space} feature: + + {opencl_c_generic_address_space} feature: float__n__ **frexp**(float__n__ _x_, int__n__ *exp) + float **frexp**(float _x_, int *exp) @@ -4710,16 +5299,16 @@ all arguments and the return type, unless otherwise specified. in the interval [1/2, 1) or 0. Each component of _x_ equals mantissa returned * 2__^exp^__. 
| double__n__ **frexp**(double__n__ _x_, {global} int__n__ *exp) + - double **frexp**(double _x_, {global} int *exp) + + double **frexp**(double _x_, {global} int *exp) double__n__ **frexp**(double__n__ _x_, {local} int__n__ *exp) + - double **frexp**(double _x_, {local} int *exp) + + double **frexp**(double _x_, {local} int *exp) double__n__ **frexp**(double__n__ _x_, {private} int__n__ *exp) + - double **frexp**(double _x_, {private} int *exp) + + double **frexp**(double _x_, {private} int *exp) For OpenCL C 2.0, or OpenCL C 3.0 or newer with the - {opencl_c_generic_address_space} feature: + + {opencl_c_generic_address_space} feature: double__n__ **frexp**(double__n__ _x_, int__n__ *exp) + double **frexp**(double _x_, int *exp) @@ -4734,6 +5323,11 @@ all arguments and the return type, unless otherwise specified. int *ilogb*(float _x_) + int__n__ *ilogb*(double__n__ _x_) + int *ilogb*(double _x_) + +ifdef::cl_khr_fp16[] + int__n__ *ilogb*(half__n__ _x_) + + int *ilogb*(half _x_) +endif::cl_khr_fp16[] | Return the exponent as an integer value. | float__n__ *ldexp*(float__n__ _x_, int__n__ _k_) + float__n__ *ldexp*(float__n__ _x_, int _k_) + @@ -4741,31 +5335,55 @@ all arguments and the return type, unless otherwise specified. double__n__ *ldexp*(double__n__ _x_, int__n__ _k_) + double__n__ *ldexp*(double__n__ _x_, int _k_) + double *ldexp*(double _x_, int _k_) +ifdef::cl_khr_fp16[] + half__n__ *ldexp*(half__n__ _x_, int__n__ _k_) + + half__n__ *ldexp*(half__n__ _x_, int _k_) + + half *ldexp*(half _x_, int _k_) +endif::cl_khr_fp16[] | Multiply _x_ by 2 to the power _k_. 
| gentype *lgamma*(gentype _x_) + - float__n__ **lgamma_r**(float__n__ _x_, {global} int__n__ *_signp_) + float **lgamma_r**(float _x_, {global} int *_signp_) + double__n__ **lgamma_r**(double__n__ _x_, {global} int__n__ *_signp_) + - double **lgamma_r**(double _x_, {global} int *_signp_) + + double **lgamma_r**(double _x_, {global} int *_signp_) + +ifdef::cl_khr_fp16[] + half__n__ **lgamma_r**(half__n__ _x_, {global} int__n__ *_signp_) + + half **lgamma_r**(half _x_, {global} int *_signp_) + +endif::cl_khr_fp16[] float__n__ **lgamma_r**(float__n__ _x_, {local} int__n__ *_signp_) + float **lgamma_r**(float _x_, {local} int *_signp_) + double__n__ **lgamma_r**(double__n__ _x_, {local} int__n__ *_signp_) + - double **lgamma_r**(double _x_, {local} int *_signp_) + + double **lgamma_r**(double _x_, {local} int *_signp_) + +ifdef::cl_khr_fp16[] + half__n__ **lgamma_r**(half__n__ _x_, {local} int__n__ *_signp_) + + half **lgamma_r**(half _x_, {local} int *_signp_) + +endif::cl_khr_fp16[] float__n__ **lgamma_r**(float__n__ _x_, {private} int__n__ *_signp_) + float **lgamma_r**(float _x_, {private} int *_signp_) + double__n__ **lgamma_r**(double__n__ _x_, {private} int__n__ *_signp_) + - double **lgamma_r**(double _x_, {private} int *_signp_) + + double **lgamma_r**(double _x_, {private} int *_signp_) + +ifdef::cl_khr_fp16[] + half__n__ **lgamma_r**(half__n__ _x_, {private} int__n__ *_signp_) + + half **lgamma_r**(half _x_, {private} int *_signp_) + +endif::cl_khr_fp16[] For OpenCL C 2.0, or OpenCL C 3.0 or newer with the - {opencl_c_generic_address_space} feature: + + {opencl_c_generic_address_space} feature: float__n__ **lgamma_r**(float__n__ _x_, int__n__ *_signp_) + float **lgamma_r**(float _x_, int *_signp_) + double__n__ **lgamma_r**(double__n__ _x_, int__n__ *_signp_) + double **lgamma_r**(double _x_, int *_signp_) + +ifdef::cl_khr_fp16[] + half__n__ **lgamma_r**(half__n__ _x_, int__n__ *_signp_) + + half **lgamma_r**(half _x_, int *_signp_) +endif::cl_khr_fp16[] | 
Log gamma function. Returns the natural logarithm of the absolute value of the gamma function. @@ -4784,11 +5402,12 @@ all arguments and the return type, unless otherwise specified. log__~r~__(\|_x_\|). | gentype *mad*(gentype _a_, gentype _b_, gentype _c_) | *mad* computes _a_ * _b_ + _c_. - The function may compute _a_ * _b_ + _c_ with reduced accuracy - in the embedded profile. See the OpenCL SPIR-V Environment Specification - for details. On some hardware the mad instruction may provide better - performance than expanded computation of _a_ * _b_ + _c_. - footnote:[{fn-mad-caution}] + The function may compute _a_ * _b_ + _c_ with reduced accuracy in the + embedded profile. + See the OpenCL SPIR-V Environment Specification for details. + On some hardware the mad instruction may provide better performance + than expanded computation of _a_ * _b_ + _c_. + footnote:[{fn-mad-caution}] | gentype *maxmag*(gentype _x_, gentype _y_) | Returns _x_ if \|_x_\| > \|_y_\|, _y_ if \|_y_\| > \|_x_\|, otherwise *fmax*(_x_, _y_). @@ -4801,10 +5420,10 @@ all arguments and the return type, unless otherwise specified. <> support for OpenCL C 1.1 or newer. | gentype *modf*(gentype _x_, {global} gentype _*iptr_) + gentype *modf*(gentype _x_, {local} gentype _*iptr_) + - gentype *modf*(gentype _x_, {private} gentype _*iptr_) + + gentype *modf*(gentype _x_, {private} gentype _*iptr_) For OpenCL C 2.0, or OpenCL C 3.0 or newer with the - {opencl_c_generic_address_space} feature: + + {opencl_c_generic_address_space} feature: gentype *modf*(gentype _x_, gentype _*iptr_) | Decompose a floating-point number. @@ -4815,9 +5434,16 @@ all arguments and the return type, unless otherwise specified. float *nan*(uint _nancode_) + double__n__ *nan*(ulong__n__ _nancode_) + double *nan*(ulong _nancode_) + +ifdef::cl_khr_fp16[] + half__n__ *nan*(ushort__n__ _nancode_) + + half *nan*(ushort _nancode_) +endif::cl_khr_fp16[] | Returns a quiet NaN. 
The _nancode_ may be placed in the significand of the resulting NaN. | gentype *nextafter*(gentype _x_, gentype _y_) +// TODO shouldn't this be "next representable FP value of the precision of +// its arguments"? See the OpenCL-Docs issue. | Computes the next representable floating-point value following _x_ in the direction of _y_. Thus, if _y_ is less than _x_, *nextafter*() returns the largest @@ -4828,6 +5454,11 @@ all arguments and the return type, unless otherwise specified. float *pown*(float _x_, int _y_) + double__n__ *pown*(double__n__ _x_, int__n__ _y_) + double *pown*(double _x_, int _y_) + +ifdef::cl_khr_fp16[] + half__n__ *pown*(half__n__ _x_, int__n__ _y_) + + half *pown*(half _x_, int _y_) +endif::cl_khr_fp16[] | Compute _x_ to the power _y_, where _y_ is an integer. | gentype *powr*(gentype _x_, gentype _y_) | Compute _x_ to the power _y_, where _x_ is >= 0. @@ -4838,16 +5469,16 @@ all arguments and the return type, unless otherwise specified. one. If _r_ is zero, it is given the same sign as _x_. 
| float__n__ **remquo**(float__n__ _x_, float__n__ _y_, {global} int__n__ _*quo_) + - float **remquo**(float _x_, float _y_, {global} int _*quo_) + + float **remquo**(float _x_, float _y_, {global} int _*quo_) float__n__ **remquo**(float__n__ _x_, float__n__ _y_, {local} int__n__ _*quo_) + - float **remquo**(float _x_, float _y_, {local} int _*quo_) + + float **remquo**(float _x_, float _y_, {local} int _*quo_) float__n__ **remquo**(float__n__ _x_, float__n__ _y_, {private} int__n__ _*quo_) + - float **remquo**(float _x_, float _y_, {private} int _*quo_) + + float **remquo**(float _x_, float _y_, {private} int _*quo_) For OpenCL C 2.0, or OpenCL C 3.0 or newer with the - {opencl_c_generic_address_space} feature: + + {opencl_c_generic_address_space} feature: float__n__ **remquo**(float__n__ _x_, float__n__ _y_, int__n__ _*quo_) + float **remquo**(float _x_, float _y_, int _*quo_) @@ -4861,16 +5492,16 @@ all arguments and the return type, unless otherwise specified. _x_/_y_, and gives that value the same sign as _x_/_y_. It stores this signed value in the object pointed to by _quo_. 
| double__n__ **remquo**(double__n__ _x_, double__n__ _y_, {global} int__n__ _*quo_) + - double **remquo**(double _x_, double _y_, {global} int _*quo_) + + double **remquo**(double _x_, double _y_, {global} int _*quo_) double__n__ **remquo**(double__n__ _x_, double__n__ _y_, {local} int__n__ _*quo_) + - double **remquo**(double _x_, double _y_, {local} int _*quo_) + + double **remquo**(double _x_, double _y_, {local} int _*quo_) double__n__ **remquo**(double__n__ _x_, double__n__ _y_, {private} int__n__ _*quo_) + - double **remquo**(double _x_, double _y_, {private} int _*quo_) + + double **remquo**(double _x_, double _y_, {private} int _*quo_) For OpenCL C 2.0, or OpenCL C 3.0 or newer with the - {opencl_c_generic_address_space} feature: + + {opencl_c_generic_address_space} feature: double__n__ **remquo**(double__n__ _x_, double__n__ _y_, int__n__ _*quo_) + double **remquo**(double _x_, double _y_, int _*quo_) @@ -4883,6 +5514,31 @@ all arguments and the return type, unless otherwise specified. *remquo* also calculates the lower seven bits of the integral quotient _x_/_y_, and gives that value the same sign as _x_/_y_. It stores this signed value in the object pointed to by _quo_. +ifdef::cl_khr_fp16[] +| half__n__ **remquo**(half__n__ _x_, half__n__ _y_, {global} int__n__ _*quo_) + + half **remquo**(half _x_, half _y_, {global} int _*quo_) + + half__n__ **remquo**(half__n__ _x_, half__n__ _y_, {local} int__n__ _*quo_) + + half **remquo**(half _x_, half _y_, {local} int _*quo_) + + half__n__ **remquo**(half__n__ _x_, half__n__ _y_, {private} int__n__ _*quo_) + + half **remquo**(half _x_, half _y_, {private} int _*quo_) + + For OpenCL C 2.0, or OpenCL C 3.0 or newer with the + {opencl_c_generic_address_space} feature: + + half__n__ **remquo**(half__n__ _x_, half__n__ _y_, int__n__ _*quo_) + + half **remquo**(half _x_, half _y_, int _*quo_) + | The *remquo* function computes the value _r_ such that _r_ = _x_ - + _k_*_y_, where _k_ is the integer nearest the exact value of _x_/_y_. 
+ If there are two integers closest to _x_/_y_, _k_ shall be the even + one. + If _r_ is zero, it is given the same sign as _x_. + This is the same value that is returned by the *remainder* function. + *remquo* also calculates the lower seven bits of the integral quotient + _x_/_y_, and gives that value the same sign as _x_/_y_. + It stores this signed value in the object pointed to by _quo_. +endif::cl_khr_fp16[] | gentype *rint*(gentype) | Round to integral value (using round to nearest even rounding mode) in floating-point format. @@ -4891,6 +5547,11 @@ all arguments and the return type, unless otherwise specified. float *rootn*(float _x_, int _y_) + double__n__ *rootn*(double__n__ _x_, int__n__ _y_) + double *rootn*(double _x_, int _y_) + +ifdef::cl_khr_fp16[] + half__n__ *rootn*(half__n__ _x_, int__n__ _y_) + + half *rootn*(half _x_, int _y_) +endif::cl_khr_fp16[] | Compute _x_ to the power 1/_y_. | gentype *round*(gentype _x_) | Return the integral value nearest to _x_ rounding halfway cases away @@ -4901,13 +5562,13 @@ all arguments and the return type, unless otherwise specified. | Compute sine, where _x_ is an angle in radians. | gentype *sincos*(gentype _x_, {global} gentype _*cosval_) + gentype *sincos*(gentype _x_, {local} gentype _*cosval_) + - gentype *sincos*(gentype _x_, {private} gentype _*cosval_) + + gentype *sincos*(gentype _x_, {private} gentype _*cosval_) For OpenCL C 2.0, or OpenCL C 3.0 or newer with the - {opencl_c_generic_address_space} feature: + + {opencl_c_generic_address_space} feature: gentype *sincos*(gentype _x_, gentype _*cosval_) - | Compute sine and cosine of x. + | Compute sine and cosine of _x_. The computed sine is the return value and computed cosine is returned in _cosval_, where _x_ is an angle in radians. 
| gentype *sinh*(gentype _x_) @@ -4948,11 +5609,17 @@ We use the generic type name `gentype` to indicate that the functions in the following table can take `float`, `float2`, `float3`, `float4`, `float8` or `float16` as the type for the arguments. +ifdef::cl_khr_fp16[] +NOTE: The use of `half` in this table does not refer to the argument and +return types, which are 32-bit floating-point values, but to the accuracy +requirements of the function results. +endif::cl_khr_fp16[] + [[table-builtin-half-native-math]] .Built-in Scalar and Vector _half_ and _native_ Math Functions -[cols=",",] +[cols=",",options="header",] |==== -| *Function* | *Description* +| Function | Description | gentype *half_cos*(gentype _x_) | Compute cosine. _x_ is an angle in radians, and must be in the range [-2^16^, +2^16^]. @@ -5051,13 +5718,13 @@ functions. [open,refpage='mathConstants',desc='Math Constants',type='freeform',spec='clang',anchor='table-builtin-half-native-math',xrefs='mathFunctions',alias='MAXFLOAT HUGE_VALF INFINITY NAN HUGE_VAL'] -- -The following symbolic constants are available. +The following constants are available. Their values are of type `float` and are accurate within the precision of a single precision floating-point number. -[cols=",",] +[cols=",",options="header",] |==== -| *Constant Name* | *Description* +| Constant Name | Description | `MAXFLOAT` | Value of maximum non-infinite single-precision floating-point number. | `HUGE_VALF` @@ -5071,13 +5738,12 @@ single precision floating-point number. | A constant expression of type `float` representing a quiet NaN. |==== -If double precision is supported by the device, e.g. 
for OpenCL C 3.0 or newer -the {opencl_c_fp64} feature macro is present, the following symbolic -constants will also be available: +If <>, then the following constants are also available: -[cols=",",] +[cols=",",options="header",] |==== -| *Constant Name* | *Description* +| Constant Name | Description | `HUGE_VAL` | A positive double constant expression. `HUGE_VAL` evaluates to +infinity. @@ -5087,11 +5753,10 @@ constants will also be available: [[floating-point-macros-and-pragmas]] -==== Floating-point macros and pragmas +==== Floating-point Macros and Pragmas [open,refpage='fpMacros',desc='Floating-Point Macros And Pragmas',type='freeform',spec='clang',anchor='floating-point-macros-and-pragmas',xrefs='integerMacros',alias='FP_CONTRACT FP_FAST_FMAF FP_FAST_FMA macroLimits'] -- - The `FP_CONTRACT` pragma can be used to allow (if the state is on) or disallow (if the state is off) the implementation to contract expressions. Each pragma can occur either outside external declarations or preceding all @@ -5143,9 +5808,9 @@ The following table describes the built-in macro names given above in the OpenCL C programming language and the corresponding macro names available to the application. -[cols=",",] +[cols=",",options="header",] |==== -| *Macro in OpenCL Language* | *Macro for application* +| Macro in OpenCL Language | Macro for application | `FLT_DIG` | `CL_FLT_DIG` | `FLT_MANT_DIG` | `CL_FLT_MANT_DIG` | `FLT_MAX_10_EXP` | `CL_FLT_MAX_10_EXP` @@ -5167,9 +5832,9 @@ The following constants are also available. They are of type `float` and are accurate within the precision of the `float` type. 
-[cols=",",] +[cols=",",options="header",] |==== -| *Constant* | *Description* +| Constant | Description | `M_E_F` | Value of _e_ | `M_LOG2E_F` | Value of log~2~e | `M_LOG10E_F` | Value of log~10~e @@ -5185,12 +5850,11 @@ They are of type `float` and are accurate within the precision of the | `M_SQRT1_2_F` | Value of 1 / {sqrt}2 |==== -If double precision is supported by the device, e.g. for OpenCL C 3.0 or newer -the {opencl_c_fp64} feature macro is present, then the following macros -and constants are also available: +If <>, then the following macros and constants are also available: The `FP_FAST_FMA` macro indicates whether the *fma*() family of functions -are fast compared with direct code for double precision floating-point. +are fast compared with direct code for double-precision floating-point. If defined, the `FP_FAST_FMA` macro shall indicate that the *fma*() function generally executes about as fast as, or faster than, a multiply and an add of `double` operands @@ -5216,9 +5880,9 @@ The following table describes the built-in macro names given above in the OpenCL C programming language and the corresponding macro names available to the application. -[cols=",",] +[cols=",",options="header",] |==== -| *Macro in OpenCL Language* | *Macro for application* +| Macro in OpenCL Language | Macro for application | `DBL_DIG` | `CL_DBL_DIG` | `DBL_MANT_DIG` | `CL_DBL_MANT_DIG` | `DBL_MAX_10_EXP` | `CL_DBL_MAX_10_EXP` @@ -5234,9 +5898,9 @@ The following constants are also available. They are of type ``double`` and are accurate within the precision of the double type. -[cols=",",] +[cols=",",options="header",] |==== -| *Constant* | *Description* +| Constant | Description | `M_E` | Value of _e_ | `M_LOG2E` | Value of log~2~e | `M_LOG10E` | Value of log~10~e @@ -5251,6 +5915,78 @@ double type. 
| `M_SQRT2` | Value of {sqrt}2 | `M_SQRT1_2` | Value of 1 / {sqrt}2 |==== + +ifdef::cl_khr_fp16[] +If the `<>` extension macro is supported, then the following +macros and constants are also available: + +The `FP_FAST_FMA_HALF` macro indicates whether the *fma*() family of +functions are fast compared with direct code for half-precision +floating-point. +If defined, the `FP_FAST_FMA_HALF` macro shall indicate that the *fma*() +function generally executes about as fast as, or faster than, a multiply and +an add of `half` operands. + +The macro names given in the following list must use the values specified. +These constant expressions are suitable for use in #if preprocessing +directives. + +[source,opencl_c] +---- +#define HALF_DIG 3 +#define HALF_MANT_DIG 11 +#define HALF_MAX_10_EXP +4 +#define HALF_MAX_EXP +16 +#define HALF_MIN_10_EXP -4 +#define HALF_MIN_EXP -13 +#define HALF_RADIX 2 +#define HALF_MAX 0x1.ffcp15h +#define HALF_MIN 0x1.0p-14h +#define HALF_EPSILON 0x1.0p-10h +---- + +The following table describes the built-in macro names given above in the +OpenCL C programming language and the corresponding macro names available to +the application. + +[cols=",",options="header",] +|==== +| Macro in OpenCL Language | Macro for application +| `HALF_DIG` | `CL_HALF_DIG` +| `HALF_MANT_DIG` | `CL_HALF_MANT_DIG` +| `HALF_MAX_10_EXP` | `CL_HALF_MAX_10_EXP` +| `HALF_MAX_EXP` | `CL_HALF_MAX_EXP` +| `HALF_MIN_10_EXP` | `CL_HALF_MIN_10_EXP` +| `HALF_MIN_EXP` | `CL_HALF_MIN_EXP` +| `HALF_RADIX` | `CL_HALF_RADIX` +| `HALF_MAX` | `CL_HALF_MAX` +| `HALF_MIN` | `CL_HALF_MIN` +| `HALF_EPSILON` | `CL_HALF_EPSILON` +|==== + +The following constants are also available. +They are of type `half` and are accurate within the precision of the `half` +type. 
+ +[cols=",",options="header",] +|==== +| Constant | Description +| `M_E_H` | Value of _e_ +| `M_LOG2E_H` | Value of log~2~e +| `M_LOG10E_H` | Value of log~10~e +| `M_LN2_H` | Value of log~e~2 +| `M_LN10_H` | Value of log~e~10 +| `M_PI_H` | Value of {pi} +| `M_PI_2_H` | Value of {pi} / 2 +| `M_PI_4_H` | Value of {pi} / 4 +| `M_1_PI_H` | Value of 1 / {pi} +| `M_2_PI_H` | Value of 2 / {pi} +| `M_2_SQRTPI_H` | Value of 2 / {sqrt}{pi} +| `M_SQRT2_H` | Value of {sqrt}2 +| `M_SQRT1_2_H` | Value of 1 / {sqrt}2 +|==== +endif::cl_khr_fp16[] + -- @@ -5285,14 +6021,15 @@ For vector versions, `sgentype` is implicitly widened to `gentype` as described for <>. _n_ is 2, 3, 4, 8, or 16. -For any specific use of a function, the actual type has to be the same for -all arguments and the return type unless otherwise specified. +For any specific use of a function with `gentype*` arguments the actual type +has to be the same for all arguments and the return type, unless they are +explicitly specified as an actual type. [[table-builtin-functions]] .Built-in Scalar and Vector Integer Argument Functions -[cols=",",] +[cols=",",options="header",] |==== -| *Function* | *Description* +| Function | Description | ugentype *abs*(gentype _x_) | Returns \|x\|. | ugentype *abs_diff*(gentype _x_, gentype _y_) @@ -5323,19 +6060,70 @@ all arguments and the return type unless otherwise specified. type of _x_, if _x_ is a vector. <> support for OpenCL 2.0 or newer. +ifdef::cl_khr_integer_dot_product[] +| uint *dot*(uchar4 a, uchar4 b) + + int *dot*(char4 a, char4 b) + + int *dot*(uchar4 a, char4 b) + + int *dot*(char4 a, uchar4 b) + | `dot` returns the dot product of the two input vectors `a` and `b`. + The components of `a` and `b` are sign- or zero-extended to the width + of the destination type and the vectors with extended components are + multiplied component-wise. + All the components of the resulting vectors are added together to form + the final result. 
+ + <> that the + {opencl_c_integer_dot_product_input_4x8bit} feature macro is defined, + +| uint *dot_acc_sat*(uchar4 a, uchar4 b, uint acc) + + int *dot_acc_sat*(char4 a, char4 b, int acc) + + int *dot_acc_sat*(uchar4 a, char4 b, int acc) + + int *dot_acc_sat*(char4 a, uchar4 b, int acc) + a| `dot_acc_sat` returns the saturating addition of the dot product of + the two input vectors `a` and `b` and the accumulator `acc`: +---- +product = dot(a,b); +result = add_sat(product, acc); +---- + +<> that the +{opencl_c_integer_dot_product_input_4x8bit} feature macro is defined, + +| uint *dot_4x8packed_uu_uint*(uint a, uint b) + + int *dot_4x8packed_ss_int*(uint a, uint b) + + int *dot_4x8packed_us_int*(uint a, uint b) + + int *dot_4x8packed_su_int*(uint a, uint b) + | Returns *dot* for 4x8 bit input vectors packed into a 32-bit word. + + <> that the + {opencl_c_integer_dot_product_input_4x8bit_packed} feature macro is + defined, + +| uint *dot_acc_sat_4x8packed_uu_uint*(uint a, uint b, uint acc) + + int *dot_acc_sat_4x8packed_ss_int*(uint a, uint b, int acc) + + int *dot_acc_sat_4x8packed_us_int*(uint a, uint b, int acc) + + int *dot_acc_sat_4x8packed_su_int*(uint a, uint b, int acc) + | Returns *dot_acc_sat* for 4x8 bit input vectors packed into a 32-bit + word. + + <> that the + {opencl_c_integer_dot_product_input_4x8bit_packed} feature macro is + defined, +endif::cl_khr_integer_dot_product[] + | gentype *mad_hi*(gentype _a_, gentype _b_, gentype _c_) | Returns *mul_hi*(_a_, _b_) + _c_. | gentype *mad_sat*(gentype _a_, gentype _b_, gentype _c_) | Returns _a_ * _b_ + _c_ and saturates the result. -| gentype *max*(gentype _x_, gentype _y_) + +| gentype *max*(gentype _x_, gentype _y_) - For OpenCL C 1.1 or newer: + + For OpenCL C 1.1 or newer: gentype *max*(gentype _x_, sgentype _y_) | Returns _y_ if _x_ < _y_, otherwise it returns _x_. 
-| gentype *min*(gentype _x_, gentype _y_) + +| gentype *min*(gentype _x_, gentype _y_) - For OpenCL C 1.1 or newer: + + For OpenCL C 1.1 or newer: gentype *min*(gentype _x_, sgentype _y_) | Returns _y_ if _y_ < _x_, otherwise it returns _x_. @@ -5369,7 +6157,7 @@ all arguments and the return type unless otherwise specified. | _result_[i] = ((long)_hi_[i] << 32) \| _lo_[i] + _result_[i] = ((ulong)_hi_[i] << 32) \| _lo_[i] | gentype *popcount*(gentype _x_) - | Returns the number of non-zero bits in _x_. + + | Returns the number of non-zero bits in _x_. <> support for OpenCL C 1.2 or newer. |==== @@ -5382,9 +6170,9 @@ take `int`, `int2`, `int3`, `int4`, `int8`, `int16`, `uint`, `uint2`, [[table-builtin-fast-integer]] .Built-in 24-bit Integer Functions -[cols=",",] +[cols=",",options="header",] |==== -| *Function* | *Description* +| Function | Description | gentype *mad24*(gentype _x_, gentype _y_, gentype z) | Multipy two 24-bit integer values _x_ and _y_ and add the 32-bit integer result to the 32-bit integer _z_. @@ -5403,6 +6191,132 @@ take `int`, `int2`, `int3`, `int4`, `int8`, `int16`, `uint`, `uint2`, -- +ifdef::cl_khr_extended_bit_ops[] +[[extended-bit-operations]] +==== Extended Bit Operations + +[open,refpage='extendedBitOperations',desc='Extended Bit Operations',type='freeform',spec='clang',anchor='extended-bit-operations',xrefs='commonFunctions',alias='bitfield_insert bitfield_extract_signed bitfield_extract_unsigned bit_reverse'] +-- +If the `<>` extension macro is supported, the +functions described in the <> table can be used with built-in +scalar or vector integer types to perform extended bit operations. +The functions that operate on vector types operate component-wise. +The description is per-component. 
+ +In the table below, the generic type name `gentype` refers to the built-in +integer types `char`, `char__n__`, `uchar`, `uchar__n__`, `short`, +`short__n__`, `ushort`, `ushort__n__`, `int`, `int__n__`, `uint`, +`uint__n__`, `long`, `long__n__`, `ulong`, and `ulong__n__`. +The generic type name `igentype` refers to the built-in signed integer types +`char`, `char__n__`, `short`, `short__n__`, `int`, `int__n__`, `long`, and +`long__n__`. +The generic type name `ugentype` refers to the built-in unsigned integer +types `uchar`, `uchar__n__`, `ushort`, `ushort__n__`, `uint`, `uint__n__`, +`ulong`, and `ulong__n__`. +_n_ is 2, 3, 4, 8, or 16. + +[[table-builtin-extended-bit-operations]] +.Built-in Scalar and Vector Extended Bit Operations +[cols="1a,1", options="header"] +|=== +| Function | Description +a| +[source,opencl_c] +---- +gentype bitfield_insert( + gentype base, gentype insert, + uint offset, uint count) +---- + | Returns a copy of _base_, with a modified bitfield that comes from + _insert_. + + Any bits of the result value numbered outside [_offset_, _offset_ {plus} + _count_ - 1] (inclusive) will come from the corresponding bits in + _base_. + + Any bits of the result value numbered inside [_offset_, _offset_ {plus} + _count_ - 1] (inclusive) will come from the bits numbered [0, _count_ + - 1] (inclusive) of _insert_. + + _count_ is the number of bits to be modified. + If _count_ equals 0, the return value will be equal to _base_. + + If _count_ or _offset_ or _offset_ + _count_ is greater than number of + bits in `gentype` (for scalar types) or components of `gentype` (for + vector types), the result is undefined. + + <> support for the + `<>` extension macro. +a| +[source,opencl_c] +---- +igentype bitfield_extract_signed( + gentype base, + uint offset, uint count) +---- + | Returns an extracted bitfield from _base_ with sign extension. + The type of the return value is always a signed type. 
+ + The bits of _base_ numbered in [_offset_, _offset_ + _count_ - 1] + (inclusive) are returned as the bits numbered in [0, _count_ - 1] + (inclusive) of the result. + The remaining bits in the result will be sign extended by replicating + the bit numbered _offset_ + _count_ - 1 of _base_. + + _count_ is the number of bits to be extracted. + If _count_ equals 0, the result is 0. + + If the _count_ or _offset_ or _offset_ + _count_ is greater than + number of bits in `gentype` (for scalar types) or components of + `gentype` (for vector types), the result is undefined. + + <> support for the + `<>` extension macro. +a| +[source,opencl_c] +---- +ugentype bitfield_extract_unsigned( + gentype base, + uint offset, uint count) +---- + | Returns an extracted bitfield from _base_ with zero extension. + The type of the return value is always an unsigned type. + + The bits of _base_ numbered in [_offset_, _offset_ + _count_ - 1] + (inclusive) are returned as the bits numbered in [0, _count_ - 1] + (inclusive) of the result. + The remaining bits in the result will be zero. + + _count_ is the number of bits to be extracted. + If _count_ equals 0, the result is 0. + + If the _count_ or _offset_ or _offset_ + _count_ is greater than + number of bits in `gentype` (for scalar types) or components of + `gentype` (for vector types), the result is undefined. + + <> support for the + `<>` extension macro. +a| +[source,opencl_c] +---- +gentype bit_reverse( + gentype base) +---- + | Returns the value of _base_ with reversed bits. + That is, the bit numbered _n_ of the result value will be taken from + the bit numbered _width_ - _n_ - 1 of _base_ (for scalar types) or a + component of _base_ (for vector types), where _width_ is number of + bits of `gentype` (for scalar types) or components of `gentype` (for + vector types). + + <> support for the + `<>` extension macro. 
+|=== +-- +endif::cl_khr_extended_bit_ops[] + + [[integer-macros]] ==== Integer Macros @@ -5435,9 +6349,9 @@ The following table describes the built-in macro names given above in the OpenCL C programming language and the corresponding macro names available to the application. -[cols=",",] +[cols=",",options="header",] |==== -| *Macro in OpenCL Language* | *Macro for application* +| Macro in OpenCL Language | Macro for application | `CHAR_BIT` | `CL_CHAR_BIT` | `CHAR_MAX` | `CL_CHAR_MAX` | `CHAR_MIN` | `CL_CHAR_MIN` @@ -5466,25 +6380,54 @@ The <> describes the list of built-in common functions. These all operate component-wise. The description is per-component. -We use the generic type name `gentype` to indicate that the function can take -`float`, `float2`, `float3`, `float4`, `float8`, `float16`, `double` -footnote:[{fn-double-supported}], `double2`, `double3`, `double4`, -`double8` or `double16` as the type for the arguments. -We use the generic type name `gentypef` to indicate that the function can -take `float`, `float2`, `float3`, `float4`, `float8`, or `float16` as the -type for the arguments. -We use the generic type name `gentyped` to indicate that the function can -take `double`, `double2`, `double3`, `double4`, `double8` or `double16` as -the type for the arguments. + +The generic type name `gentype` indicates that the function can take any of + + * `float`, `float2`, `float3`, `float4`, `float8`, or `float16` + * `double` footnote:double-supported[{fn-double-supported}], `double2`, + `double3`, `double4`, `double8` or `double16` +ifdef::cl_khr_fp16[] + * `half` footnote:[{fn-half-supported}], `half2`, `half3`, `half4`, + `half8` or `half16` +endif::cl_khr_fp16[] + +as the type for the arguments. + +The generic type name `gentypef` indicates that the function can take any of + + * `float`, `float2`, `float3`, `float4`, `float8`, or `float16` + +as the type for the arguments. 
+ +The generic type name `gentyped` footnote:[{fn-double-supported}] indicates +that the function can take any of + + * `double`, `double2`, `double3`, `double4`, `double8` or `double16` + +as the type for the arguments. + +ifdef::cl_khr_fp16[] +The generic type name `gentypeh` footnote:[{fn-half-supported}] indicates +that the function can take any of + + * `half`, `half2`, `half3`, `half4`, `half8` or `half16` + +as the type for the arguments. + +NOTE: All functions taking or returning `half` types are supported only when +the `<>` extension macro is supported. +endif::cl_khr_fp16[] [[table-builtin-common]] .Built-in Scalar and Vector Argument Common Functions -[cols=",",] +[cols=",",options="header",] |==== -| *Function* | *Description* +| Function | Description | gentype *clamp*(gentype _x_, gentype _minval_, gentype _maxval_) + gentypef *clamp*(gentypef _x_, float _minval_, float _maxval_) + gentyped *clamp*(gentyped _x_, double _minval_, double _maxval_) + +ifdef::cl_khr_fp16[gentypeh *clamp*(gentypeh _x_, half _minval_, half _maxval_)] | Returns *fmin*(*fmax*(_x_, _minval_), _maxval_). Results are undefined if _minval_ > _maxval_. | gentype *degrees*(gentype _radians_) @@ -5492,32 +6435,47 @@ the type for the arguments. | gentype *max*(gentype _x_, gentype _y_) + gentypef *max*(gentypef _x_, float _y_) + gentyped *max*(gentyped _x_, double _y_) + +ifdef::cl_khr_fp16[gentypeh *max*(gentypeh _x_, half _y_)] | Returns _y_ if _x_ < _y_, otherwise it returns _x_. If _x_ or _y_ are infinite or NaN, the return values are undefined. | gentype *min*(gentype _x_, gentype _y_) + gentypef *min*(gentypef _x_, float _y_) + gentyped *min*(gentyped _x_, double _y_) + +ifdef::cl_khr_fp16[gentypeh *min*(gentypeh _x_, half _y_)] | Returns _y_ if _y_ < _x_, otherwise it returns _x_. If _x_ or _y_ are infinite or NaN, the return values are undefined. 
| gentype *mix*(gentype _x_, gentype _y_, gentype _a_) + gentypef *mix*(gentypef _x_, gentypef _y_, float _a_) + gentyped *mix*(gentyped _x_, gentyped _y_, double _a_) - | Returns the linear blend of _x_ & _y_ implemented as: + +ifdef::cl_khr_fp16[gentypeh *mix*(gentypeh _x_, gentypeh _y_, half _a_)] + a| Returns the linear blend of _x_ and _y_ implemented as: _x_ + (_y_ - _x_) * _a_ _a_ must be a value in the range [0.0, 1.0]. If _a_ is not in the range [0.0, 1.0], the return values are undefined. + +ifdef::cl_khr_fp16[] +NOTE: The half-precision *mix* function can be implemented using +contractions such as *mad* or *fma*. +endif::cl_khr_fp16[] | gentype *radians*(gentype _degrees_) | Converts _degrees_ to radians, i.e. ({pi} / 180) * _degrees_. | gentype *step*(gentype _edge_, gentype _x_) + gentypef *step*(float _edge_, gentypef _x_) + gentyped *step*(double _edge_, gentyped _x_) + +ifdef::cl_khr_fp16[gentypeh *step*(half _edge_, gentypeh _x_)] | Returns 0.0 if _x_ < _edge_, otherwise it returns 1.0. | gentype *smoothstep*(gentype _edge0_, gentype _edge1_, gentype _x_) + gentypef *smoothstep*(float _edge0_, float _edge1_, gentypef _x_) + gentyped *smoothstep*(double _edge0_, double _edge1_, gentyped _x_) + +ifdef::cl_khr_fp16[gentypeh *smoothstep*(half _edge0_, half _edge1_, gentypeh _x_)] a| Returns 0.0 if _x_ \<= _edge0_ and 1.0 if _x_ >= _edge1_ and performs smooth Hermite interpolation between 0 and 1 when _edge0_ < _x_ < _edge1_. @@ -5536,6 +6494,10 @@ return t * t * (3 - 2 * t); Results are undefined if _edge0_ >= _edge1_ or if _x_, _edge0_ or _edge1_ is a NaN. +ifdef::cl_khr_fp16[] +NOTE: The half-precision *smoothstep* function can be implemented using +contractions such as *mad* or *fma*. +endif::cl_khr_fp16[] | gentype *sign*(gentype _x_) | Returns 1.0 if _x_ > 0, -0.0 if _x_ = -0.0, +0.0 if _x_ = +0.0, or -1.0 if _x_ < 0. @@ -5550,42 +6512,83 @@ a NaN.
[open,refpage='geometricFunctions',desc='Geometric Functions',type='freeform',spec='clang',anchor='geometric-functions',xrefs='integerFunctions',alias='cross dot distance length normalize fast_distance fast_length fast_normalize'] -- +// TODO It is not actually true that these functions operate component-wise - +// TODO in general they *combine* components. The <> describes the list of built-in geometric functions. These all operate component-wise. The description is per-component. -`float__n__` is `float`, `float2`, `float3`, or `float4` and `double__n__` is -`double` footnote:[{fn-double-supported}], `double2`, `double3`, or -`double4`. + +The generic type name `gentypef` indicates that the function can take any of + + * `float`, `float2`, `float3`, or `float4` + +as the type for the arguments. + +The generic type name `gentyped` footnote:[{fn-double-supported}] indicates +that the function can take any of + + * `double`, `double2`, `double3`, or `double4` + +as the type for the arguments. + +ifdef::cl_khr_fp16[] +The generic type name `gentypeh` footnote:[{fn-half-supported}] indicates +that the function can take any of + + * `half`, `half2`, `half3`, or `half4` + +as the type for the arguments. + +NOTE: All functions taking or returning `half` types are supported only when +the `<>` extension macro is supported. +endif::cl_khr_fp16[] + +For any specific use of a function with `gentype*` arguments the actual type +has to be the same for all arguments and the return type, unless they are +explicitly specified as an actual type.
[[table-builtin-geometric]] .Built-in Scalar and Vector Argument Geometric Functions -[cols=",",] +[cols=",",options="header",] |==== -| *Function* | *Description* +| Function | Description | float4 *cross*(float4 _p0_, float4 _p1_) + float3 *cross*(float3 _p0_, float3 _p1_) + double4 *cross*(double4 _p0_, double4 _p1_) + double3 *cross*(double3 _p0_, double3 _p1_) + +ifdef::cl_khr_fp16[] + half4 *cross*(half4 _p0_, half4 _p1_) + + half3 *cross*(half3 _p0_, half3 _p1_) +endif::cl_khr_fp16[] | Returns the cross product of _p0.xyz_ and _p1.xyz_. The _w_ component of `float4` result returned will be 0.0. -| float *dot*(float__n__ _p0_, float__n__ _p1_) + - double *dot*(double__n__ _p0_, double__n__ _p1_) - | Compute dot product. -| float *distance*(float__n__ _p0_, float__n__ _p1_) + - double *distance*(double__n__ _p0_, double__n__ _p1_) +| float *dot*(gentypef _p0_, gentypef _p1_) + + double *dot*(gentyped _p0_, gentyped _p1_) + +ifdef::cl_khr_fp16[half *dot*(gentypeh _p0_, gentypeh _p1_)] + | Compute the dot product of _p0_ and _p1_. +| float *distance*(gentypef _p0_, gentypef _p1_) + + double *distance*(gentyped _p0_, gentyped _p1_) + +ifdef::cl_khr_fp16[half *distance*(gentypeh _p0_, gentypeh _p1_)] | Returns the distance between _p0_ and _p1_. This is calculated as *length*(_p0_ - _p1_). -| float *length*(float__n__ _p_) + - double *length*(double__n__ _p_) +| float *length*(gentypef _p_) + + double *length*(gentyped _p_) + +ifdef::cl_khr_fp16[half *length*(gentypeh _p_)] | Return the length of vector _p_, i.e., {sqrt} __p.x__^2^ + _p.y_ ^2^ {plus} ... -| float__n__ *normalize*(float__n__ _p_) + - double__n__ *normalize*(double__n__ _p_) +| gentypef *normalize*(gentypef _p_) + + gentyped *normalize*(gentyped _p_) + +ifdef::cl_khr_fp16[gentypeh *normalize*(gentypeh _p_)] | Returns a vector in the same direction as _p_ but with a length of 1. 
| | -| float *fast_distance*(float__n__ _p0_, float__n__ _p1_) +| float *fast_distance*(float__n__ _p0_, float__n__ _p1_) | Returns *fast_length*(_p0_ - _p1_). | float *fast_length*(float__n__ _p_) | Returns the length of vector _p_ computed as: @@ -5627,7 +6630,6 @@ with the following exceptions: [open,refpage='relationalFunctions',desc='Relational Functions',type='freeform',spec='clang',anchor='relational-functions',xrefs='integerFunctions',alias='all any bitselect isequal isfinite isgreater isgreaterequal isinf isless islessequal islessgreater isnan isnormal isnotequal isordered isunordered select signbit'] -- - The <> and <> operators (*<*, *\<=*, *>*, *>=*, *!=*, *==*) can be used with scalar and vector built-in types and produce a scalar or vector signed integer result @@ -5669,89 +6671,159 @@ not a number (NaN) and the argument type is a vector. [[table-builtin-relational]] .Built-in Scalar and Vector Relational Functions -[cols=",",] +[cols=",",options="header",] |==== -| *Function* | *Description* +| Function | Description | int *isequal*(float _x_, float _y_) + int__n__ *isequal*(float__n__ _x_, float__n__ _y_) + int *isequal*(double _x_, double _y_) + long__n__ *isequal*(double__n__ _x_, double__n__ _y_) + +ifdef::cl_khr_fp16[] + int *isequal*(half _x_, half _y_) + + short__n__ *isequal*(half__n__ _x_, half__n__ _y_) +endif::cl_khr_fp16[] | Returns the component-wise compare of _x_ == _y_. | int *isnotequal*(float _x_, float _y_) + int__n__ *isnotequal*(float__n__ _x_, float__n__ _y_) + int *isnotequal*(double _x_, double _y_) + long__n__ *isnotequal*(double__n__ _x_, double__n__ _y_) + +ifdef::cl_khr_fp16[] + int *isnotequal*(half _x_, half _y_) + + short__n__ *isnotequal*(half__n__ _x_, half__n__ _y_) +endif::cl_khr_fp16[] | Returns the component-wise compare of _x_ != _y_.
| int *isgreater*(float _x_, float _y_) + int__n__ *isgreater*(float__n__ _x_, float__n__ _y_) + int *isgreater*(double _x_, double _y_) + long__n__ *isgreater*(double__n__ _x_, double__n__ _y_) + +ifdef::cl_khr_fp16[] + int *isgreater*(half _x_, half _y_) + + short__n__ *isgreater*(half__n__ _x_, half__n__ _y_) +endif::cl_khr_fp16[] | Returns the component-wise compare of _x_ > _y_. | int *isgreaterequal*(float _x_, float _y_) + int__n__ *isgreaterequal*(float__n__ _x_, float__n__ _y_) + int *isgreaterequal*(double _x_, double _y_) + long__n__ *isgreaterequal*(double__n__ _x_, double__n__ _y_) + +ifdef::cl_khr_fp16[] + int *isgreaterequal*(half _x_, half _y_) + + short__n__ *isgreaterequal*(half__n__ _x_, half__n__ _y_) +endif::cl_khr_fp16[] | Returns the component-wise compare of _x_ >= _y_. | int *isless*(float _x_, float _y_) + int__n__ *isless*(float__n__ _x_, float__n__ _y_) + int *isless*(double _x_, double _y_) + long__n__ *isless*(double__n__ _x_, double__n__ _y_) + +ifdef::cl_khr_fp16[] + int *isless*(half _x_, half _y_) + + short__n__ *isless*(half__n__ _x_, half__n__ _y_) +endif::cl_khr_fp16[] | Returns the component-wise compare of _x_ < _y_. | int *islessequal*(float _x_, float _y_) + int__n__ *islessequal*(float__n__ _x_, float__n__ _y_) + int *islessequal*(double _x_, double _y_) + long__n__ *islessequal*(double__n__ _x_, double__n__ _y_) + +ifdef::cl_khr_fp16[] + int *islessequal*(half _x_, half _y_) + + short__n__ *islessequal*(half__n__ _x_, half__n__ _y_) +endif::cl_khr_fp16[] | Returns the component-wise compare of _x_ \<= _y_. 
| int *islessgreater*(float _x_, float _y_) + int__n__ *islessgreater*(float__n__ _x_, float__n__ _y_) + int *islessgreater*(double _x_, double _y_) + long__n__ *islessgreater*(double__n__ _x_, double__n__ _y_) + +ifdef::cl_khr_fp16[] + int *islessgreater*(half _x_, half _y_) + + short__n__ *islessgreater*(half__n__ _x_, half__n__ _y_) +endif::cl_khr_fp16[] | Returns the component-wise compare of (_x_ < _y_) \|\| (_x_ > _y_) . | | | int *isfinite*(float) + int__n__ *isfinite*(float__n__) + int *isfinite*(double) + long__n__ *isfinite*(double__n__) + +ifdef::cl_khr_fp16[] + int *isfinite*(half) + + short__n__ *isfinite*(half__n__) +endif::cl_khr_fp16[] | Test for finite value. | int *isinf*(float) + int__n__ *isinf*(float__n__) + int *isinf*(double) + long__n__ *isinf*(double__n__) + +ifdef::cl_khr_fp16[] + int *isinf*(half) + + short__n__ *isinf*(half__n__) +endif::cl_khr_fp16[] | Test for infinity value (positive or negative). | int *isnan*(float) + int__n__ *isnan*(float__n__) + int *isnan*(double) + long__n__ *isnan*(double__n__) + +ifdef::cl_khr_fp16[] + int *isnan*(half) + + short__n__ *isnan*(half__n__) +endif::cl_khr_fp16[] | Test for a NaN. | int *isnormal*(float) + int__n__ *isnormal*(float__n__) + int *isnormal*(double) + long__n__ *isnormal*(double__n__) -| Test for a normal value. + +ifdef::cl_khr_fp16[] + int *isnormal*(half) + + short__n__ *isnormal*(half__n__) +endif::cl_khr_fp16[] + | Test for a normal value. | int *isordered*(float _x_, float _y_) + int__n__ *isordered*(float__n__ _x_, float__n__ _y_) + int *isordered*(double _x_, double _y_) + long__n__ *isordered*(double__n__ _x_, double__n__ _y_) + +ifdef::cl_khr_fp16[] + int *isordered*(half _x_, half _y_) + + short__n__ *isordered*(half__n__ _x_, half__n__ _y_) +endif::cl_khr_fp16[] | Test if arguments are ordered. - *isordered*() takes arguments _x_ and _y_, and returns the result - *isequal*(_x_, _x_) && *isequal*(_y_, _y_). 
+ *isordered*() takes arguments _x_ and _y_, and returns the result + *isequal*(_x_, _x_) && *isequal*(_y_, _y_). | int *isunordered*(float _x_, float _y_) + int__n__ *isunordered*(float__n__ _x_, float__n__ _y_) + int *isunordered*(double _x_, double _y_) + long__n__ *isunordered*(double__n__ _x_, double__n__ _y_) + +ifdef::cl_khr_fp16[] + int *isunordered*(half _x_, half _y_) + + short__n__ *isunordered*(half__n__ _x_, half__n__ _y_) +endif::cl_khr_fp16[] | Test if arguments are unordered. *isunordered*() takes arguments _x_ and _y_, returning non-zero if _x_ or _y_ is NaN, and zero otherwise. -| int *signbit*(float) + - int__n__ *signbit*(float__n__) + - int *signbit*(double) + - long__n__ *signbit*(double__n__) +| int *signbit*(float _x_) + + int__n__ *signbit*(float__n__ _x_) + + int *signbit*(double _x_) + + long__n__ *signbit*(double__n__ _x_) + +ifdef::cl_khr_fp16[] + int *signbit*(half _x_) + + short__n__ *signbit*(half__n__ _x_) +endif::cl_khr_fp16[] | Test for sign bit. - The scalar version of the function returns a 1 if the sign bit in the - float is set else returns 0. + The scalar version of the function returns a 1 if the sign bit in _x_ + is set else returns 0. The vector version of the function returns the following for each - component in `float__n__`: -1 (i.e all bits set) if the sign bit in the - float is set else returns 0. + component in _x_: -1 (i.e. all bits set) if the sign bit in the component is + set else returns 0. | | | int *any*(igentype _x_) @@ -5790,62 +6862,79 @@ Scalar inputs to *all* are <> OpenCL C version [open,refpage='vectorDataLoadandStoreFunctions',desc='Vector Data Load and Store Functions',type='freeform',spec='clang',anchor='vector-data-load-and-store-functions',xrefs='',alias='vloadn vload_half vload_halfn vloada_halfn vstoren vstore_half vstore_halfn vstorea_halfn'] -- -The <> describes the list of supported -functions that allow you to read and write vector types from a pointer to -memory.
-We use the generic type `gentype` to indicate the built-in data types -`char`, `uchar`, `short`, `ushort`, `int`, `uint`, `long` footnote:[{fn-int64-supported}], `ulong`, -`float` or `double` footnote:[{fn-double-supported}]. -We use the generic type name `gentype__n__` to represent n-element vectors -of `gentype` elements. -We use the type name `half__n__` to represent n-element vectors of half +The <> table describes the list of supported functions that allow you +to read and write vector types from a pointer to memory. + +The generic type name `gentype` indicates that the function can take any of + + * `char`, `uchar`, `short`, `ushort`, `int`, `uint`, `long` + footnote:[{fn-int64-supported}] or `ulong` + * `float` or `double` footnote:double-supported[{fn-double-supported}] +ifdef::cl_khr_fp16[] + * `half` footnote:[{fn-half-supported}] + + +NOTE: All functions taking or returning `half` types are supported only when +the `<>` extension macro is supported. +endif::cl_khr_fp16[] + +as the type for the arguments. + +The generic type name `gentype__n__` indicates an _n_-element vector of +`gentype` elements. + +The generic type name `half__n__` indicates an _n_-element vector of `half` elements. + The suffix _n_ is also used in the function names (i.e. *vload__n__*, -*vstore__n__* etc.), where _n_ = 2, 3 footnote:[{fn-vec3-vload-vstore}], 4, 8 or -16. +*vstore__n__* etc.), where _n_ = 2, 3 footnote:[{fn-vec3-vload-vstore}], 4, +8 or 16. 
[[table-vector-loadstore]] .Built-in Vector Data Load and Store Functions -[cols="7,3",] +[cols="7,3",options="header",] |==== -| *Function* | *Description* +| Function | Description | gentype__n__ **vload__n__**(size_t _offset_, const {global} gentype *_p_) + gentype__n__ **vload__n__**(size_t _offset_, const {local} gentype *_p_) + gentype__n__ **vload__n__**(size_t _offset_, const {constant} gentype *_p_) + - gentype__n__ **vload__n__**(size_t _offset_, const {private} gentype *_p_) + + gentype__n__ **vload__n__**(size_t _offset_, const {private} gentype *_p_) For OpenCL C 2.0, or OpenCL C 3.0 or newer with the - {opencl_c_generic_address_space} feature: + + {opencl_c_generic_address_space} feature: gentype__n__ **vload__n__**(size_t _offset_, const gentype *_p_) | Return `sizeof(gentype__n__)` bytes of data, where the first `(__n__ * sizeof(gentype))` bytes are read from the address computed as `(_p_ {plus} (_offset_ * _n_))`. The computed address must be 8-bit aligned if `gentype` is `char` or - `uchar`; 16-bit aligned if `gentype` is `short` or `ushort`; 32-bit - aligned if `gentype` is `int`, `uint`, or `float`; and 64-bit aligned - if `gentype` is `long` or `ulong`. + `uchar`; 16-bit aligned if `gentype` is +ifdef::cl_khr_fp16[`half`,] + `short` or `ushort`; 32-bit aligned if `gentype` is `int`, `uint`, or + `float`; and 64-bit aligned if `gentype` is `long` or `ulong`. 
| void **vstore__n__**(gentype__n__ _data_, size_t _offset_, {global} gentype *_p_) + void **vstore__n__**(gentype__n__ _data_, size_t _offset_, {local} gentype *_p_) + - void **vstore__n__**(gentype__n__ _data_, size_t _offset_, {private} gentype *_p_) + + void **vstore__n__**(gentype__n__ _data_, size_t _offset_, {private} gentype *_p_) For OpenCL C 2.0, or OpenCL C 3.0 or newer with the - {opencl_c_generic_address_space} feature: + + {opencl_c_generic_address_space} feature: void **vstore__n__**(gentype__n__ _data_, size_t _offset_, gentype *_p_) | Write `_n_ * sizeof(gentype)` bytes given by _data_ to the address computed as `(_p_ {plus} (_offset_ * _n_))`. The computed address must be 8-bit aligned if `gentype` is `char` or - `uchar`; 16-bit aligned if `gentype` is `short` or `ushort`; 32-bit - aligned if `gentype` is `int`, `uint`, or `float`; and 64-bit aligned - if `gentype` is `long` or `ulong`. + `uchar`; 16-bit aligned if `gentype` is +ifdef::cl_khr_fp16[`half`,] + `short` or `ushort`; 32-bit aligned if `gentype` is `int`, `uint`, or + `float`; and 64-bit aligned if `gentype` is `long` or `ulong`. | float **vload_half**(size_t _offset_, const {global} half *_p_) + float **vload_half**(size_t _offset_, const {local} half *_p_) + float **vload_half**(size_t _offset_, const {constant} half *_p_) + - float **vload_half**(size_t _offset_, const {private} half *_p_) + + float **vload_half**(size_t _offset_, const {private} half *_p_) For OpenCL C 2.0, or OpenCL C 3.0 or newer with the - {opencl_c_generic_address_space} feature: + + {opencl_c_generic_address_space} feature: float **vload_half**(size_t _offset_, const half *_p_) | Read `sizeof(half)` bytes of data from the address computed as `(_p_ @@ -5857,10 +6946,10 @@ The suffix _n_ is also used in the function names (i.e. 
*vload__n__*, | float__n__ **vload_half__n__**(size_t _offset_, const {global} half *_p_) + float__n__ **vload_half__n__**(size_t _offset_, const {local} half *_p_) + float__n__ **vload_half__n__**(size_t _offset_, const {constant} half *_p_) + - float__n__ **vload_half__n__**(size_t _offset_, const {private} half *_p_) + + float__n__ **vload_half__n__**(size_t _offset_, const {private} half *_p_) For OpenCL C 2.0, or OpenCL C 3.0 or newer with the - {opencl_c_generic_address_space} feature: + + {opencl_c_generic_address_space} feature: float__n__ **vload_half__n__**(size_t _offset_, const half *_p_) | Read `(_n_ * sizeof(half))` bytes of data from the address computed as @@ -5873,22 +6962,22 @@ The suffix _n_ is also used in the function names (i.e. *vload__n__*, void **vstore_half{rte}**(float _data_, size_t _offset_, {global} half *_p_) + void **vstore_half{rtz}**(float _data_, size_t _offset_, {global} half *_p_) + void **vstore_half{rtp}**(float _data_, size_t _offset_, {global} half *_p_) + - void **vstore_half{rtn}**(float _data_, size_t _offset_, {global} half *_p_) + + void **vstore_half{rtn}**(float _data_, size_t _offset_, {global} half *_p_) void **vstore_half**(float _data_, size_t _offset_, {local} half *_p_) + void **vstore_half{rte}**(float _data_, size_t _offset_, {local} half *_p_) + void **vstore_half{rtz}**(float _data_, size_t _offset_, {local} half *_p_) + void **vstore_half{rtp}**(float _data_, size_t _offset_, {local} half *_p_) + - void **vstore_half{rtn}**(float _data_, size_t _offset_, {local} half *_p_) + + void **vstore_half{rtn}**(float _data_, size_t _offset_, {local} half *_p_) void **vstore_half**(float _data_, size_t _offset_, {private} half *_p_) + void **vstore_half{rte}**(float _data_, size_t _offset_, {private} half *_p_) + void **vstore_half{rtz}**(float _data_, size_t _offset_, {private} half *_p_) + void **vstore_half{rtp}**(float _data_, size_t _offset_, {private} half *_p_) + - void **vstore_half{rtn}**(float _data_, 
size_t _offset_, {private} half *_p_) + + void **vstore_half{rtn}**(float _data_, size_t _offset_, {private} half *_p_) For OpenCL C 2.0, or OpenCL C 3.0 or newer with the - {opencl_c_generic_address_space} feature: + + {opencl_c_generic_address_space} feature: void **vstore_half**(float _data_, size_t _offset_, half *_p_) + void **vstore_half{rte}**(float _data_, size_t _offset_, half *_p_) + @@ -5907,22 +6996,22 @@ The suffix _n_ is also used in the function names (i.e. *vload__n__*, void **vstore_half__n__{rte}**(float__n__ _data_, size_t _offset_, {global} half *_p_) + void **vstore_half__n__{rtz}**(float__n__ _data_, size_t _offset_, {global} half *_p_) + void **vstore_half__n__{rtp}**(float__n__ _data_, size_t _offset_, {global} half *_p_) + - void **vstore_half__n__{rtn}**(float__n__ _data_, size_t _offset_, {global} half *_p_) + + void **vstore_half__n__{rtn}**(float__n__ _data_, size_t _offset_, {global} half *_p_) void **vstore_half__n__**(float__n__ _data_, size_t _offset_, {local} half *_p_) + void **vstore_half__n__{rte}**(float__n__ _data_, size_t _offset_, {local} half *_p_) + void **vstore_half__n__{rtz}**(float__n__ _data_, size_t _offset_, {local} half *_p_) + void **vstore_half__n__{rtp}**(float__n__ _data_, size_t _offset_, {local} half *_p_) + - void **vstore_half__n__{rtn}**(float__n__ _data_, size_t _offset_, {local} half *_p_) + + void **vstore_half__n__{rtn}**(float__n__ _data_, size_t _offset_, {local} half *_p_) void **vstore_half__n__**(float__n__ _data_, size_t _offset_, {private} half *_p_) + void **vstore_half__n__{rte}**(float__n__ _data_, size_t _offset_, {private} half *_p_) + void **vstore_half__n__{rtz}**(float__n__ _data_, size_t _offset_, {private} half *_p_) + void **vstore_half__n__{rtp}**(float__n__ _data_, size_t _offset_, {private} half *_p_) + - void **vstore_half__n__{rtn}**(float__n__ _data_, size_t _offset_, {private} half *_p_) + + void **vstore_half__n__{rtn}**(float__n__ _data_, size_t _offset_, {private} half *_p_) 
For OpenCL C 2.0, or OpenCL C 3.0 or newer with the - {opencl_c_generic_address_space} feature: + + {opencl_c_generic_address_space} feature: void **vstore_half__n__**(float__n__ _data_, size_t _offset_, half *_p_) + void **vstore_half__n__{rte}**(float__n__ _data_, size_t _offset_, half *_p_) + @@ -5942,22 +7031,22 @@ The suffix _n_ is also used in the function names (i.e. *vload__n__*, void **vstore_half{rte}**(double _data_, size_t _offset_, {global} half *_p_) + void **vstore_half{rtz}**(double _data_, size_t _offset_, {global} half *_p_) + void **vstore_half{rtp}**(double _data_, size_t _offset_, {global} half *_p_) + - void **vstore_half{rtn}**(double _data_, size_t _offset_, {global} half *_p_) + + void **vstore_half{rtn}**(double _data_, size_t _offset_, {global} half *_p_) void **vstore_half**(double _data_, size_t _offset_, {local} half *_p_) + void **vstore_half{rte}**(double _data_, size_t _offset_, {local} half *_p_) + void **vstore_half{rtz}**(double _data_, size_t _offset_, {local} half *_p_) + void **vstore_half{rtp}**(double _data_, size_t _offset_, {local} half *_p_) + - void **vstore_half{rtn}**(double _data_, size_t _offset_, {local} half *_p_) + + void **vstore_half{rtn}**(double _data_, size_t _offset_, {local} half *_p_) void **vstore_half**(double _data_, size_t _offset_, {private} half *_p_) + void **vstore_half{rte}**(double _data_, size_t _offset_, {private} half *_p_) + void **vstore_half{rtz}**(double _data_, size_t _offset_, {private} half *_p_) + void **vstore_half{rtp}**(double _data_, size_t _offset_, {private} half *_p_) + - void **vstore_half{rtn}**(double _data_, size_t _offset_, {private} half *_p_) + + void **vstore_half{rtn}**(double _data_, size_t _offset_, {private} half *_p_) For OpenCL C 2.0, or OpenCL C 3.0 or newer with the - {opencl_c_generic_address_space} feature: + + {opencl_c_generic_address_space} feature: void **vstore_half**(double _data_, size_t _offset_, half *_p_) + void **vstore_half{rte}**(double _data_, 
size_t _offset_, half *_p_) + @@ -5976,22 +7065,22 @@ The suffix _n_ is also used in the function names (i.e. *vload__n__*, void **vstore_half__n__{rte}**(double__n__ _data_, size_t _offset_, {global} half *_p_) + void **vstore_half__n__{rtz}**(double__n__ _data_, size_t _offset_, {global} half *_p_) + void **vstore_half__n__{rtp}**(double__n__ _data_, size_t _offset_, {global} half *_p_) + - void **vstore_half__n__{rtn}**(double__n__ _data_, size_t _offset_, {global} half *_p_) + + void **vstore_half__n__{rtn}**(double__n__ _data_, size_t _offset_, {global} half *_p_) void **vstore_half__n__**(double__n__ _data_, size_t _offset_, {local} half *_p_) + void **vstore_half__n__{rte}**(double__n__ _data_, size_t _offset_, {local} half *_p_) + void **vstore_half__n__{rtz}**(double__n__ _data_, size_t _offset_, {local} half *_p_) + void **vstore_half__n__{rtp}**(double__n__ _data_, size_t _offset_, {local} half *_p_) + - void **vstore_half__n__{rtn}**(double__n__ _data_, size_t _offset_, {local} half *_p_) + + void **vstore_half__n__{rtn}**(double__n__ _data_, size_t _offset_, {local} half *_p_) void **vstore_half__n__**(double__n__ _data_, size_t _offset_, {private} half *_p_) + void **vstore_half__n__{rte}**(double__n__ _data_, size_t _offset_, {private} half *_p_) + void **vstore_half__n__{rtz}**(double__n__ _data_, size_t _offset_, {private} half *_p_) + void **vstore_half__n__{rtp}**(double__n__ _data_, size_t _offset_, {private} half *_p_) + - void **vstore_half__n__{rtn}**(double__n__ _data_, size_t _offset_, {private} half *_p_) + + void **vstore_half__n__{rtn}**(double__n__ _data_, size_t _offset_, {private} half *_p_) For OpenCL C 2.0, or OpenCL C 3.0 or newer with the - {opencl_c_generic_address_space} feature: + + {opencl_c_generic_address_space} feature: void **vstore_half__n__**(double__n__ _data_, size_t _offset_, half *_p_) + void **vstore_half__n__{rte}**(double__n__ _data_, size_t _offset_, half *_p_) + @@ -6009,10 +7098,10 @@ The suffix _n_ is also 
used in the function names (i.e. *vload__n__*, | float__n__ **vloada_half__n__**(size_t _offset_, const {global} half *_p_) + float__n__ **vloada_half__n__**(size_t _offset_, const {local} half *_p_) + float__n__ **vloada_half__n__**(size_t _offset_, const {constant} half *_p_) + - float__n__ **vloada_half__n__**(size_t _offset_, const {private} half *_p_) + + float__n__ **vloada_half__n__**(size_t _offset_, const {private} half *_p_) For OpenCL C 2.0, or OpenCL C 3.0 or newer with the - {opencl_c_generic_address_space} feature: + + {opencl_c_generic_address_space} feature: float__n__ **vloada_half__n__**(size_t _offset_, const half *_p_) | For n = 2, 4, 8 and 16, read `sizeof(half__n__)` bytes of data from @@ -6030,22 +7119,22 @@ The suffix _n_ is also used in the function names (i.e. *vload__n__*, void **vstorea_half__n__{rte}**(float__n__ _data_, size_t _offset_, {global} half *_p_) + void **vstorea_half__n__{rtz}**(float__n__ _data_, size_t _offset_, {global} half *_p_) + void **vstorea_half__n__{rtp}**(float__n__ _data_, size_t _offset_, {global} half *_p_) + - void **vstorea_half__n__{rtn}**(float__n__ _data_, size_t _offset_, {global} half *_p_) + + void **vstorea_half__n__{rtn}**(float__n__ _data_, size_t _offset_, {global} half *_p_) void **vstorea_half__n__**(float__n__ _data_, size_t _offset_, {local} half *_p_) + void **vstorea_half__n__{rte}**(float__n__ _data_, size_t _offset_, {local} half *_p_) + void **vstorea_half__n__{rtz}**(float__n__ _data_, size_t _offset_, {local} half *_p_) + void **vstorea_half__n__{rtp}**(float__n__ _data_, size_t _offset_, {local} half *_p_) + - void **vstorea_half__n__{rtn}**(float__n__ _data_, size_t _offset_, {local} half *_p_) + + void **vstorea_half__n__{rtn}**(float__n__ _data_, size_t _offset_, {local} half *_p_) void **vstorea_half__n__**(float__n__ _data_, size_t _offset_, {private} half *_p_) + void **vstorea_half__n__{rte}**(float__n__ _data_, size_t _offset_, {private} half *_p_) + void 
**vstorea_half__n__{rtz}**(float__n__ _data_, size_t _offset_, {private} half *_p_) + void **vstorea_half__n__{rtp}**(float__n__ _data_, size_t _offset_, {private} half *_p_) + - void **vstorea_half__n__{rtn}**(float__n__ _data_, size_t _offset_, {private} half *_p_) + + void **vstorea_half__n__{rtn}**(float__n__ _data_, size_t _offset_, {private} half *_p_) For OpenCL C 2.0, or OpenCL C 3.0 or newer with the - {opencl_c_generic_address_space} feature: + + {opencl_c_generic_address_space} feature: void **vstorea_half__n__**(float__n__ _data_, size_t _offset_, half *_p_) + void **vstorea_half__n__{rte}**(float__n__ _data_, size_t _offset_, half *_p_) + @@ -6069,22 +7158,22 @@ The suffix _n_ is also used in the function names (i.e. *vload__n__*, void **vstorea_half__n__{rte}**(double__n__ _data_, size_t _offset_, {global} half *_p_) + void **vstorea_half__n__{rtz}**(double__n__ _data_, size_t _offset_, {global} half *_p_) + void **vstorea_half__n__{rtp}**(double__n__ _data_, size_t _offset_, {global} half *_p_) + - void **vstorea_half__n__{rtn}**(double__n__ _data_, size_t _offset_, {global} half *_p_) + + void **vstorea_half__n__{rtn}**(double__n__ _data_, size_t _offset_, {global} half *_p_) void **vstorea_half__n__**(double__n__ _data_, size_t _offset_, {local} half *_p_) + void **vstorea_half__n__{rte}**(double__n__ _data_, size_t _offset_, {local} half *_p_) + void **vstorea_half__n__{rtz}**(double__n__ _data_, size_t _offset_, {local} half *_p_) + void **vstorea_half__n__{rtp}**(double__n__ _data_, size_t _offset_, {local} half *_p_) + - void **vstorea_half__n__{rtn}**(double__n__ _data_, size_t _offset_, {local} half *_p_) + + void **vstorea_half__n__{rtn}**(double__n__ _data_, size_t _offset_, {local} half *_p_) void **vstorea_half__n__**(double__n__ _data_, size_t _offset_, {private} half *_p_) + void **vstorea_half__n__{rte}**(double__n__ _data_, size_t _offset_, {private} half *_p_) + void **vstorea_half__n__{rtz}**(double__n__ _data_, size_t _offset_, 
{private} half *_p_) + void **vstorea_half__n__{rtp}**(double__n__ _data_, size_t _offset_, {private} half *_p_) + - void **vstorea_half__n__{rtn}**(double__n__ _data_, size_t _offset_, {private} half *_p_) + + void **vstorea_half__n__{rtn}**(double__n__ _data_, size_t _offset_, {private} half *_p_) For OpenCL C 2.0, or OpenCL C 3.0 or newer with the - {opencl_c_generic_address_space} feature: + + {opencl_c_generic_address_space} feature: void **vstorea_half__n__**(double__n__ _data_, size_t _offset_, half *_p_) + void **vstorea_half__n__{rte}**(double__n__ _data_, size_t _offset_, half *_p_) + @@ -6140,20 +7229,20 @@ in a work-group. [[table-builtin-synchronization]] .Built-in Work-group Synchronization Functions -[cols="3,7",] +[cols="3,7",options="header",] |==== -| *Function* | *Description* +| Function | Description | void *barrier*( + - cl_mem_fence_flags _flags_) + + cl_mem_fence_flags _flags_) - For OpenCL C 2.0 or newer, as an alias for *barrier*: + + For OpenCL C 2.0 or newer, as an alias for *barrier*: void *work_group_barrier*( + - cl_mem_fence_flags _flags_) + + cl_mem_fence_flags _flags_) void *work_group_barrier*( + - cl_mem_fence_flags _flags_, + + cl_mem_fence_flags _flags_, memory_scope _scope_) | For these functions, if any work-item in a work-group encounters a barrier, the barrier must be encountered by all work-items in the @@ -6200,20 +7289,21 @@ in a work-group. -- NOTE: The functionality described in the following table <> support for OpenCL 3.0 or newer and the {opencl_c_subgroups} -feature. +requires>> support for +ifdef::cl_khr_subgroups[the `<>` extension macro; or for] +OpenCL 3.0 or newer and the {opencl_c_subgroups} feature. The following table describes built-in functions to synchronize the work-items in a sub-group. 
-.Built-in Sub-group Synchronization Functions +[[table-synchronization-functions]] +.Built-in Sub-Group Synchronization Functions [cols="3,7",options="header",] |==== -| *Function* -| *Description* +| Function | Description | void **sub_group_barrier**( + - cl_mem_fence_flags _flags_) + + cl_mem_fence_flags _flags_) void **sub_group_barrier**( + cl_mem_fence_flags _flags_, + @@ -6277,12 +7367,12 @@ The OpenCL C programming language implements the following explicit memory fence [[table-builtin-explicit-memory-fences]] .Built-in Explicit Memory Fence Functions -[cols="3,7",] +[cols="3,7",options="header",] |==== -| *Function* | *Description* +| Function | Description | void *mem_fence*( + - cl_mem_fence_flags _flags_) + + cl_mem_fence_flags _flags_) | Orders loads and stores of a work-item executing a kernel. This means that loads and stores preceding the *mem_fence* will be committed to memory @@ -6297,7 +7387,7 @@ The OpenCL C programming language implements the following explicit memory fence The value of _flags_ must be the same for all work-items in the work-group. | void *read_mem_fence*( + - cl_mem_fence_flags _flags_) + + cl_mem_fence_flags _flags_) | Read memory barrier that orders only loads. @@ -6310,7 +7400,7 @@ The OpenCL C programming language implements the following explicit memory fence The value of _flags_ must be the same for all work-items in the work-group. | void *write_mem_fence*( + - cl_mem_fence_flags _flags_) + + cl_mem_fence_flags _flags_) | Write memory barrier that orders only stores. @@ -6344,9 +7434,9 @@ types supported by OpenCL C or a user defined type. 
[[table-builtin-address-qualifier]] .Built-in Address Space Qualifier Functions -[cols=",",] +[cols=",",options="header",] |==== -| *Function* | *Description* +| Function | Description | global gentype * **to_global**(gentype *_ptr_) + const global gentype * **to_global**(const gentype *_ptr_) | Returns a pointer that points to a region in the `global` address @@ -6370,11 +7460,10 @@ types supported by OpenCL C or a user defined type. [[async-copies]] -=== Async Copies from Global to Local Memory, Local to Global Memory, and Prefetch +=== Async Copies From Global to Local Memory, Local to Global Memory, and Prefetch -[open,refpage='asyncCopyFunctions',desc='Async Copy Functions',type='freeform',spec='clang',anchor='async-copies',xrefs='',alias='async_work_group_copy async_work_group_strided_copy prefetch wait_group_events'] +[open,refpage='asyncCopyFunctions',desc='Async Copy Functions',type='freeform',spec='clang',anchor='async-copies',xrefs='',alias='async_work_group_copy async_work_group_strided_copy prefetch async_work_group_copy_fence wait_group_events'] -- - The OpenCL C programming language implements the <> that provide asynchronous copies between `global` and local memory and a prefetch from `global` memory. @@ -6396,26 +7485,37 @@ work-items in the work-group must execute the async copy or wait group events function on each iteration of the loop if any work-item executes the async copy or wait group events function on that iteration. -We use the generic type name `gentype` to indicate the built-in data types `char`, -`char__n__`, `uchar`, `uchar__n__`, `short`, `short__n__`, -`ushort`, `ushort__n__`, `int`, `int__n__`, `uint`, -`uint__n__`, `long` footnote:[{fn-int64-supported}], `long__n__`, -`ulong`, `ulong__n__`, `float`, `float__n__`, `double` -footnote:[{fn-double-supported}], and `double__n__` as the type for -the arguments unless otherwise stated. 
+The generic type name `gentype` indicates that the function can take any of + + * `char`, `char__n__`, `uchar`, or `uchar__n__` + * `short`, `short__n__`, `ushort`, or `ushort__n__` + * `int`, `int__n__`, `uint`, or `uint__n__` + * `long` footnote:[{fn-int64-supported}], `long__n__`, `ulong`, or + `ulong__n__` + * `float`, `float__n__` + * `double` footnote:[{fn-double-supported}] or `double__n__` +ifdef::cl_khr_fp16[] + * `half` footnote:[{fn-half-supported}] or `half__n__` + +NOTE: All functions taking or returning `half` types are supported only when +the `<>` extension macro is supported. +endif::cl_khr_fp16[] + +as the type for the arguments unless otherwise stated. _n_ is 2, 3 footnote:[{fn-vec3-async-copy}], 4, 8, or 16. [[table-builtin-async-copy]] .Built-in Async Copy and Prefetch Functions -[cols=",",] +[cols="1a,1",options="header",] |==== -| *Function* | *Description* +| Function | Description | event_t **async_work_group_copy**({local} gentype _*dst_, const {global} gentype *_src_, size_t _num_gentypes_, event_t _event_) + event_t **async_work_group_copy**({global} gentype _*dst_, const {local} gentype *_src_, size_t _num_gentypes_, event_t _event_) | Perform an async copy of _num_gentypes_ gentype elements from _src_ to _dst_. + Returns an event object that can be used by *wait_group_events* to wait for the async copy to finish. The _event_ argument can also be used to associate the @@ -6477,6 +7577,39 @@ _n_ is 2, 3 footnote:[{fn-vec3-async-copy}], 4, 8, or 16. cache. The prefetch instruction is applied to a work-item in a work-group and does not affect the functional behavior of the kernel. +ifdef::cl_khr_async_work_group_copy_fence[] +|[source,opencl_c] +---- +void async_work_group_copy_fence( + cl_mem_fence_flags flags) +---- + | Orders async copies produced by the work-items of a work-group + executing a kernel. 
+ Async copies preceding the *async_work_group_copy_fence* must complete + their access to the designated memory or memories, including both + reads-from and writes-to it, before async copies following the fence + are allowed to start accessing these memories. + In other words, every async copy preceding the + *async_work_group_copy_fence* must happen-before every async copy + following the fence, with respect to the designated memory or + memories. + + The _flags_ argument specifies the memory address space and can be set + to a combination of the following literal values: + + `CLK_LOCAL_MEM_FENCE` + + `CLK_GLOBAL_MEM_FENCE` + + The async fence is performed by all work-items in a work-group and + this built-in function must therefore be encountered by all work-items + in a work-group executing the kernel with the same argument values; + otherwise the results are undefined. + This rule applies to ND-ranges implemented with uniform and + non-uniform work-groups. + + <> support for the + `<>` extension macro. +endif::cl_khr_async_work_group_copy_fence[] |==== [NOTE] @@ -6488,6 +7621,176 @@ is undefined. -- +ifdef::cl_khr_extended_async_copies[] +[[extended-async-copies]] +==== Extended Async Copy Functions + +[open,refpage='extendedAsyncCopyFunctions',desc='Extended Async Copy Functions',type='freeform',spec='clang',anchor='extended-async-copies',xrefs='',alias='async_work_group_copy_2D2D async_work_group_copy_3D3D'] +-- +If the `<>` extension macro is supported, +additional <> are provided which interpret the source and destination as 2D or +3D data. + +[NOTE] +==== +<> is a special +case of *async_work_group_copy_2D2D*, namely one which copies a single +column to a single line or vice versa. 
+For example: + +`async_work_group_strided_copy(dst, src, num_gentypes, src_stride, event)` +is equal to `async_work_group_copy_2D2D(dst, 0, src, 0, sizeof(gentype), 1, +num_gentypes, src_stride, 1, event)` +==== + +The functions described in this section support arbitrary `gentype`-based +buffers by casting pointers to `void*`. + +These functions do not perform any implicit synchronization of source data +such as using a *barrier* before performing the copy. + +These functions are performed by all work-items in a work-group and must +therefore be encountered by all work-items in a work-group executing the +kernel with the same argument values; otherwise the results are undefined. + +The _src_offset_, _dst_offset_, _src_total_line_length_, +_dst_total_line_length_, _src_total_plane_area_ and _dst_total_plane_area_ +function arguments are expressed in elements. + +Both _src_total_line_length_ and _dst_total_line_length_ describe the +number of elements between the beginning of the current line and the +beginning of the next line. + +Both _src_total_plane_area_ and _dst_total_plane_area_ describe the +number of elements between the beginning of the current plane and the +beginning of the next plane. + +These functions return an event object that can be used by +*wait_group_events* to wait for the async copy to finish. +The _event_ argument can also be used to associate the async copy with a +previous async copy allowing an event to be shared by multiple async copies; +otherwise _event_ should be zero. +If the _event_ argument is non-zero, the event object supplied as the +_event_ argument will be returned. 
+ +[[table-builtin-extended-async-copy]] +.Built-in Extended Async Copy Functions +[cols="1a,1",options="header",] +|==== +| Function | Description +a| +[source,opencl_c] +---- +event_t async_work_group_copy_2D2D( + __local void *dst, + size_t dst_offset, + const __global void *src, + size_t src_offset, + size_t num_bytes_per_element, + size_t num_elements_per_line, + size_t num_lines, + size_t src_total_line_length, + size_t dst_total_line_length, + event_t event) + +event_t async_work_group_copy_2D2D( + __global void *dst, + size_t dst_offset, + const __local void *src, + size_t src_offset, + size_t num_bytes_per_element, + size_t num_elements_per_line, + size_t num_lines, + size_t src_total_line_length, + size_t dst_total_line_length, + event_t event) +---- + | Perform an async copy of (_num_elements_per_line_ * _num_lines_) + elements of size _num_bytes_per_element_ from (_src_ + (_src_offset_ * + _num_bytes_per_element_)) to (_dst_ + (_dst_offset_ * + _num_bytes_per_element_)). + All pointer arithmetic is performed with implicit casting to `char*` + by the implementation. + Each line contains _num_elements_per_line_ elements of size + _num_bytes_per_element_. + After each line of transfer, the _src_ address is incremented by + _src_total_line_length_ elements (i.e. _src_total_line_length_ * + _num_bytes_per_element_ bytes), and the _dst_ address is incremented + by _dst_total_line_length_ elements (i.e. _dst_total_line_length_ * + _num_bytes_per_element_ bytes), for the next line of transfer. + + The behavior of *async_work_group_copy_2D2D* is undefined if the + source or destination addresses exceed the upper bounds of the address + space during the copy. + + The behavior of *async_work_group_copy_2D2D* is also undefined if the + _src_total_line_length_ or _dst_total_line_length_ values are smaller + than _num_elements_per_line_, i.e. overlapping of lines is undefined. 
+a| +[source,opencl_c] +---- +event_t async_work_group_copy_3D3D( + __local void *dst, + size_t dst_offset, + const __global void *src, + size_t src_offset, + size_t num_bytes_per_element, + size_t num_elements_per_line, + size_t num_lines, + size_t num_planes, + size_t src_total_line_length, + size_t src_total_plane_area, + size_t dst_total_line_length, + size_t dst_total_plane_area, + event_t event) + +event_t async_work_group_copy_3D3D( + __global void *dst, + size_t dst_offset, + const __local void *src, + size_t src_offset, + size_t num_bytes_per_element, + size_t num_elements_per_line, + size_t num_lines, + size_t num_planes, + size_t src_total_line_length, + size_t src_total_plane_area, + size_t dst_total_line_length, + size_t dst_total_plane_area, + event_t event) +---- + | Perform an async copy of ((_num_elements_per_line_ * _num_lines_) * + _num_planes_) elements of size _num_bytes_per_element_ from (_src_ + + (_src_offset_ * _num_bytes_per_element_)) to (_dst_ + (_dst_offset_ * + _num_bytes_per_element_)), arranged in _num_planes_ planes. + All pointer arithmetic is performed with implicit casting to `char*` + by the implementation. + Each plane contains _num_lines_ lines. + Each line contains _num_elements_per_line_ elements. + After each line of transfer, the _src_ address is incremented by + _src_total_line_length_ elements (i.e. _src_total_line_length_ * + _num_bytes_per_element_ bytes), and the _dst_ address is incremented + by _dst_total_line_length_ elements (i.e. _dst_total_line_length_ * + _num_bytes_per_element_ bytes), for the next line of transfer. + + The behavior of *async_work_group_copy_3D3D* is undefined if the + source or destination addresses exceed the upper bounds of the address + space during the copy. + + The behavior of *async_work_group_copy_3D3D* is also undefined if the + _src_total_line_length_ or _dst_total_line_length_ values are smaller + than _num_elements_per_line_, i.e. overlapping of lines is undefined. 
+ + The behavior of *async_work_group_copy_3D3D* is also undefined if + _src_total_plane_area_ is smaller than (_num_lines_ * + _src_total_line_length_), or _dst_total_plane_area_ is smaller than + (_num_lines_ * _dst_total_line_length_), i.e. overlapping of planes is + undefined. +|==== +-- +endif::cl_khr_extended_async_copies[] + + [[atomic-functions]] === Atomic Functions @@ -6589,7 +7892,7 @@ endif::refpageOnly[] [[the-atomic_var_init-macro]] -==== The `ATOMIC_VAR_INIT` macro +==== The `ATOMIC_VAR_INIT` Macro [open,refpage='ATOMIC_VAR_INIT',desc='ATOMIC_VAR_INIT macro',type='freeform',spec='clang',anchor='the-atomic_var_init-macro',xrefs='atomicFunctions atomic_init'] -- @@ -6623,7 +7926,7 @@ operation, constitutes a data-race. [[the-atomic_init-function]] -==== The atomic_init function +==== The atomic_init Function [open,refpage='atomic_init',desc='The atomic_init function',type='freeform',spec='clang',anchor='the-atomic_init-function',xrefs='atomicFunctions ATOMIC_VAR_INIT'] -- @@ -6673,9 +7976,9 @@ The following table lists the enumeration constants: [[table-memory-orders]] //.Memory Order Enumeration Constants -[cols=",",] +[cols=",",options="header",] |==== -| *Memory Order* | *Additional Notes* +| Memory Order | Additional Notes | `memory_order_relaxed` | <> support for OpenCL C 2.0 or newer. | `memory_order_acquire` @@ -6714,16 +8017,17 @@ The following table lists the enumeration constants: [[table-memory-scopes]] //.Memory Scope Enumeration Constants -[cols=",",] +[cols=",",options="header",] |==== -| *Memory Scope* | *Additional Notes* +| Memory Scope | Additional Notes | `memory_scope_work_item` | `memory_scope_work_item` can only be used with `atomic_work_item_fence` with flags set to `CLK_IMAGE_MEM_FENCE`. <> support for OpenCL C 2.0 or newer. | `memory_scope_sub_group` - | <> support for OpenCL C 3.0 or newer and the - {opencl_c_subgroups} feature. 
+ | <> support for +ifdef::cl_khr_subgroups[the `<>` extension macro; or for] + OpenCL C 3.0 or newer and the {opencl_c_subgroups} feature. | `memory_scope_work_group` | <> support for OpenCL C 2.0 or newer. | `memory_scope_device` @@ -6800,7 +8104,7 @@ NOTE: The use of memory order and scope enumerations must respect the [[atomic-integer-and-floating-point-types]] -==== Atomic integer and floating-point types +==== Atomic Integer and Floating-point Types [open,refpage='atomicTypes',desc='Atomic Integer And Floating-Point Types',type='freeform',spec='clang',anchor='atomic-integer-and-floating-point-types',xrefs='atomicFunctions',alias='atomic_int atomic_uint atomic_long atomic_ulong atomic_float atomic_double atomic_intptr_t atomic_uintptr_t atomic_size_t atomic_ptrdiff_t'] -- @@ -6829,7 +8133,7 @@ The atomic_flag type must be implemented as a 32-bit integer. [[operations-on-atomic-types]] -==== Operations on atomic types +==== Operations on Atomic Types There are only a few kinds of operations on atomic types, though there are many instances of those kinds. @@ -7374,7 +8678,7 @@ All of these operations are applicable to an object of any atomic integer type. The key, operator, and computation correspondence is given in table below: -[cols=",,",] +[cols=",,",options="header",] |==== | *key* | *op* | *computation* | `add` | *+* | addition @@ -7638,14 +8942,13 @@ C 2.0, or OpenCL C 3.0 or newer and the {opencl_c_generic_address_space} feature. -- + [[atomic-legacy]] ==== OpenCL C 1.x Legacy Atomics IMPORTANT: The atomic functions described in this sub-section <> support for OpenCL C 1.1 or newer, and are <> OpenCL C 2.0. Also see extensions -`cl_khr_global_int32_base_atomics`, `cl_khr_global_int32_extended_atomics`, -`cl_khr_local_int32_base_atomics`, and `cl_khr_local_int32_extended_atomics`. +deprecated by>> OpenCL C 2.0. 
OpenCL C 1.x had support for relaxed atomic operations via built-in functions that could operate on any memory address in `{global}` or `{local}` spaces. @@ -7664,168 +8967,362 @@ semantics of the minimum requirements. // Copied from table 6.19 in OpenCL 1.2 spec [[table-legacy-atomic-functions]] .Legacy Atomic Functions -[cols=",",] +[cols=",",options="header",] |==== -| *Function* | *Description* +| Function | Description | int **atomic_add**(volatile {global} int *_p_, int _val_) + - int **atom_add**(volatile {global} int *_p_, int _val_) + + int **atom_add**(volatile {global} int *_p_, int _val_) - unsigned int **atomic_add**(volatile {global} unsigned int *_p_, unsigned int _val_) + - unsigned int **atom_add**(volatile {global} unsigned int *_p_, unsigned int _val_) + + uint **atomic_add**(volatile {global} uint *_p_, uint _val_) + + uint **atom_add**(volatile {global} uint *_p_, uint _val_) int **atomic_add**(volatile {local} int *_p_, int _val_) + - int **atom_add**(volatile {local} int *_p_, int _val_) + + int **atom_add**(volatile {local} int *_p_, int _val_) - unsigned int **atomic_add**(volatile {local} unsigned int *_p_, unsigned int _val_) + - unsigned int **atom_add**(volatile {local} unsigned int *_p_, unsigned int _val_) + + uint **atomic_add**(volatile {local} uint *_p_, uint _val_) + + uint **atom_add**(volatile {local} uint *_p_, uint _val_) | Read the 32-bit value (referred to as _old_) stored at location pointed by _p_. Compute (_old_ + _val_) and store result at location pointed by _p_. The function returns _old_. 
| int **atomic_sub**(volatile {global} int *_p_, int _val_) + - int **atom_sub**(volatile {global} int *_p_, int _val_) + + int **atom_sub**(volatile {global} int *_p_, int _val_) - unsigned int **atomic_sub**(volatile {global} unsigned int *_p_, unsigned int _val_) + - unsigned int **atom_sub**(volatile {global} unsigned int *_p_, unsigned int _val_) + + uint **atomic_sub**(volatile {global} uint *_p_, uint _val_) + + uint **atom_sub**(volatile {global} uint *_p_, uint _val_) int **atomic_sub**(volatile {local} int *_p_, int _val_) + - int **atom_sub**(volatile {local} int *_p_, int _val_) + + int **atom_sub**(volatile {local} int *_p_, int _val_) - unsigned int **atomic_sub**(volatile {local} unsigned int *_p_, unsigned int _val_) + - unsigned int **atom_sub**(volatile {local} unsigned int *_p_, unsigned int _val_) + + uint **atomic_sub**(volatile {local} uint *_p_, uint _val_) + + uint **atom_sub**(volatile {local} uint *_p_, uint _val_) | Read the 32-bit value (referred to as _old_) stored at location pointed by _p_. Compute (_old_ - _val_) and store result at location pointed by _p_. The function returns _old_. 
| int **atomic_xchg**(volatile {global} int *_p_, int _val_) + - int **atom_xchg**(volatile {global} int *_p_, int _val_) + + int **atom_xchg**(volatile {global} int *_p_, int _val_) - unsigned int **atomic_xchg**(volatile {global} unsigned int *_p_, unsigned int _val_) + - unsigned int **atom_xchg**(volatile {global} unsigned int *_p_, unsigned int _val_) + + uint **atomic_xchg**(volatile {global} uint *_p_, uint _val_) + + uint **atom_xchg**(volatile {global} uint *_p_, uint _val_) float **atomic_xchg**(volatile {global} float *_p_, float _val_) + int **atomic_xchg**(volatile {local} int *_p_, int _val_) + - int **atom_xchg**(volatile {local} int *_p_, int _val_) + + int **atom_xchg**(volatile {local} int *_p_, int _val_) - unsigned int **atomic_xchg**(volatile {local} unsigned int *_p_, unsigned int _val_) + - unsigned int **atom_xchg**(volatile {local} unsigned int *_p_, unsigned int _val_) + + uint **atomic_xchg**(volatile {local} uint *_p_, uint _val_) + + uint **atom_xchg**(volatile {local} uint *_p_, uint _val_) - float **atomic_xchg**(volatile {local} float *_p_, float _val_) + + float **atomic_xchg**(volatile {local} float *_p_, float _val_) | Swaps the _old_ value stored at location _p_ with new value given by _val_. Returns _old_ value. 
| int **atomic_inc**(volatile {global} int *_p_) + - int **atom_inc**(volatile {global} int *_p_) + + int **atom_inc**(volatile {global} int *_p_) - unsigned int **atomic_inc**(volatile {global} unsigned int *_p_) + - unsigned int **atom_inc**(volatile {global} unsigned int *_p_) + + uint **atomic_inc**(volatile {global} uint *_p_) + + uint **atom_inc**(volatile {global} uint *_p_) int **atomic_inc**(volatile {local} int *_p_) + - int **atom_inc**(volatile {local} int *_p_) + + int **atom_inc**(volatile {local} int *_p_) - unsigned int **atomic_inc**(volatile {local} unsigned int *_p_) + - unsigned int **atom_inc**(volatile {local} unsigned int *_p_) + + uint **atomic_inc**(volatile {local} uint *_p_) + + uint **atom_inc**(volatile {local} uint *_p_) | Read the 32-bit value (referred to as _old_) stored at location pointed by _p_. Compute (_old_ + 1) and store result at location pointed by _p_. The function returns _old_. | int **atomic_dec**(volatile {global} int *_p_) + - int **atom_dec**(volatile {global} int *_p_) + + int **atom_dec**(volatile {global} int *_p_) - unsigned int **atomic_dec**(volatile {global} unsigned int *_p_) + - unsigned int **atom_dec**({global} unsigned int *_p_) + + uint **atomic_dec**(volatile {global} uint *_p_) + + uint **atom_dec**(volatile {global} uint *_p_) int **atomic_dec**(volatile {local} int *_p_) + - int **atom_dec**(volatile {local} int *_p_) + + int **atom_dec**(volatile {local} int *_p_) - unsigned int **atomic_dec**(volatile {local} unsigned int *_p_) + - unsigned int **atom_dec**(volatile {local} unsigned int *_p_) + + uint **atomic_dec**(volatile {local} uint *_p_) + + uint **atom_dec**(volatile {local} uint *_p_) | Read the 32-bit value (referred to as _old_) stored at location pointed by _p_. Compute (_old_ - 1) and store result at location pointed by _p_. The function returns _old_.
| int **atomic_cmpxchg**(volatile {global} int *_p_, int _cmp_, int _val_) + - int **atom_cmpxchg**(volatile {global} int *_p_, int _cmp_, int _val_) + + int **atom_cmpxchg**(volatile {global} int *_p_, int _cmp_, int _val_) - unsigned int **atomic_cmpxchg**(volatile {global} unsigned int *_p_, unsigned int _cmp_, unsigned int _val_) + - unsigned int **atom_cmpxchg**(volatile {global} unsigned int *_p_, unsigned int _cmp_, unsigned int _val_) + + uint **atomic_cmpxchg**(volatile {global} uint *_p_, uint _cmp_, uint _val_) + + uint **atom_cmpxchg**(volatile {global} uint *_p_, uint _cmp_, uint _val_) int **atomic_cmpxchg**(volatile {local} int *_p_, int _cmp_, int _val_) + - int **atom_cmpxchg**(volatile {local} int *_p_, int _cmp_, int _val_) + + int **atom_cmpxchg**(volatile {local} int *_p_, int _cmp_, int _val_) - unsigned int **atomic_cmpxchg**(volatile {local} unsigned int *_p_, unsigned int _cmp_, unsigned int _val_) + - unsigned int **atom_cmpxchg**(volatile {local} unsigned int *_p_, unsigned int _cmp_, unsigned int _val_) + + uint **atomic_cmpxchg**(volatile {local} uint *_p_, uint _cmp_, uint _val_) + + uint **atom_cmpxchg**(volatile {local} uint *_p_, uint _cmp_, uint _val_) | Read the 32-bit value (referred to as _old_) stored at location pointed by _p_. Compute (_old_ == _cmp_) ? _val_ : _old_ and store result at location pointed by _p_. The function returns _old_. 
| int **atomic_min**(volatile {global} int *_p_, int _val_) + - int **atom_min**(volatile {global} int *_p_, int _val_) + + int **atom_min**(volatile {global} int *_p_, int _val_) - unsigned int **atomic_min**(volatile {global} unsigned int *_p_, unsigned int _val_) + - unsigned int **atom_min**(volatile {global} unsigned int *_p_, unsigned int _val_) + + uint **atomic_min**(volatile {global} uint *_p_, uint _val_) + + uint **atom_min**(volatile {global} uint *_p_, uint _val_) int **atomic_min**(volatile {local} int *_p_, int _val_) + - int **atom_min**(volatile {local} int *_p_, int _val_) + + int **atom_min**(volatile {local} int *_p_, int _val_) - unsigned int **atomic_min**(volatile {local} unsigned int *_p_, unsigned int _val_) + - unsigned int **atom_min**(volatile {local} unsigned int *_p_, unsigned int _val_) + + uint **atomic_min**(volatile {local} uint *_p_, uint _val_) + + uint **atom_min**(volatile {local} uint *_p_, uint _val_) | Read the 32-bit value (referred to as _old_) stored at location pointed by _p_. Compute **min**(_old_, _val_) and store minimum value at location pointed by _p_. The function returns _old_. 
| int **atomic_max**(volatile {global} int *_p_, int _val_) + - int **atom_max**(volatile {global} int *_p_, int _val_) + + int **atom_max**(volatile {global} int *_p_, int _val_) - unsigned int **atomic_max**(volatile {global} unsigned int *_p_, unsigned int _val_) + - unsigned int **atom_max**(volatile {global} unsigned int *_p_, unsigned int _val_) + + uint **atomic_max**(volatile {global} uint *_p_, uint _val_) + + uint **atom_max**(volatile {global} uint *_p_, uint _val_) int **atomic_max**(volatile {local} int *_p_, int _val_) + - int **atom_max**(volatile {local} int *_p_, int _val_) + + int **atom_max**(volatile {local} int *_p_, int _val_) - unsigned int **atomic_max**(volatile {local} unsigned int *_p_, unsigned int _val_) + - unsigned int **atom_max**(volatile {local} unsigned int *_p_, unsigned int _val_) + + uint **atomic_max**(volatile {local} uint *_p_, uint _val_) + + uint **atom_max**(volatile {local} uint *_p_, uint _val_) | Read the 32-bit value (referred to as _old_) stored at location pointed by _p_. Compute **max**(_old_, _val_) and store maximum value at location pointed by _p_. The function returns _old_. 
| int **atomic_and**(volatile {global} int *_p_, int _val_) + - int **atom_and**(volatile {global} int *_p_, int _val_) + + int **atom_and**(volatile {global} int *_p_, int _val_) - unsigned int **atomic_and**(volatile {global} unsigned int *_p_, unsigned int _val_) + - unsigned int **atom_and**(volatile {global} unsigned int *_p_, unsigned int _val_) + + uint **atomic_and**(volatile {global} uint *_p_, uint _val_) + + uint **atom_and**(volatile {global} uint *_p_, uint _val_) int **atomic_and**(volatile {local} int *_p_, int _val_) + - int **atom_and**(volatile {local} int *_p_, int _val_) + + int **atom_and**(volatile {local} int *_p_, int _val_) - unsigned int **atomic_and**(volatile {local} unsigned int *_p_, unsigned int _val_) + - unsigned int **atom_and**(volatile {local} unsigned int *_p_, unsigned int _val_) + + uint **atomic_and**(volatile {local} uint *_p_, uint _val_) + + uint **atom_and**(volatile {local} uint *_p_, uint _val_) | Read the 32-bit value (referred to as _old_) stored at location pointed by _p_. Compute (_old_ & _val_) and store result at location pointed by _p_. The function returns _old_. 
| int **atomic_or**(volatile {global} int *_p_, int _val_) + - int **atom_or**(volatile {global} int *_p_, int _val_) + + int **atom_or**(volatile {global} int *_p_, int _val_) - unsigned int **atomic_or**(volatile {global} unsigned int *_p_, unsigned int _val_) + - unsigned int **atom_or**(volatile {global} unsigned int *_p_, unsigned int _val_) + + uint **atomic_or**(volatile {global} uint *_p_, uint _val_) + + uint **atom_or**(volatile {global} uint *_p_, uint _val_) int **atomic_or**(volatile {local} int *_p_, int _val_) + - int **atom_or**(volatile {local} int *_p_, int _val_) + + int **atom_or**(volatile {local} int *_p_, int _val_) - unsigned int **atomic_or**(volatile {local} unsigned int *_p_, unsigned int _val_) + - unsigned int **atom_or**(volatile {local} unsigned int *_p_, unsigned int _val_) + + uint **atomic_or**(volatile {local} uint *_p_, uint _val_) + + uint **atom_or**(volatile {local} uint *_p_, uint _val_) | Read the 32-bit value (referred to as _old_) stored at location pointed by _p_. Compute (_old_ \| _val_) and store result at location pointed by _p_. The function returns _old_. 
| int **atomic_xor**(volatile {global} int *_p_, int _val_) + - int **atom_xor**(volatile {global} int *_p_, int _val_) + + int **atom_xor**(volatile {global} int *_p_, int _val_) - unsigned int **atomic_xor**(volatile {global} unsigned int *_p_, unsigned int _val_) + - unsigned int **atom_xor**(volatile {global} unsigned int *_p_, unsigned int _val_) + + uint **atomic_xor**(volatile {global} uint *_p_, uint _val_) + + uint **atom_xor**(volatile {global} uint *_p_, uint _val_) int **atomic_xor**(volatile {local} int *_p_, int _val_) + - int **atom_xor**(volatile {local} int *_p_, int _val_) + + int **atom_xor**(volatile {local} int *_p_, int _val_) - unsigned int **atomic_xor**(volatile {local} unsigned int *_p_, unsigned int _val_) + - unsigned int **atom_xor**(volatile {local} unsigned int *_p_, unsigned int _val_) + + uint **atomic_xor**(volatile {local} uint *_p_, uint _val_) + + uint **atom_xor**(volatile {local} uint *_p_, uint _val_) | Read the 32-bit value (referred to as _old_) stored at location pointed by _p_. Compute (_old_ ^ _val_) and store result at location pointed by _p_. The function returns _old_. |==== +ifdef::cl_khr_global_int32_base_atomics,cl_khr_global_int32_extended_atomics,cl_khr_local_int32_base_atomics,cl_khr_local_int32_extended_atomics[] +A subset of the atomic functions described above are also supported in +OpenCL 1.0 when appropriate OpenCL extension macros are supported, as +described in the <> table below. 
+ +[[table-atomic-function-extensions]] +.Atomic Function Extensions +[cols=",",options="header",] +|==== +| Extension Macro | Supported Functions +ifdef::cl_khr_global_int32_base_atomics[] +| `<<cl_khr_global_int32_base_atomics>>` + | **atom_add** + + **atom_sub** + + **atom_xchg** + + **atom_inc** + + **atom_dec** + + **atom_cmpxchg** + + (with {global} parameters) +endif::cl_khr_global_int32_base_atomics[] +ifdef::cl_khr_global_int32_extended_atomics[] +| `<<cl_khr_global_int32_extended_atomics>>` + | **atom_min** + + **atom_max** + + **atom_and** + + **atom_or** + + **atom_xor** + + (with {global} parameters) +endif::cl_khr_global_int32_extended_atomics[] +ifdef::cl_khr_local_int32_base_atomics[] +| `<<cl_khr_local_int32_base_atomics>>` + | **atom_add** + + **atom_sub** + + **atom_xchg** + + **atom_inc** + + **atom_dec** + + **atom_cmpxchg** + + (with {local} parameters) +endif::cl_khr_local_int32_base_atomics[] +ifdef::cl_khr_local_int32_extended_atomics[] +| `<<cl_khr_local_int32_extended_atomics>>` + | **atom_min** + + **atom_max** + + **atom_and** + + **atom_or** + + **atom_xor** + + (with {local} parameters) +endif::cl_khr_local_int32_extended_atomics[] +|==== +endif::cl_khr_global_int32_base_atomics,cl_khr_global_int32_extended_atomics,cl_khr_local_int32_base_atomics,cl_khr_local_int32_extended_atomics[] + + +ifdef::cl_khr_int64_base_atomics,cl_khr_int64_extended_atomics[] +[[atomic-legacy-int64]] +==== Legacy 64-Bit Atomic Extensions + +Similar to the <<atomic-legacy,OpenCL C 1.x legacy atomics>>, atomic +functions operating on 64-bit integers are provided by extensions. + +ifdef::cl_khr_int64_base_atomics[] +If the `<<cl_khr_int64_base_atomics>>` extension macro is supported, it +provides the functions described in the <<table-atomic-int64-base>> table below.
+ +[[table-atomic-int64-base]] +.Built-in 64-Bit Base Atomic Functions +[cols="9,5",options="header",] +|==== +| Function | Description +| long **atom_add** (volatile {global} long *_p_, long _val_) + + long **atom_add** (volatile {local} long *_p_, long _val_) + + ulong **atom_add** (volatile {global} ulong *_p_, ulong _val_) + + ulong **atom_add** (volatile {local} ulong *_p_, ulong _val_) + | Read the 64-bit value (referred to as _old_) stored at location + pointed by _p_. + Compute (_old_ + _val_) and store result at location pointed by _p_. + The function returns _old_. +| long **atom_sub** (volatile {global} long *_p_, long _val_) + + long **atom_sub** (volatile {local} long *_p_, long _val_) + + ulong **atom_sub** (volatile {global} ulong *_p_, ulong _val_) + + ulong **atom_sub** (volatile {local} ulong *_p_, ulong _val_) + | Read the 64-bit value (referred to as _old_) stored at location + pointed by _p_. + Compute (_old_ - _val_) and store result at location pointed by _p_. + The function returns _old_. +| long **atom_xchg** (volatile {global} long *_p_, long _val_) + + long **atom_xchg** (volatile {local} long *_p_, long _val_) + + ulong **atom_xchg** (volatile {global} ulong *_p_, ulong _val_) + + ulong **atom_xchg** (volatile {local} ulong *_p_, ulong _val_) + | Swaps the _old_ value stored at location _p_ with new value given by + _val_. + Returns _old_ value. +| long **atom_inc** (volatile {global} long *_p_) + + long **atom_inc** (volatile {local} long *_p_) + + ulong **atom_inc** (volatile {global} ulong *_p_) + + ulong **atom_inc** (volatile {local} ulong *_p_) + | Read the 64-bit value (referred to as _old_) stored at location + pointed by _p_. + Compute (_old_ + 1) and store result at location pointed by _p_. + The function returns _old_.
+| long **atom_dec** (volatile {global} long *_p_) + + long **atom_dec** (volatile {local} long *_p_) + + ulong **atom_dec** (volatile {global} ulong *_p_) + + ulong **atom_dec** (volatile {local} ulong *_p_) + | Read the 64-bit value (referred to as _old_) stored at location + pointed by _p_. + Compute (_old_ - _1_) and store result at location pointed by _p_. + The function returns _old_. +| long **atom_cmpxchg** (volatile {global} long *_p_, long _cmp_, long _val_) + + long **atom_cmpxchg** (volatile {local} long *_p_, long _cmp_, long _val_) + + ulong **atom_cmpxchg** (volatile {global} ulong *_p_, ulong _cmp_, ulong _val_) + + ulong **atom_cmpxchg** (volatile {local} ulong *_p_, ulong _cmp_, ulong _val_) + | Read the 64-bit value (referred to as _old_) stored at location + pointed by _p_. + Compute (_old_ == _cmp_) ? _val_ : _old_ and store result at location + pointed by _p_. + The function returns _old_. +|==== + +endif::cl_khr_int64_base_atomics[] + +ifdef::cl_khr_int64_extended_atomics[] +If the `<>` extension macro is supported, it +provides the functions described in the <> table below. + +[[table-atomic-int64-extended]] +.Built-in 64-Bit Extended Atomic Functions +[cols=",",options="header",] +|==== +|*Function* |*Description* +| long **atom_min** (volatile {global} long *_p_, long _val_) + + long **atom_min** (volatile {local} long *_p_, long _val_) + + ulong **atom_min** (volatile {global} ulong *_p_, ulong _val_) + + ulong **atom_min** (volatile {local} ulong *_p_, ulong _val_) + | Read the 64-bit value (referred to as _old_) stored at location + pointed by _p_. + Compute *min*(_old_, _val_) and store minimum value at location + pointed by _p_. + The function returns _old_. 
+| long **atom_max** (volatile {global} long *_p_, long _val_) + + long **atom_max** (volatile {local} long *_p_, long _val_) + + ulong **atom_max** (volatile {global} ulong *_p_, ulong _val_) + + ulong **atom_max** (volatile {local} ulong *_p_, ulong _val_) + | Read the 64-bit value (referred to as _old_) stored at location + pointed by _p_. + Compute *max*(_old_, _val_) and store maximum value at location + pointed by _p_. + The function returns _old_. +| long **atom_and** (volatile {global} long *_p_, long _val_) + + long **atom_and** (volatile {local} long *_p_, long _val_) + + ulong **atom_and** (volatile {global} ulong *_p_, ulong _val_) + + ulong **atom_and** (volatile {local} ulong *_p_, ulong _val_) + | Read the 64-bit value (referred to as _old_) stored at location + pointed by _p_. + Compute (_old_ & _val_) and store result at location pointed by _p_. + The function returns _old_. +| long **atom_or** (volatile {global} long *_p_, long _val_) + + long **atom_or** (volatile {local} long *_p_, long _val_) + + ulong **atom_or** (volatile {global} ulong *_p_, ulong _val_) + + ulong **atom_or** (volatile {local} ulong *_p_, ulong _val_) + | Read the 64-bit value (referred to as _old_) stored at location + pointed by _p_. + Compute (_old_ \| _val_) and store result at location pointed by _p_. + The function returns _old_. +| long **atom_xor** (volatile {global} long *_p_, long _val_) + + long **atom_xor** (volatile {local} long *_p_, long _val_) + + ulong **atom_xor** (volatile {global} ulong *_p_, ulong _val_) + + ulong **atom_xor** (volatile {local} ulong *_p_, ulong _val_) + | Read the 64-bit value (referred to as _old_) stored at location + pointed by _p_. + Compute (_old_ ^ _val_) and store result at location pointed by _p_. + The function returns _old_. +|==== +endif::cl_khr_int64_extended_atomics[] + +NOTE: Atomic operations on 64-bit integers and 32-bit integers (and floats) +are also atomic with respect to each other.
+ +endif::cl_khr_int64_base_atomics,cl_khr_int64_extended_atomics[] + + [[atomic-restrictions]] ==== Restrictions @@ -7864,8 +9361,9 @@ semantics of the minimum requirements. <> support for OpenCL C 2.0, or OpenCL C 3.0 or newer and the {opencl_c_atomic_order_seq_cst} feature. * Using `memory_scope_sub_group` with any built-in atomic function - <> support for OpenCL C 3.0 or newer and the - {opencl_c_subgroups} feature. + <> support for +ifdef::cl_khr_subgroups[the `<>` extension macro; or for] + OpenCL C 3.0 or newer and the {opencl_c_subgroups} feature. * Using `memory_scope_device` <> support for OpenCL C 2.0, or OpenCL C 3.0 or newer and the {opencl_c_atomic_scope_device} feature. @@ -7898,9 +9396,9 @@ _n_ is 2, 4, 8, or 16. [[table-misc-vector]] .Built-in Miscellaneous Vector Functions -[cols="1,2",] +[cols="1,2",options="header",] |==== -| *Function* | *Description* +| Function | Description | int *vec_step*(gentype__n__ _a_) + int *vec_step*(char3 _a_) + int *vec_step*(uchar3 _a_) + @@ -8001,9 +9499,9 @@ The OpenCL C programming language implements the *printf* function. [[table-printf]] .Built-in printf Function -[cols=",",] +[cols=",",options="header",] |==== -| *Function* | *Description* +| Function | Description | int **printf**(constant char *restrict _format_, ...) | The *printf* built-in function writes output to an implementation-defined stream such as stdout under control of the @@ -8022,7 +9520,7 @@ The OpenCL C programming language implements the *printf* function. 
[[printf-output-synchronization]] -==== printf output synchronization +==== printf Output Synchronization When the event that is associated with a particular kernel invocation is completed, the output of all printf() calls executed by this kernel @@ -8038,7 +9536,7 @@ For example, it is valid for the output of a work-item with a global id [[printf-format-string]] -==== printf format string +==== printf Format String The format shall be a character sequence, beginning and ending in its initial shift state. @@ -8204,7 +9702,7 @@ characters. *o,u,* -*x,X* The `unsigned int`, `uchar__n__`, `ushort__n__`, `uint__n__` or +*x,X* The `uint`, `uchar__n__`, `ushort__n__`, `uint__n__` or `ulong__n__` argument is converted to unsigned octal (*o*), unsigned decimal (*u*), or unsigned hexadecimal notation (*x* or *X*) in the style _dddd_; the letters *abcdef* are used for *x* conversion and the letters *ABCDEF* @@ -8294,11 +9792,10 @@ specifier. [NOTE] ==== The conversion specifiers *e,E,g,G,a,A* convert a `float` or `half` argument -that is a scalar type to a `double` only if the `double` data type is -supported, e.g. for OpenCL C 3.0 or newer the {opencl_c_fp64} feature -macro is present. -If the `double` data type is not supported, the argument will be a `float` -instead of a `double` and the `half` type will be converted to a `float`. +that is a scalar type to a `double` only if <>. +Otherwise, the argument will be a `float` instead of a `double` and the +`half` type will be converted to a `float`. ==== *c* The `int` argument is converted to an `unsigned char`, and the resulting @@ -8395,7 +9892,7 @@ kernel void my_kernel(global char *s, ... ) [[differences-between-opencl-c-and-c99-printf]] -==== Differences between OpenCL C and C99 printf +==== Differences Between OpenCL C and C99 printf * The *l* modifier followed by a *c* conversion specifier or *s* conversion specifier is not supported by OpenCL C. 
@@ -8515,9 +10012,9 @@ The sampler fields are described in the following table. [[table-sampler-descriptor]] .Sampler Descriptor -[cols=",",] +[cols=",",options="header",] |==== -| *Sampler State* | *Description* +| Sampler State | Description | `` | Specifies whether the _x_, _y_ and _z_ coordinates are passed in as normalized or unnormalized values. @@ -8590,7 +10087,7 @@ queried using the `CL_DEVICE_MAX_SAMPLERS` token in *clGetDeviceInfo*. [[determining-the-border-color-or-value]] -===== *Determining the border color or value* +===== *Determining the Border Color or Value* If `` in sampler is `CLK_ADDRESS_CLAMP`, then out-of-range image coordinates return the border color. @@ -8624,15 +10121,24 @@ The alpha component is returned as is. [open,refpage='imageReadFunctions',desc='Built-in Image Read Functions',type='freeform',spec='clang',anchor='built-in-image-read-functions',xrefs='imageQueryFunctions imageSamplerlessReadFunctions imageWriteFunctions',alias='read_imagef read_imagei read_imageui'] -- - The following built-in function calls to read images with a sampler are supported footnote:[{fn-read-image-with-sampler}]. +ifdef::cl_khr_mipmap_image[] +If the `<>` extension macro is supported, read +functions which do not either + + * explicitly specify a level of detail _lod_, or + * compute a level of detail from _gradient_ parameters + +read from mip level 0 if _image_ is a mipmapped image. +endif::cl_khr_mipmap_image[] + [[table-image-read]] .Built-in Image Read Functions -[cols=",",] +[cols=",",options="header",] |==== -| *Function* | *Description* +| Function | Description | float4 *read_imagef*(read_only image2d_t _image_, sampler_t _sampler_, int2 _coord_) + float4 *read_imagef*(read_only image2d_t _image_, sampler_t _sampler_, @@ -8661,6 +10167,40 @@ supported footnote:[{fn-read-image-with-sampler}]. Values returned by *read_imagef* for image objects with _image_channel_data_type_ values not specified in the description above are undefined.
+ifdef::cl_khr_fp16[] +| | +| half4 *read_imageh*(read_only image2d_t _image_, sampler_t _sampler_, + int2 _coord_) + + half4 *read_imageh*(read_only image2d_t _image_, sampler_t _sampler_, + float2 _coord_) + | Use the coordinate _(coord.x, coord.y)_ to do an element lookup in the + 2D image object specified by _image_. + + *read_imageh* returns half-precision floating-point values in the + range [0.0, 1.0] for image objects created with + _image_channel_data_type_ set to one of the pre-defined packed + formats or `CL_UNORM_INT8`, or `CL_UNORM_INT16`. + + *read_imageh* returns half-precision floating-point values in the + range [-1.0, 1.0] for image objects created with + _image_channel_data_type_ set to `CL_SNORM_INT8`, or `CL_SNORM_INT16`. + + *read_imageh* returns half-precision floating-point values for image + objects created with _image_channel_data_type_ set to `CL_HALF_FLOAT`. + + The *read_imageh* calls that take integer coordinates must use a + sampler with filter mode set to `CLK_FILTER_NEAREST`, normalized + coordinates set to `CLK_NORMALIZED_COORDS_FALSE` and addressing mode + set to `CLK_ADDRESS_CLAMP_TO_EDGE`, `CLK_ADDRESS_CLAMP` or + `CLK_ADDRESS_NONE`; otherwise the values returned are undefined. + + Values returned by *read_imageh* for image objects with + _image_channel_data_type_ values not specified in the description + above are undefined. + + <> support for the `<>` extension + macro. +endif::cl_khr_fp16[] | | | int4 *read_imagei*(read_only image2d_t _image_, sampler_t _sampler_, int2 _coord_) + @@ -8736,6 +10276,41 @@ supported footnote:[{fn-read-image-with-sampler}]. Values returned by *read_imagef* for image objects with _image_channel_data_type_ values not specified in the description are undefined. 
+ifdef::cl_khr_fp16[] +| | +| half4 *read_imageh*(read_only image3d_t _image_, sampler_t _sampler_, + int4 _coord_ ) + + half4 *read_imageh*(read_only image3d_t _image_, sampler_t _sampler_, + float4 _coord_) + | Use the coordinate _(coord.x_, _coord.y_, _coord.z)_ to do an + element lookup in the 3D image object specified by _image_. + _coord.w_ is ignored. + + *read_imageh* returns half-precision floating-point values in the + range [0.0, 1.0] for image objects created with + _image_channel_data_type_ set to one of the pre-defined packed formats + or `CL_UNORM_INT8`, or `CL_UNORM_INT16`. + + *read_imageh* returns half-precision floating-point values in the + range [-1.0, 1.0] for image objects created with + _image_channel_data_type_ set to `CL_SNORM_INT8`, or `CL_SNORM_INT16`. + + **read_imageh** returns half-precision floating-point values for image + objects created with _image_channel_data_type_ set to `CL_HALF_FLOAT`. + + The *read_imageh* calls that take integer coordinates must use a + sampler with filter mode set to `CLK_FILTER_NEAREST`, normalized + coordinates set to `CLK_NORMALIZED_COORDS_FALSE` and addressing mode + set to `CLK_ADDRESS_CLAMP_TO_EDGE`, `CLK_ADDRESS_CLAMP` or + `CLK_ADDRESS_NONE`; otherwise the values returned are undefined. + + Values returned by *read_imageh* for image objects with + _image_channel_data_type_ values not specified in the description are + undefined. + + <> support for the `<>` extension + macro. +endif::cl_khr_fp16[] | | | int4 *read_imagei*(read_only image3d_t _image_, sampler_t _sampler_, int4 _coord_) + @@ -8791,16 +10366,16 @@ supported footnote:[{fn-read-image-with-sampler}]. _coord.z_ in the 2D image array specified by _image_. *read_imagef* returns floating-point values in the range [0.0, 1.0] - for image objects created with image_channel_data_type set to one of + for image objects created with _image_channel_data_type_ set to one of the pre-defined packed formats or `CL_UNORM_INT8`, or `CL_UNORM_INT16`.
*read_imagef* returns floating-point values in the range [-1.0, 1.0] - for image objects created with image_channel_data_type set to + for image objects created with _image_channel_data_type_ set to `CL_SNORM_INT8`, or `CL_SNORM_INT16`. *read_imagef* returns floating-point values for image objects created - with image_channel_data_type set to `CL_HALF_FLOAT` or `CL_FLOAT`. + with _image_channel_data_type_ set to `CL_HALF_FLOAT` or `CL_FLOAT`. The *read_imagef* calls that take integer coordinates must use a sampler with filter mode set to `CLK_FILTER_NEAREST`, normalized @@ -8809,8 +10384,43 @@ supported footnote:[{fn-read-image-with-sampler}]. `CLK_ADDRESS_NONE`; otherwise the values returned are undefined. Values returned by *read_imagef* for image objects with - image_channel_data_type values not specified in the description above + _image_channel_data_type_ values not specified in the description above + are undefined. +ifdef::cl_khr_fp16[] +| | +| half4 *read_imageh*(read_only image2d_array_t _image_, sampler_t + _sampler_, int4 _coord_) + + half4 *read_imageh*(read_only image2d_array_t _image_, sampler_t + _sampler_, float4 _coord_) + | Use _coord.xy_ to do an element lookup in the 2D image identified by + _coord.z_ in the 2D image array specified by _image_. + + *read_imageh* returns half-precision floating-point values in the + range [0.0, 1.0] for image objects created with + _image_channel_data_type_ set to one of the pre-defined packed formats + or `CL_UNORM_INT8`, or `CL_UNORM_INT16`. + + *read_imageh* returns half-precision floating-point values in the + range [-1.0, 1.0] for image objects created with + _image_channel_data_type_ set to `CL_SNORM_INT8`, or `CL_SNORM_INT16`. + + *read_imageh* returns half-precision floating-point values for image + objects created with _image_channel_data_type_ set to `CL_HALF_FLOAT`. 
+ + The *read_imageh* calls that take integer coordinates must use a + sampler with filter mode set to `CLK_FILTER_NEAREST`, normalized + coordinates set to `CLK_NORMALIZED_COORDS_FALSE` and addressing mode + set to `CLK_ADDRESS_CLAMP_TO_EDGE`, `CLK_ADDRESS_CLAMP` or + `CLK_ADDRESS_NONE`; otherwise the values returned are undefined. + + Values returned by *read_imageh* for image objects with + _image_channel_data_type_ values not specified in the description above are undefined. + + <> support for the `<>` extension + macro. +endif::cl_khr_fp16[] +| | | int4 *read_imagei*(read_only image2d_array_t _image_, sampler_t _sampler_, int4 _coord_) + int4 *read_imagei*(read_only image2d_array_t _image_, sampler_t _sampler_, @@ -8886,6 +10496,40 @@ supported footnote:[{fn-read-image-with-sampler}]. above are undefined. <> support for OpenCL C 1.2 or newer. +ifdef::cl_khr_fp16[] +| | +| half4 *read_imageh*(read_only image1d_t _image_, sampler_t _sampler_, + int _coord_) + + half4 *read_imageh*(read_only image1d_t _image_, sampler_t _sampler_, + float _coord_) + | Use _coord_ to do an element lookup in the 1D image object specified + by _image_. + + *read_imageh* returns half-precision floating-point values in the + range [0.0, 1.0] for image objects created with + _image_channel_data_type_ set to one of the pre-defined packed formats + or `CL_UNORM_INT8`, or `CL_UNORM_INT16`. + + *read_imageh* returns half-precision floating-point values in the + range [-1.0, 1.0] for image objects created with + _image_channel_data_type_ set to `CL_SNORM_INT8`, or `CL_SNORM_INT16`. + + *read_imageh* returns half-precision floating-point values for image + objects created with _image_channel_data_type_ set to `CL_HALF_FLOAT`. 
+ + The *read_imageh* calls that take integer coordinates must use a + sampler with filter mode set to `CLK_FILTER_NEAREST`, normalized + coordinates set to `CLK_NORMALIZED_COORDS_FALSE` and addressing mode + set to `CLK_ADDRESS_CLAMP_TO_EDGE`, `CLK_ADDRESS_CLAMP` or + `CLK_ADDRESS_NONE`; otherwise the values returned are undefined. + + Values returned by *read_imageh* for image objects with + _image_channel_data_type_ values not specified in the description + above are undefined. + + <> support for the `<>` extension + macro. +endif::cl_khr_fp16[] | | | int4 *read_imagei*(read_only image1d_t _image_, sampler_t _sampler_, int _coord_) + @@ -8942,16 +10586,16 @@ supported footnote:[{fn-read-image-with-sampler}]. _coord.y_ in the 1D image array specified by _image_. *read_imagef* returns floating-point values in the range [0.0, 1.0] - for image objects created with image_channel_data_type set to one of + for image objects created with _image_channel_data_type_ set to one of the pre-defined packed formats or `CL_UNORM_INT8`, or `CL_UNORM_INT16`. *read_imagef* returns floating-point values in the range [-1.0, 1.0] - for image objects created with image_channel_data_type set to + for image objects created with _image_channel_data_type_ set to `CL_SNORM_INT8`, or `CL_SNORM_INT16`. *read_imagef* returns floating-point values for image objects created - with image_channel_data_type set to `CL_HALF_FLOAT` or `CL_FLOAT`. + with _image_channel_data_type_ set to `CL_HALF_FLOAT` or `CL_FLOAT`. The *read_imagef* calls that take integer coordinates must use a sampler with filter mode set to `CLK_FILTER_NEAREST`, normalized @@ -8960,10 +10604,45 @@ supported footnote:[{fn-read-image-with-sampler}]. `CLK_ADDRESS_NONE`; otherwise the values returned are undefined. Values returned by *read_imagef* for image objects with - image_channel_data_type values not specified in the description above + _image_channel_data_type_ values not specified in the description above are undefined. 
<> support for OpenCL C 1.2 or newer. +ifdef::cl_khr_fp16[] +| | +| half4 *read_imageh*(read_only image1d_array_t _image_, + sampler_t _sampler_, int2 _coord_) + + half4 *read_imageh*(read_only image1d_array_t _image_, + sampler_t _sampler_, float2 _coord_) + | Use _coord.x_ to do an element lookup in the 1D image identified by + _coord.y_ in the 1D image array specified by _image_. + + *read_imageh* returns half-precision floating-point values in the + range [0.0, 1.0] for image objects created with + _image_channel_data_type_ set to one of the pre-defined packed formats + or `CL_UNORM_INT8`, or `CL_UNORM_INT16`. + + *read_imageh* returns half-precision floating-point values in the + range [-1.0, 1.0] for image objects created with + _image_channel_data_type_ set to `CL_SNORM_INT8`, or `CL_SNORM_INT16`. + + *read_imageh* returns half-precision floating-point values for image + objects created with _image_channel_data_type_ set to `CL_HALF_FLOAT`. + + The *read_imageh* calls that take integer coordinates must use a + sampler with filter mode set to `CLK_FILTER_NEAREST`, normalized + coordinates set to `CLK_NORMALIZED_COORDS_FALSE` and addressing mode + set to `CLK_ADDRESS_CLAMP_TO_EDGE`, `CLK_ADDRESS_CLAMP` or + `CLK_ADDRESS_NONE`; otherwise the values returned are undefined. + + Values returned by *read_imageh* for image objects with + _image_channel_data_type_ values not specified in the description above + are undefined. + + <> support for the `<>` extension + macro. +endif::cl_khr_fp16[] +| | | int4 *read_imagei*(read_only image1d_array_t _image_, sampler_t _sampler_, int2 _coord_) + int4 *read_imagei*(read_only image1d_array_t _image_, sampler_t _sampler_, @@ -9035,8 +10714,8 @@ supported footnote:[{fn-read-image-with-sampler}]. _image_channel_data_type_ values not specified in the description above are undefined. - <> support for OpenCL C 2.0 or newer, also see - `cl_khr_depth_images` extension. 
+ <> support for OpenCL C 2.0 or newer, or for + the `<>` extension macro. | | | float *read_imagef*(read_only image2d_array_depth_t _image_, sampler_t _sampler_, int4 _coord_) + @@ -9062,19 +10741,341 @@ supported footnote:[{fn-read-image-with-sampler}]. _image_channel_data_type_ values not specified in the description above are undefined. - <> support for OpenCL C 2.0 or newer, also see - `cl_khr_depth_images` extension. + <> support for OpenCL C 2.0 or newer, or for + the `<>` extension macro. | | + +ifdef::cl_khr_mipmap_image[] +a| +[source,opencl_c] +---- +float4 read_imagef( + read_only image2d_t image, + sampler_t sampler, + float2 coord, + float lod) + +int4 read_imagei( + read_only image2d_t image, + sampler_t sampler, + float2 coord, + float lod) + +uint4 read_imageui( + read_only image2d_t image, + sampler_t sampler, + float2 coord, + float lod) + +float read_imagef( + read_only image2d_depth_t image, + sampler_t sampler, + float2 coord, + float lod) +---- + | Use the coordinate _coord.xy_ to do an element lookup in the mip level + specified by _lod_ in the 2D image object specified by _image_. + + <> support for the `<>` + extension macro. +a| +[source,opencl_c] +---- +float4 read_imagef( + read_only image2d_t image, + sampler_t sampler, + float2 coord, + float2 gradient_x, + float2 gradient_y) + +int4 read_imagei( + read_only image2d_t image, + sampler_t sampler, + float2 coord, + float2 gradient_x, + float2 gradient_y) + +uint4 read_imageui( + read_only image2d_t image, + sampler_t sampler, + float2 coord, + float2 gradient_x, + float2 gradient_y) + +float read_imagef( + read_only image2d_depth_t image, + sampler_t sampler, + float2 coord, + float2 gradient_x, + float2 gradient_y) +---- + | Use the gradients to compute the lod and coordinate _coord.xy_ to do + an element lookup in the mip level specified by the computed lod in + the 2D image object specified by _image_. + + <> support for the `<>` + extension macro. 
+a| +[source,opencl_c] +---- +float4 read_imagef( + read_only image1d_t image, + sampler_t sampler, + float coord, + float lod) + +int4 read_imagei( + read_only image1d_t image, + sampler_t sampler, + float coord, + float lod) + +uint4 read_imageui( + read_only image1d_t image, + sampler_t sampler, + float coord, + float lod) +---- + | Use the coordinate _coord_ to do an element lookup in the mip level + specified by _lod_ in the 1D image object specified by _image_. + + <> support for the `<>` + extension macro. +a| +[source,opencl_c] +---- +float4 read_imagef( + read_only image1d_t image, + sampler_t sampler, + float coord, + float gradient_x, + float gradient_y) + +int4 read_imagei( + read_only image1d_t image, + sampler_t sampler, + float coord, + float gradient_x, + float gradient_y) + +uint4 read_imageui( + read_only image1d_t image, + sampler_t sampler, + float coord, + float gradient_x, + float gradient_y) +---- + | Use the gradients to compute the lod and coordinate _coord_ to do an + element lookup in the mip level specified by the computed lod in the + 1D image object specified by _image_. + + <> support for the `<>` + extension macro. +a| +[source,opencl_c] +---- +float4 read_imagef( + read_only image3d_t image, + sampler_t sampler, + float4 coord, + float lod) + +int4 read_imagei( + read_only image3d_t image, + sampler_t sampler, + float4 coord, + float lod) + +uint4 read_imageui( + read_only image3d_t image, + sampler_t sampler, + float4 coord, + float lod) +---- + | Use the coordinate _coord.xyz_ to do an element lookup in the mip + level specified by _lod_ in the 3D image object specified by _image_. + + <> support for the `<>` + extension macro. 
+a| +[source,opencl_c] +---- +float4 read_imagef( + read_only image3d_t image, + sampler_t sampler, + float4 coord, + float4 gradient_x, + float4 gradient_y) + +int4 read_imagei( + read_only image3d_t image, + sampler_t sampler, + float4 coord, + float4 gradient_x, + float4 gradient_y) + +uint4 read_imageui( + read_only image3d_t image, + sampler_t sampler, + float4 coord, + float4 gradient_x, + float4 gradient_y) +---- + | Use the gradients to compute the lod and coordinate _coord.xyz_ to do + an element lookup in the mip level specified by the computed lod in + the 3D image object specified by _image_. + + <> support for the `<>` + extension macro. +a| +[source,opencl_c] +---- +float4 read_imagef( + read_only image1d_array_t image, + sampler_t sampler, + float2 coord, + float lod) + +int4 read_imagei( + read_only image1d_array_t image, + sampler_t sampler, + float2 coord, + float lod) + +uint4 read_imageui( + read_only image1d_array_t image, + sampler_t sampler, + float2 coord, + float lod) +---- + | Use the coordinate _coord.x_ to do an element lookup in the 1D image + identified by _coord.y_ and mip level specified by _lod_ in the 1D + image array specified by _image_. + + <> support for the `<>` + extension macro. +a| +[source,opencl_c] +---- +float4 read_imagef( + read_only image1d_array_t image, + sampler_t sampler, + float2 coord, + float gradient_x, + float gradient_y) + +int4 read_imagei( + read_only image1d_array_t image, + sampler_t sampler, + float2 coord, + float gradient_x, + float gradient_y) + +uint4 read_imageui( + read_only image1d_array_t image, + sampler_t sampler, + float2 coord, + float gradient_x, + float gradient_y) +---- + | Use the gradients to compute the lod and coordinate _coord.x_ to do an + element lookup in the mip level specified by the computed lod in the + 1D image array specified by _image_. + + <> support for the `<>` + extension macro.
+a| +[source,opencl_c] +---- +float4 read_imagef( + read_only image2d_array_t image, + sampler_t sampler, + float4 coord, + float lod) + +int4 read_imagei( + read_only image2d_array_t image, + sampler_t sampler, + float4 coord, + float lod) + +uint4 read_imageui( + read_only image2d_array_t image, + sampler_t sampler, + float4 coord, + float lod) + +float read_imagef( + read_only image2d_array_depth_t image, + sampler_t sampler, + float4 coord, + float lod) +---- + | Use the coordinate _coord.xy_ to do an element lookup in the 2D image + identified by _coord.z_ and mip level specified by _lod_ in the 2D + image array specified by _image_. + + <> support for the `<>` + extension macro. +a| +[source,opencl_c] +---- +float4 read_imagef( + read_only image2d_array_t image, + sampler_t sampler, + float4 coord, + float2 gradient_x, + float2 gradient_y) + +int4 read_imagei( + read_only image2d_array_t image, + sampler_t sampler, + float4 coord, + float2 gradient_x, + float2 gradient_y) + +uint4 read_imageui( + read_only image2d_array_t image, + sampler_t sampler, + float4 coord, + float2 gradient_x, + float2 gradient_y) + +float read_imagef( + read_only image2d_array_depth_t image, + sampler_t sampler, + float4 coord, + float2 gradient_x, + float2 gradient_y) +---- + | Use the gradients to compute the lod and coordinate _coord.xy_ to do + an element lookup in the 2D image identified by _coord.z_ and mip + level specified by the computed lod in the 2D image array specified by + _image_. + + <> support for the `<>` + extension macro. +endif::cl_khr_mipmap_image[] + |==== -- +ifdef::cl_khr_mipmap_image[] +NOTE: If the `<>` extension macro is supported, +`CL_SAMPLER_NORMALIZED_COORDS` must be `CL_TRUE` for built-in functions +described in the table above that read from a mipmapped image; otherwise +behavior is undefined.
+The value specified in the _lod_ argument is clamped to the minimum of +(actual number of mip levels - 1) in the image or the value specified for +`CL_SAMPLER_LOD_MAX`. +endif::cl_khr_mipmap_image[] + [[built-in-image-sampler-less-read-functions]] ==== Built-in Image Sampler-less Read Functions [open,refpage='imageSamplerlessReadFunctions',desc='Built-in Image Sampler-less Read Functions',type='freeform',spec='clang',anchor='built-in-image-sampler-less-read-functions',xrefs='imageQueryFunctions imageReadFunctions imageWriteFunctions'] -- - NOTE: Sampler-less image read functions <> support for OpenCL C 1.2 or newer, with some functions requiring support for newer versions of OpenCL C as noted in the <> support for the `<>` extension + macro. +endif::cl_khr_fp16[] +| | +| int4 *read_imagei*(_aQual_ image2d_t _image_, int2 _coord_) + + uint4 *read_imageui*(_aQual_ image2d_t _image_, int2 _coord_) + | Use the coordinate (_coord.x_, _coord.y_) to do an element lookup in + the 2D image object specified by _image_. + + *read_imagei* and *read_imageui* return unnormalized signed integer + and unsigned integer values respectively. Each channel will be stored + in a 32-bit integer. *read_imagei* can only be used with image objects created with _image_channel_data_type_ set to one of the following values: @@ -9169,6 +11195,32 @@ For samplerless read functions this may be `read_only` or `read_write`. Values returned by *read_imagef* for image objects with _image_channel_data_type_ values not specified in the description are undefined. +ifdef::cl_khr_fp16[] +| | +| half4 *read_imageh*(_aQual_ image3d_t _image_, int4 _coord_ ) + | Use the coordinate _(coord.x_, _coord.y_, _coord.z)_ to do an element + lookup in the 3D image object specified by _image_. _coord.w_ is + ignored. 
+ + *read_imageh* returns half-precision floating-point values in the + range [0.0, 1.0] for image objects created with + _image_channel_data_type_ set to one of the pre-defined packed formats + or `CL_UNORM_INT8`, or `CL_UNORM_INT16`. + + *read_imageh* returns half-precision floating-point values in the + range [-1.0, 1.0] for image objects created with + _image_channel_data_type_ set to `CL_SNORM_INT8`, or `CL_SNORM_INT16`. + + *read_imageh* returns half-precision floating-point values for image + objects created with _image_channel_data_type_ set to `CL_HALF_FLOAT`. + + Values returned by *read_imageh* for image objects with + _image_channel_data_type_ values not specified in the description are + undefined. + + <> support for the `<>` extension + macro. +endif::cl_khr_fp16[] | | | int4 *read_imagei*(_aQual_ image3d_t _image_, int4 _coord_) + uint4 *read_imageui*(_aQual_ image3d_t _image_, int4 _coord_) @@ -9219,6 +11271,31 @@ For samplerless read functions this may be `read_only` or `read_write`. Values returned by *read_imagef* for image objects with _image_channel_data_type_ values not specified in the description above are undefined. +ifdef::cl_khr_fp16[] +| | +| half4 *read_imageh*(_aQual_ image2d_array_t _image_, int4 _coord_) + | Use _coord.xy_ to do an element lookup in the 2D image identified by + _coord.z_ in the 2D image array specified by _image_. + + *read_imageh* returns half-precision floating-point values in the + range [0.0, 1.0] for image objects created with + _image_channel_data_type_ set to one of the pre-defined packed formats + or `CL_UNORM_INT8`, or `CL_UNORM_INT16`. + + *read_imageh* returns half-precision floating-point values in the + range [-1.0, 1.0] for image objects created with + _image_channel_data_type_ set to `CL_SNORM_INT8`, or `CL_SNORM_INT16`. + + *read_imageh* returns half-precision floating-point values for image + objects created with _image_channel_data_type_ set to `CL_HALF_FLOAT`. 
+ + Values returned by *read_imageh* for image objects with + _image_channel_data_type_ values not specified in the description + above are undefined. + + <> support for the `<>` extension + macro. +endif::cl_khr_fp16[] | | | int4 *read_imagei*(_aQual_ image2d_array_t _image_, int4 _coord_) + uint4 *read_imageui*(_aQual_ image2d_array_t _image_, int4 _coord_) @@ -9269,6 +11346,32 @@ For samplerless read functions this may be `read_only` or `read_write`. Values returned by *read_imagef* for image objects with _image_channel_data_type_ values not specified in the description above are undefined. +ifdef::cl_khr_fp16[] +| | +| half4 *read_imageh*(_aQual_ image1d_t _image_, int _coord_) + + half4 *read_imageh*(_aQual_ image1d_buffer_t _image_, int _coord_) + | Use _coord_ to do an element lookup in the 1D image or 1D image buffer + object specified by _image_. + + *read_imageh* returns half-precision floating-point values in the + range [0.0, 1.0] for image objects created with + _image_channel_data_type_ set to one of the pre-defined packed formats + or `CL_UNORM_INT8`, or `CL_UNORM_INT16`. + + *read_imageh* returns half-precision floating-point values in the + range [-1.0, 1.0] for image objects created with + _image_channel_data_type_ set to `CL_SNORM_INT8`, or `CL_SNORM_INT16`. + + *read_imageh* returns half-precision floating-point values for image + objects created with _image_channel_data_type_ set to `CL_HALF_FLOAT`. + + Values returned by *read_imageh* for image objects with + _image_channel_data_type_ values not specified in the description + above are undefined. + + <> support for the `<>` extension + macro. +endif::cl_khr_fp16[] | | | int4 *read_imagei*(_aQual_ image1d_t _image_, int _coord_) + uint4 *read_imageui*(_aQual_ image1d_t _image_, int _coord_) + @@ -9320,6 +11423,31 @@ For samplerless read functions this may be `read_only` or `read_write`. 
Values returned by *read_imagef* for image objects with _image_channel_data_type_ values not specified in the description above are undefined. +ifdef::cl_khr_fp16[] +| | +| half4 *read_imageh*(_aQual_ image1d_array_t _image_, int2 _coord_) + | Use _coord.x_ to do an element lookup in the 1D image identified by + _coord.y_ in the 1D image array specified by _image_. + + *read_imageh* returns half-precision floating-point values in the + range [0.0, 1.0] for image objects created with + _image_channel_data_type_ set to one of the pre-defined packed formats + or `CL_UNORM_INT8`, or `CL_UNORM_INT16`. + + *read_imageh* returns half-precision floating-point values in the + range [-1.0, 1.0] for image objects created with + _image_channel_data_type_ set to `CL_SNORM_INT8`, or `CL_SNORM_INT16`. + + *read_imageh* returns half-precision floating-point values for image + objects created with _image_channel_data_type_ set to `CL_HALF_FLOAT`. + + Values returned by *read_imageh* for image objects with + _image_channel_data_type_ values not specified in the description + above are undefined. + + <> support for the `<>` extension + macro. +endif::cl_khr_fp16[] | | | int4 *read_imagei*(_aQual_ image1d_array_t _image_, int2 _coord_) + uint4 *read_imageui*(_aQual_ image1d_array_t _image_, int2 _coord_) @@ -9365,8 +11493,8 @@ For samplerless read functions this may be `read_only` or `read_write`. _image_channel_data_type_ values not specified in the description above are undefined. - <> support for OpenCL C 2.0 or newer, also see - `cl_khr_depth_images` extension. + <> support for OpenCL C 2.0 or newer, or for + the `<>` extension macro. | | | float *read_imagef*(_aQual_ image2d_array_depth_t _image_, int4 _coord_) | Use _coord.xy_ to do an element lookup in the 2D image identified by @@ -9383,9 +11511,211 @@ For samplerless read functions this may be `read_only` or `read_write`. _image_channel_data_type_ values not specified in the description above are undefined.
- <> support for OpenCL C 2.0 or newer, also see - `cl_khr_depth_images` extension. + <> support for OpenCL C 2.0 or newer, or for + the `<>` extension macro. | | + +ifdef::cl_khr_gl_msaa_sharing[] +a| +[source,opencl_c] +---- +float4 read_imagef( + image2d_msaa_t image, + int2 coord, + int sample) +---- + | Use the coordinate _(coord.x, coord.y)_ and _sample_ to do an element + lookup in the 2D image object specified by _image_. + + *read_imagef* returns floating-point values in the range [0.0, 1.0] + for image objects created with _image_channel_data_type_ set to one of + the pre-defined packed formats or `CL_UNORM_INT8`, or + `CL_UNORM_INT16`. + + *read_imagef* returns floating-point values in the range [-1.0, 1.0] + for image objects created with _image_channel_data_type_ set to + `CL_SNORM_INT8`, or `CL_SNORM_INT16`. + + *read_imagef* returns floating-point values for image objects created + with _image_channel_data_type_ set to `CL_HALF_FLOAT` or `CL_FLOAT`. + + Values returned by *read_imagef* for image objects with + _image_channel_data_type_ values not specified in the description + above are undefined. + + <> support for the + `<>` extension macro. +a| +[source,opencl_c] +---- +int4 read_imagei(image2d_msaa_t image, + int2 coord, + int sample) + +uint4 read_imageui(image2d_msaa_t image, + int2 coord, + int sample) +---- + | Use the coordinate _(coord.x, coord.y)_ and _sample_ to do an element + lookup in the 2D image object specified by _image_. + + *read_imagei* and *read_imageui* return unnormalized signed integer + and unsigned integer values respectively. + Each channel will be stored in a 32-bit integer. + + *read_imagei* can only be used with image objects created with + _image_channel_data_type_ set to one of the following values: + + * `CL_SIGNED_INT8`, + * `CL_SIGNED_INT16`, and + * `CL_SIGNED_INT32`. + + If the _image_channel_data_type_ is not one of the above values, the + values returned by *read_imagei* are undefined. 
+ + *read_imageui* can only be used with image objects created with + _image_channel_data_type_ set to one of the following values: + + * `CL_UNSIGNED_INT8`, + * `CL_UNSIGNED_INT16`, and + * `CL_UNSIGNED_INT32`. + + If the _image_channel_data_type_ is not one of the above values, the + values returned by *read_imageui* are undefined. + + <> support for the + `<>` extension macro. +a| +[source,opencl_c] +---- +float4 read_imagef(image2d_array_msaa_t image, + int4 coord, + int sample) +---- + | Use _coord.xy_ and _sample_ to do an element lookup in the 2D image + identified by _coord.z_ in the 2D image array specified by _image_. + + *read_imagef* returns floating-point values in the range [0.0, 1.0] + for image objects created with _image_channel_data_type_ set to one of + the pre-defined packed formats or `CL_UNORM_INT8`, or + `CL_UNORM_INT16`. + + *read_imagef* returns floating-point values in the range [-1.0, 1.0] + for image objects created with _image_channel_data_type_ set to + `CL_SNORM_INT8`, or `CL_SNORM_INT16`. + + *read_imagef* returns floating-point values for image objects created + with _image_channel_data_type_ set to `CL_HALF_FLOAT` or `CL_FLOAT`. + + Values returned by *read_imagef* for image objects with + _image_channel_data_type_ values not specified in the description + above are undefined. + + <> support for the + `<>` extension macro. +a| +[source,opencl_c] +---- +int4 read_imagei(image2d_array_msaa_t image, + int4 coord, + int sample) + +uint4 read_imageui(image2d_array_msaa_t image, + int4 coord, + int sample) +---- + | Use _coord.xy_ and _sample_ to do an element lookup in the 2D image + identified by _coord.z_ in the 2D image array specified by _image_. + + *read_imagei* and *read_imageui* return unnormalized signed integer + and unsigned integer values respectively. + Each channel will be stored in a 32-bit integer. 
+ + *read_imagei* can only be used with image objects created with + _image_channel_data_type_ set to one of the following values: + + * `CL_SIGNED_INT8`, + * `CL_SIGNED_INT16`, and + * `CL_SIGNED_INT32`. + + If the _image_channel_data_type_ is not one of the above values, the + values returned by *read_imagei* are undefined. + + *read_imageui* can only be used with image objects created with + _image_channel_data_type_ set to one of the following values: + + * `CL_UNSIGNED_INT8`, + * `CL_UNSIGNED_INT16`, and + * `CL_UNSIGNED_INT32`. + + If the _image_channel_data_type_ is not one of the above values, the + values returned by *read_imageui* are undefined. + + <> support for the + `<>` extension macro. +a| +[source,opencl_c] +---- +float read_imagef(image2d_msaa_depth_t image, + int2 coord, + int sample) +---- + | Use the coordinate _(coord.x, coord.y)_ and _sample_ to do an element + lookup in the 2D depth image object specified by _image_. + + *read_imagef* returns a floating-point value in the range [0.0, 1.0] + for depth image objects created with _image_channel_data_type_ set to + `CL_UNORM_INT16` or `CL_UNORM_INT24`. + + *read_imagef* returns a floating-point value for depth image objects + created with _image_channel_data_type_ set to `CL_FLOAT`. + + Values returned by *read_imagef* for image objects with + _image_channel_data_type_ values not specified in the description + above are undefined. + + <> support for the + `<>` extension macro. +a| +[source,opencl_c] +---- +float read_imagef(image2d_array_msaa_depth_t image, + int4 coord, + int sample) +---- + | Use _coord.xy_ and _sample_ to do an element lookup in the 2D image + identified by _coord.z_ in the 2D depth image array specified by + _image_. + + *read_imagef* returns a floating-point value in the range [0.0, 1.0] + for depth image objects created with _image_channel_data_type_ set to + `CL_UNORM_INT16` or `CL_UNORM_INT24`.
+ + *read_imagef* returns a floating-point value for depth image objects + created with _image_channel_data_type_ set to `CL_FLOAT`. + + Values returned by *read_imagef* for image objects with + _image_channel_data_type_ values not specified in the description + above are undefined. + + Note: When a multisample image is accessed in a kernel, the access + takes one vector of integers describing which pixel to fetch and an + integer corresponding to the sample numbers describing which sample + within the pixel to fetch. + _sample_ identifies the sample position in the multi-sample image. + + *For best performance, we recommend that _sample_ be a literal value + so it is known at compile time and the OpenCL compiler can perform + appropriate optimizations for multi-sample reads on the device*. + + No standard sampling instructions are allowed on the multisample + image. Accessing a coordinate outside the image and/or a sample that + is outside the number of samples associated with each pixel in the + image is undefined. + + <> support for the + `<>` extension macro. +endif::cl_khr_gl_msaa_sharing[] |==== -- @@ -9395,18 +11725,39 @@ For samplerless read functions this may be `read_only` or `read_write`. [open,refpage='imageWriteFunctions',desc='Built-in Image Write Functions',type='freeform',spec='clang',anchor='built-in-image-write-functions',xrefs='imageQueryFunctions imageReadFunctions imageSamplerlessReadFunctions',alias='write_imagef write_imagei write_imageui'] -- - The following built-in function calls to write images are supported. _aQual_ in the following table refers to one of the access qualifiers. For write functions this may be `write_only` or `read_write`. +ifdef::cl_khr_mipmap_image_writes[] +If the `<>` extension macro is supported, write +functions which do not explicitly specify a level of detail _lod_ write to +mip level 0 if _image_ is a mipmapped image.
+_mipwidth_, _mipheight_, and _mipdepth_ in the table refer to the width, +height, and depth of the _image_ mip level specified by _lod_ respectively; +_miplayers_ refers to the number of layers in _image_; and _miplevels_ +refers to the number of mip levels in _image_. +endif::cl_khr_mipmap_image_writes[] + +ifdef::cl_khr_srgb_image_writes[] +If the `<>` extension macro is supported, the +*write_imagef* functions described below may write to sRGB images. +Linear to sRGB conversion is performed by the function. +Only the R, G, and B components are converted from linear to sRGB; the A +component is written as-is. +endif::cl_khr_srgb_image_writes[] + + [[table-image-write]] .Built-in Image Write Functions -[cols=",",] +[cols=",",options="header",] |==== -| *Function* | *Description* +| Function | Description | void *write_imagef*(_aQual_ image2d_t _image_, int2 _coord_, float4 _color_) + +ifdef::cl_khr_fp16[] + void *write_imageh*(_aQual_ image2d_t _image_, int2 _coord_, half4 _color_) + +endif::cl_khr_fp16[] void *write_imagei*(_aQual_ image2d_t _image_, int2 _coord_, int4 _color_) + void *write_imageui*(_aQual_ image2d_t _image_, int2 _coord_, uint4 _color_) | Write _color_ value to location specified by _coord.xy_ in the 2D @@ -9417,7 +11768,9 @@ For write functions this may be `write_only` or `read_write`. and must be in the range [0, image width-1] and [0, image height-1] respectively. - *write_imagef* can only be used with image objects created with + *write_imagef* +ifdef::cl_khr_fp16[and *write_imageh*] + can only be used with image objects created with _image_channel_data_type_ set to one of the pre-defined packed formats or set to `CL_SNORM_INT8`, `CL_UNORM_INT8`, `CL_SNORM_INT16`, `CL_UNORM_INT16`, `CL_HALF_FLOAT` or `CL_FLOAT`. @@ -9439,14 +11792,25 @@ For write functions this may be `write_only` or `read_write`. `CL_UNSIGNED_INT16` and + `CL_UNSIGNED_INT32`. 
- The behavior of *write_imagef*, *write_imagei* and *write_imageui* for + The behavior of *write_imagef*, +ifdef::cl_khr_fp16[*write_imageh*,] + *write_imagei* and *write_imageui* for image objects created with _image_channel_data_type_ values not specified in the description above or with _x_ and _y_ coordinate values that are not in the range [0, image width-1] and [0, image height-1], respectively, is undefined. + +ifdef::cl_khr_fp16[] + *write_imageh* <> support for the + `<>` extension macro. +endif::cl_khr_fp16[] | | | void *write_imagef*(_aQual_ image2d_array_t _image_, int4 _coord_, float4 _color_) + +ifdef::cl_khr_fp16[] + void *write_imageh*(_aQual_ image2d_array_t _image_, int4 _coord_, + half4 _color_) + +endif::cl_khr_fp16[] void *write_imagei*(_aQual_ image2d_array_t _image_, int4 _coord_, int4 _color_) + void *write_imageui*(_aQual_ image2d_array_t _image_, int4 _coord_, @@ -9460,7 +11824,9 @@ For write functions this may be `write_only` or `read_write`. coordinates, and must be in the range [0, image width-1] and [0, image height-1], and [0, image number of layers-1], respectively. - *write_imagef* can only be used with image objects created with + *write_imagef* +ifdef::cl_khr_fp16[and *write_imageh*] + can only be used with image objects created with _image_channel_data_type_ set to one of the pre-defined packed formats or set to `CL_SNORM_INT8`, `CL_UNORM_INT8`, `CL_SNORM_INT16`, `CL_UNORM_INT16`, `CL_HALF_FLOAT` or `CL_FLOAT`. @@ -9482,21 +11848,36 @@ For write functions this may be `write_only` or `read_write`. `CL_UNSIGNED_INT16` and + `CL_UNSIGNED_INT32`. 
- The behavior of *write_imagef*, *write_imagei* and *write_imageui* for + The behavior of *write_imagef*, +ifdef::cl_khr_fp16[*write_imageh*,] + *write_imagei* and *write_imageui* for image objects created with _image_channel_data_type_ values not specified in the description above or with (_x_, _y_, _z_) coordinate values that are not in the range [0, image width-1], [0, image height-1], and [0, image number of layers-1], respectively, is undefined. + +ifdef::cl_khr_fp16[] + *write_imageh* <> support for the + `<>` extension macro. +endif::cl_khr_fp16[] | | | void *write_imagef*(_aQual_ image1d_t _image_, int _coord_, float4 _color_) + +ifdef::cl_khr_fp16[] + void *write_imageh*(_aQual_ image1d_t _image_, int _coord_, + half4 _color_) + +endif::cl_khr_fp16[] void *write_imagei*(_aQual_ image1d_t _image_, int _coord_, int4 _color_) + void *write_imageui*(_aQual_ image1d_t _image_, int _coord_, uint4 _color_) + void *write_imagef*(_aQual_ image1d_buffer_t _image_, int _coord_, float4 _color_) + +ifdef::cl_khr_fp16[] + void *write_imageh*(_aQual_ image1d_buffer_t _image_, int _coord_, + half4 _color_) + +endif::cl_khr_fp16[] void *write_imagei*(_aQual_ image1d_buffer_t _image_, int _coord_, int4 _color_) + void *write_imageui*(_aQual_ image1d_buffer_t _image_, int _coord_, @@ -9508,7 +11889,9 @@ For write functions this may be `write_only` or `read_write`. _coord_ is considered to be an unnormalized coordinate, and must be in the range [0, image width-1]. - *write_imagef* can only be used with image objects created with + *write_imagef* +ifdef::cl_khr_fp16[and *write_imageh*] + can only be used with image objects created with _image_channel_data_type_ set to one of the pre-defined packed formats or set to `CL_SNORM_INT8`, `CL_UNORM_INT8`, `CL_SNORM_INT16`, `CL_UNORM_INT16`, `CL_HALF_FLOAT` or `CL_FLOAT`. @@ -9530,15 +11913,26 @@ For write functions this may be `write_only` or `read_write`. `CL_UNSIGNED_INT16` and + `CL_UNSIGNED_INT32`. 
- The behavior of *write_imagef*, *write_imagei* and *write_imageui* for + The behavior of *write_imagef*, +ifdef::cl_khr_fp16[*write_imageh*,] + *write_imagei* and *write_imageui* for image objects created with _image_channel_data_type_ values not specified in the description above, or with a coordinate value that is not in the range [0, image width-1], is undefined. <> support for OpenCL C 1.2 or newer. + +ifdef::cl_khr_fp16[] + *write_imageh* <> support for the + `<>` extension macro. +endif::cl_khr_fp16[] | | | void *write_imagef*(_aQual_ image1d_array_t _image_, int2 _coord_, float4 _color_) + +ifdef::cl_khr_fp16[] + void *write_imageh*(_aQual_ image1d_array_t _image_, int2 _coord_, + half4 _color_) + +endif::cl_khr_fp16[] void *write_imagei*(_aQual_ image1d_array_t _image_, int2 _coord_, int4 _color_) + void *write_imageui*(_aQual_ image1d_array_t _image_, int2 _coord_, @@ -9551,7 +11945,9 @@ For write functions this may be `write_only` or `read_write`. and must be in the range [0, image width-1] and [0, image number of layers-1], respectively. - *write_imagef* can only be used with image objects created with + *write_imagef* +ifdef::cl_khr_fp16[and *write_imageh*] + can only be used with image objects created with _image_channel_data_type_ set to one of the pre-defined packed formats or set to `CL_SNORM_INT8`, `CL_UNORM_INT8`, `CL_SNORM_INT16`, `CL_UNORM_INT16`, `CL_HALF_FLOAT` or `CL_FLOAT`. @@ -9573,7 +11969,9 @@ For write functions this may be `write_only` or `read_write`. `CL_UNSIGNED_INT16` and + `CL_UNSIGNED_INT32`. 
- The behavior of *write_imagef*, *write_imagei* and *write_imageui* for + The behavior of *write_imagef*, +ifdef::cl_khr_fp16[*write_imageh*,] + *write_imagei* and *write_imageui* for image objects created with _image_channel_data_type_ values not specified in the description above or with (_x_, _y_) coordinate values that are not in the range [0, image width-1] and [0, image @@ -9604,8 +12002,8 @@ For write functions this may be `write_only` or `read_write`. values that are not in the range [0, image width-1] and [0, image height-1], respectively, is undefined. - <> support for OpenCL C 2.0 or newer, also see - `cl_khr_depth_images` extension. + <> support for OpenCL C 2.0 or newer, or for + the `<>` extension macro. | | | void *write_imagef*(_aQual_ image2d_array_depth_t _image_, int4 _coord_, float _depth_) @@ -9631,25 +12029,31 @@ For write functions this may be `write_only` or `read_write`. values that are not in the range [0, image width-1], [0, image height-1], [0, image number of layers-1], respectively, is undefined. - <> support for OpenCL C 2.0 or newer, also see - `cl_khr_depth_images` extension. + <> support for OpenCL C 2.0 or newer, or for + the `<>` extension macro. | | | void *write_imagef*(_aQual_ image3d_t _image_, int4 _coord_, float4 _color_) + +ifdef::cl_khr_fp16[] + void *write_imageh*(_aQual_ image3d_t _image_, int4 _coord_, + half4 _color_) + +endif::cl_khr_fp16[] void *write_imagei*(_aQual_ image3d_t _image_, int4 _coord_, int4 _color_) + void *write_imageui*(_aQual_ image3d_t _image_, int4 _coord_, uint4 _color_) - | Write color value to location specified by _coord.xyz_ in the 3D image - object specified by _image_. + | Write _color_ value to the location specified by _coord.xyz_ in the 3D + image object specified by _image_. Appropriate data format conversion to the specified image format is done before writing the color value. 
_coord.x_, _coord.y_ and _coord.z_ are considered to be unnormalized coordinates, and must be in the range [0, image width-1], [0, image height-1], and [0, image depth-1], respectively. - *write_imagef* can only be used with image objects created with - image_channel_data_type set to one of the pre-defined packed formats + *write_imagef* +ifdef::cl_khr_fp16[and *write_imageh*] + can only be used with image objects created with + _image_channel_data_type_ set to one of the pre-defined packed formats or set to `CL_SNORM_INT8`, `CL_UNORM_INT8`, `CL_SNORM_INT16`, `CL_UNORM_INT16`, `CL_HALF_FLOAT` or `CL_FLOAT`. Appropriate data format conversion will be done to convert channel @@ -9657,28 +12061,225 @@ For write functions this may be `write_only` or `read_write`. channels are stored. *write_imagei* can only be used with image objects created with - image_channel_data_type set to one of the following values: + _image_channel_data_type_ set to one of the following values: `CL_SIGNED_INT8`, + - `CL_SIGNED_INT16` and + + `CL_SIGNED_INT16`, or + `CL_SIGNED_INT32`. *write_imageui* can only be used with image objects created with - image_channel_data_type set to one of the following values: + _image_channel_data_type_ set to one of the following values: `CL_UNSIGNED_INT8`, + - `CL_UNSIGNED_INT16` and + + `CL_UNSIGNED_INT16`, or + `CL_UNSIGNED_INT32`. - The behavior of *write_imagef*, *write_imagei* and *write_imageui* for + The behavior of *write_imagef*, +ifdef::cl_khr_fp16[*write_imageh*,] + *write_imagei* and *write_imageui* for image objects with _image_channel_data_type_ values not specified in the description above or with (_x_, _y_, _z_) coordinate values that are not in the range [0, image width-1], [0, image height-1], and [0, image depth-1], respectively, is undefined. <> support for OpenCL C 2.0, or OpenCL C 3.0 or - newer and the {opencl_c_3d_image_writes} feature, or the - `cl_khr_3d_image_writes` extension. 
+ newer and the {c_3d_image_writes} feature, or the + `<>` extension. + +ifdef::cl_khr_fp16[] + *write_imageh* <> support for the + `<>` extension macro. +endif::cl_khr_fp16[] + +ifdef::cl_khr_mipmap_image_writes[] +a| +[source,opencl_c] +---- +void write_imagef( + write_only image2d_t image, + int2 coord, + int lod, + float4 color) + +void write_imagei( + write_only image2d_t image, + int2 coord, + int lod, + int4 color) + +void write_imageui( + write_only image2d_t image, + int2 coord, + int lod, + uint4 color) + +void write_imagef( + write_only image2d_depth_t image, + int2 coord, + int lod, + float depth) +---- + | Write _color_ value to location specified by _coord.xy_ in the mip + level specified by _lod_ in the 2D image object specified by _image_. + Appropriate data format conversion to the specified image format is + done before writing the color value. + + _lod_ must be in the range [0, _miplevels_-1]. + _coord.x_ and _coord.y_ are considered to be unnormalized coordinates + and must be in the range [0, _mipwidth_-1] and [0, _mipheight_-1] + respectively. + Behavior is undefined if _lod_, _coord.x_, or _coord.y_ is not in + range. + + <> support for the + `<>` extension macro. +a| +[source,opencl_c] +---- +void write_imagef( + write_only image1d_t image, + int coord, + int lod, + float4 color) + +void write_imagei( + write_only image1d_t image, + int coord, + int lod, + int4 color) + +void write_imageui( + write_only image1d_t image, + int coord, + int lod, + uint4 color) +---- + | Write _color_ value to location specified by _coord_ in the mip level + specified by _lod_ in the 1D image object specified by _image_. + Appropriate data format conversion to the specified image format is + done before writing the color value. + + _lod_ must be in the range [0, _miplevels_-1]. + _coord_ is considered to be an unnormalized coordinate and must be in + the range [0, _mipwidth_-1]. + Behavior is undefined if _lod_ or _coord_ is not in range. 
+ + <> support for the + `<>` extension macro. +a| +[source,opencl_c] +---- +void write_imagef( + write_only image1d_array_t image, + int2 coord, + int lod, + float4 color) + +void write_imagei( + write_only image1d_array_t image, + int2 coord, + int lod, + int4 color) + +void write_imageui( + write_only image1d_array_t image, + int2 coord, + int lod, + uint4 color) +---- + | Write _color_ value to location specified by _coord.x_ in the 1D image + identified by _coord.y_ and mip level _lod_ in the 1D image array + specified by _image_. + Appropriate data format conversion to the specified image format is done + before writing the color value. + + _lod_ must be in the range [0, _miplevels_-1]. + _coord.x_ and _coord.y_ are considered to be unnormalized coordinates + and must be in the range [0, _mipwidth_-1] and [0, _miplayers_-1] + respectively. + Behavior is undefined if _lod_, _coord.x_, or _coord.y_ is not in range. + + <> support for the + `<>` extension macro. +a| +[source,opencl_c] +---- +void write_imagef( + write_only image2d_array_t image, + int4 coord, + int lod, + float4 color) + +void write_imagei( + write_only image2d_array_t image, + int4 coord, + int lod, + int4 color) + +void write_imageui( + write_only image2d_array_t image, + int4 coord, + int lod, + uint4 color) + +void write_imagef( + write_only image2d_array_depth_t image, + int4 coord, + int lod, + float depth) +---- + | Write _color_ value to location specified by _coord.xy_ in the 2D image + identified by _coord.z_ and mip level _lod_ in the 2D image array + specified by _image_. + Appropriate data format conversion to the specified image format is done + before writing the color value. + + _lod_ must be in the range [0, _miplevels_-1]. + _coord.x_, _coord.y_ and _coord.z_ are considered to be unnormalized + coordinates and must be in the range [0, _mipwidth_-1], [0, + _mipheight_-1], and [0, _miplayers_-1] respectively.
+ Behavior is undefined if + _lod_, _coord.x_, _coord.y_, or _coord.z_ is not in range. + + <> support for the + `<>` extension macro. +a| +[source,opencl_c] +---- +void write_imagef( + write_only image3d_t image, + int4 coord, + int lod, + float4 color) + +void write_imagei( + write_only image3d_t image, + int4 coord, + int lod, + int4 color) + +void write_imageui( + write_only image3d_t image, + int4 coord, + int lod, + uint4 color) +---- + | Write _color_ value to location specified by _coord.xyz_ and mip level + _lod_ in the 3D image object specified by _image_. + Appropriate data format conversion to the specified image format is done + before writing the color value. + + _lod_ must be in the range [0, _miplevels_-1]. + _coord.x_, _coord.y_ and _coord.z_ are considered to be unnormalized + coordinates and must be in the range [0, _mipwidth_-1], [0, + _mipheight_-1] and [0, _mipdepth_-1] respectively. + Behavior is undefined if _lod_, _coord.x_, _coord.y_, or _coord.z_ is + not in range. + + <> support for the + `<>` extension macro. +endif::cl_khr_mipmap_image_writes[] + |==== -- @@ -9697,55 +12298,87 @@ For query functions this may be `read_only`, `write_only` or `read_write`. 
[[table-image-query]] .Built-in Image Query Functions -[cols=",",] +[cols=",",options="header",] |==== -| *Function* | *Description* +| Function | Description | int *get_image_width*(_aQual_ image2d_t _image_) + - int *get_image_width*(_aQual_ image3d_t _image_) + + int *get_image_width*(_aQual_ image3d_t _image_) - For OpenCL C 1.2 or newer: + + For OpenCL C 1.2 or newer: int *get_image_width*(_aQual_ image1d_t _image_) + int *get_image_width*(_aQual_ image1d_buffer_t _image_) + int *get_image_width*(_aQual_ image1d_array_t _image_) + - int *get_image_width*(_aQual_ image2d_array_t _image_) + + int *get_image_width*(_aQual_ image2d_array_t _image_) - For OpenCL C 2.0 or newer, also see `cl_khr_depth_images` extension: + + For OpenCL C 2.0 or newer, or if the `<>` extension + macro is supported: int *get_image_width*(_aQual_ image2d_depth_t _image_) + int *get_image_width*(_aQual_ image2d_array_depth_t _image_) + +ifdef::cl_khr_gl_msaa_sharing[] + If the `<>` extension macro is supported: + + int *get_image_width*(_aQual_ image2d_msaa_t image) + + int *get_image_width*(_aQual_ image2d_array_msaa_t image) + + int *get_image_width*(_aQual_ image2d_msaa_depth_t image) + + int *get_image_width*(_aQual_ image2d_array_msaa_depth_t image) +endif::cl_khr_gl_msaa_sharing[] | Return the image width in pixels. 
+ | int *get_image_height*(_aQual_ image2d_t _image_) + - int *get_image_height*(_aQual_ image3d_t _image_) + + int *get_image_height*(_aQual_ image3d_t _image_) - For OpenCL C 1.2 or newer: + + For OpenCL C 1.2 or newer: - int *get_image_height*(_aQual_ image2d_array_t _image_) + + int *get_image_height*(_aQual_ image2d_array_t _image_) - For OpenCL C 2.0 or newer, also see `cl_khr_depth_images` extension: + + For OpenCL C 2.0 or newer, or if the `<>` extension + macro is supported: int *get_image_height*(_aQual_ image2d_depth_t _image_) + int *get_image_height*(_aQual_ image2d_array_depth_t _image_) + +ifdef::cl_khr_gl_msaa_sharing[] + If the `<>` extension macro is supported: + + int *get_image_height*(_aQual_ image2d_msaa_t image) + + int *get_image_height*(_aQual_ image2d_array_msaa_t image) + + int *get_image_height*(_aQual_ image2d_msaa_depth_t image) + + int *get_image_height*(_aQual_ image2d_array_msaa_depth_t image) +endif::cl_khr_gl_msaa_sharing[] | Return the image height in pixels. + | int *get_image_depth*(image3d_t _image_) | Return the image depth in pixels. 
| | | int *get_image_channel_data_type*(_aQual_ image2d_t _image_) + - int *get_image_channel_data_type*(_aQual_ image3d_t _image_) + + int *get_image_channel_data_type*(_aQual_ image3d_t _image_) - For OpenCL C 1.2 or newer: + + For OpenCL C 1.2 or newer: int *get_image_channel_data_type*(_aQual_ image1d_t _image_) + int *get_image_channel_data_type*(_aQual_ image1d_buffer_t _image_) + int *get_image_channel_data_type*(_aQual_ image2d_t _image_) + int *get_image_channel_data_type*(_aQual_ image3d_t _image_) + int *get_image_channel_data_type*(_aQual_ image1d_array_t _image_) + - int *get_image_channel_data_type*(_aQual_ image2d_array_t _image_) + + int *get_image_channel_data_type*(_aQual_ image2d_array_t _image_) - For OpenCL C 2.0 or newer, also see `cl_khr_depth_images` extension: + + For OpenCL C 2.0 or newer, or if the `<>` extension + macro is supported: int *get_image_channel_data_type*(_aQual_ image2d_depth_t _image_) + int *get_image_channel_data_type*(_aQual_ image2d_array_depth_t _image_) + +ifdef::cl_khr_gl_msaa_sharing[] + If the `<>` extension macro is supported: + + int *get_image_channel_data_type*(_aQual_ image2d_msaa_t image) + + int *get_image_channel_data_type*(_aQual_ image2d_array_msaa_t image) + + int *get_image_channel_data_type*(_aQual_ image2d_msaa_depth_t image) + + int *get_image_channel_data_type*(_aQual_ image2d_array_msaa_depth_t image) +endif::cl_khr_gl_msaa_sharing[] | Return the channel data type. Valid values are: `CLK_SNORM_INT8` + @@ -9764,23 +12397,34 @@ For query functions this may be `read_only`, `write_only` or `read_write`. 
`CLK_HALF_FLOAT` + `CLK_FLOAT` + - Additionally, for OpenCL C 3.0 or newer: + + Additionally, for OpenCL C 3.0 or newer: `CLK_UNORM_INT_101010_2` footnote:[{fn-CLK_UNORM_INT_101010_2}] + | int *get_image_channel_order*(_aQual_ image2d_t _image_) + - int *get_image_channel_order*(_aQual_ image3d_t _image_) + + int *get_image_channel_order*(_aQual_ image3d_t _image_) - For OpenCL C 1.2 or newer: + + For OpenCL C 1.2 or newer: int *get_image_channel_order*(_aQual_ image1d_t _image_) + int *get_image_channel_order*(_aQual_ image1d_buffer_t _image_) + int *get_image_channel_order*(_aQual_ image1d_array_t _image_) + - int *get_image_channel_order*(_aQual_ image2d_array_t _image_) + + int *get_image_channel_order*(_aQual_ image2d_array_t _image_) - For OpenCL C 2.0 or newer, also see `cl_khr_depth_images` extension: + + For OpenCL C 2.0 or newer, or if the `<>` extension + macro is supported: int *get_image_channel_order*(_aQual_ image2d_depth_t _image_) + int *get_image_channel_order*(_aQual_ image2d_array_depth_t _image_) + +ifdef::cl_khr_gl_msaa_sharing[] + If the `<>` extension macro is supported: + + int *get_image_channel_order*(_aQual_ image2d_msaa_t image) + + int *get_image_channel_order*(_aQual_ image2d_array_msaa_t image) + + int *get_image_channel_order*(_aQual_ image2d_msaa_depth_t image) + + int *get_image_channel_order*(_aQual_ image2d_array_msaa_depth_t image) +endif::cl_khr_gl_msaa_sharing[] | Return the image channel order. Valid values are: `CLK_A` + @@ -9792,15 +12436,15 @@ For query functions this may be `read_only`, `write_only` or `read_write`. 
`CLK_ARGB` + `CLK_BGRA` + `CLK_INTENSITY` + - `CLK_LUMINANCE` + + `CLK_LUMINANCE` - Additionally, for OpenCL C 1.1 or newer: + + Additionally, for OpenCL C 1.1 or newer: `CLK_Rx` + `CLK_RGx` + - `CLK_RGBx` + + `CLK_RGBx` - Additionally, for OpenCL C 2.0 or newer: + + Additionally, for OpenCL C 2.0 or newer: `CLK_ABGR` + `CLK_DEPTH` + @@ -9808,37 +12452,82 @@ For query functions this may be `read_only`, `write_only` or `read_write`. `CLK_sRGBx` + `CLK_sRGBA` + `CLK_sBGRA` + | | -| int2 *get_image_dim*(_aQual_ image2d_t _image_) + +| int2 *get_image_dim*(_aQual_ image2d_t _image_) - For OpenCL C 1.2 or newer: + + For OpenCL C 1.2 or newer: - int2 *get_image_dim*(_aQual_ image2d_array_t _image_) + + int2 *get_image_dim*(_aQual_ image2d_array_t _image_) - For OpenCL C 2.0 or newer, also see `cl_khr_depth_images` extension: + + For OpenCL C 2.0 or newer, or if the `<>` extension + macro is supported: int2 *get_image_dim*(_aQual_ image2d_depth_t _image_) + int2 *get_image_dim*(_aQual_ image2d_array_depth_t _image_) - | Return the 2D image width and height as an int2 type. + +ifdef::cl_khr_gl_msaa_sharing[] + If the `<>` extension macro is supported: + + int2 *get_image_dim*(_aQual_ image2d_msaa_t image) + + int2 *get_image_dim*(_aQual_ image2d_array_msaa_t image) + + int2 *get_image_dim*(_aQual_ image2d_msaa_depth_t image) + + int2 *get_image_dim*(_aQual_ image2d_array_msaa_depth_t image) +endif::cl_khr_gl_msaa_sharing[] + | Return the 2D image width and height as an `int2` type. The width is returned in the _x_ component, and the height in the _y_ component. + | int4 *get_image_dim*(_aQual_ image3d_t _image_) | Return the 3D image width, height, and depth as an `int4` type. The width is returned in the _x_ component, height in the _y_ component, depth in the _z_ component and the _w_ component is 0. 
| | -| For OpenCL C 1.2 or newer: + +| For OpenCL C 1.2 or newer: - size_t *get_image_array_size*(_aQual_ image2d_array_t _image_) + + size_t *get_image_array_size*(_aQual_ image2d_array_t _image_) - For OpenCL C 2.0 or newer, also see `cl_khr_depth_images` extension: + + For OpenCL C 2.0 or newer, or if the `<>` extension + macro is supported: size_t *get_image_array_size*(_aQual_ image2d_array_depth_t _image_) + +ifdef::cl_khr_gl_msaa_sharing[] + If the `<>` extension macro is supported: + + size_t *get_image_array_size*(_aQual_ image2d_array_msaa_depth_t _image_) +endif::cl_khr_gl_msaa_sharing[] | Return the number of images in the 2D image array. -| For OpenCL C 1.2 or newer: + + +| For OpenCL C 1.2 or newer: size_t *get_image_array_size*(_aQual_ image1d_array_t _image_) | Return the number of images in the 1D image array. + +ifdef::cl_khr_gl_msaa_sharing[] +| If the `<>` extension macro is supported: + + int *get_image_num_samples*(_aQual_ image2d_msaa_t _image_) + + int *get_image_num_samples*(_aQual_ image2d_array_msaa_t _image_) + + int *get_image_num_samples*(_aQual_ image2d_msaa_depth_t _image_) + + int *get_image_num_samples*(_aQual_ image2d_array_msaa_depth_t _image_) + | Return the number of samples in the 2D MSAA image +endif::cl_khr_gl_msaa_sharing[] + +ifdef::cl_khr_mipmap_image[] +| If the `<>` extension macro is supported: + + int *get_image_num_mip_levels*(_aQual_ image1d_t _image_) + + int *get_image_num_mip_levels*(_aQual_ image2d_t _image_) + + int *get_image_num_mip_levels*(_aQual_ image3d_t _image_) + + int *get_image_num_mip_levels*(_aQual_ image1d_array_t _image_) + + int *get_image_num_mip_levels*(_aQual_ image2d_array_t _image_) + + int *get_image_num_mip_levels*(_aQual_ image2d_depth_t _image_) + + int *get_image_num_mip_levels*(_aQual_ image2d_array_depth_t _image_) + + | Return the number of mip levels in _image_. 
+endif::cl_khr_mipmap_image[] + |==== The values returned by *get_image_channel_data_type* and @@ -9853,7 +12542,7 @@ channel data type that is an unnormalized unsigned 8-bit integer. [[reading-and-writing-to-the-same-image-in-a-kernel]] -==== Reading and writing to the same image in a kernel +==== Reading and Writing to the Same Image in a Kernel The *atomic_work_item_fence*(`CLK_IMAGE_MEM_FENCE`) built-in function can be used to make sure that sampler-less writes are visible to later reads by the @@ -9894,7 +12583,7 @@ foo(read_write image2d_t img, ... ) [[mapping-image-channels-to-color-values-returned-by-read_image-and-color-values-passed-to-write_image-to-image-channels]] -==== Mapping image channels to color values returned by read_image and color values passed to write_image to image channels +==== Mapping Image Channels to Color Values Returned by read_image and Color Values Passed to write_image to Image Channels The following table describes the mapping of the number of channels of an image element to the appropriate components in the `float4`, `int4` or @@ -9903,9 +12592,9 @@ image element to the appropriate components in the `float4`, `int4` or The unmapped components will be set to 0.0 for red, green and blue channels and will be set to 1.0 for the alpha channel. -[cols=",",] +[cols=",",options="header",] |==== -| *Channel Order* | `float4`, `int4` or `uint4` *components of channel data* +| Channel Order | `float4`, `int4` or `uint4` components of channel data | `CL_R`, `CL_Rx` | (r, 0.0, 0.0, 1.0) | `CL_A` | (0.0, 0.0, 0.0, a) | `CL_RG`, `CL_RGx` | (r, g, 0.0, 1.0) @@ -9920,8 +12609,8 @@ and will be set to 1.0 for the alpha channel. For `CL_DEPTH` images, a scalar value is returned by *read_imagef* or supplied to *write_imagef*. -<> support for OpenCL C 2.0 or newer, also see -`cl_khr_depth_images` extension. +<> support for OpenCL C 2.0 or newer, or for +the `<>` extension macro. 
[NOTE] ==== @@ -9945,7 +12634,6 @@ support will result in a `CL_OUT_OF_RESOURCES` error being returned. [open,refpage='workGroupFunctions',desc='Work-group Collective Functions',type='freeform',spec='clang',anchor='work-group-functions',xrefs='',alias='work_group_all work_group_any work_group_broadcast work_group_reduce work_group_scan_exclusive work_group_scan_inclusive'] -- - NOTE: The functionality described in this section <> support for OpenCL C 2.0, or OpenCL C 3.0 or newer and the {opencl_c_work_group_collective_functions} feature. @@ -9961,9 +12649,9 @@ footnote:[{fn-double-supported}] as the type for the arguments. [[table-builtin-work-group]] .Built-in Work-group Collective Functions -[cols=",",] +[cols=",",options="header",] |==== -| *Function* | *Description* +| Function | Description | int *work_group_all*(int _predicate_) | Evaluates _predicate_ for all work-items in the work-group and returns a non-zero value if _predicate_ evaluates to non-zero for all @@ -10056,6 +12744,161 @@ given work-group. -- +ifdef::cl_khr_work_group_uniform_arithmetic[] +[[work-group-collective-uniform-arithmetic-functions]] +=== Work-group Collective Uniform Arithmetic Functions + +[open,refpage='workGroupUniformArithmeticFunctions',desc='Work-group Collective Uniform Arithmetic Functions',type='freeform',spec='clang',anchor='work-group-collective-uniform-arithmetic-functions',xrefs='workGroupFunctions',alias='work_group_all work_group_any work_group_broadcast work_group_reduce work_group_scan_exclusive work_group_scan_inclusive'] +-- +NOTE: The functionality described in this section <> +support for OpenCL C 2.0 and the `<>` +extension macro. + +The <> table describes the OpenCL C +programming language built-in functions that perform logical arithmetic +operations across work items in a work-group. +These functions must be encountered by all work items in a work-group +executing the kernel, otherwise the behavior is undefined. 
+For these functions, a non-zero _predicate_ argument or return value is +logically `true` and a zero _predicate_ argument or return value is +logically `false`. + +[[table-builtin-work-group-logical]] +.Built-in Work-group Logical Arithmetic Functions +[cols="2a,1",options="header"] +|==== +| Function | Description +|[source,opencl_c] +---- +int work_group_reduce_logical_and(int predicate); +int work_group_reduce_logical_or(int predicate); +int work_group_reduce_logical_xor(int predicate); +---- + | Returns the logical *and*, *or*, or *xor* of _predicate_ for all work + items in the work-group. +|[source,opencl_c] +---- +int work_group_scan_inclusive_logical_and(int predicate); +int work_group_scan_inclusive_logical_or(int predicate); +int work_group_scan_inclusive_logical_xor(int predicate); +---- + | Returns the result of an inclusive scan operation, which is the logical + *and*, *or*, or *xor* of _predicate_ for all work items in the + work-group with a work-group linear local ID less than or equal to this + work item's work-group linear local ID. +|[source,opencl_c] +---- +int work_group_scan_exclusive_logical_and(int predicate); +int work_group_scan_exclusive_logical_or(int predicate); +int work_group_scan_exclusive_logical_xor(int predicate); +---- + | Returns the result of an exclusive scan operation, which is the logical + *and*, *or*, or *xor* of _predicate_ for all work items in the + work-group with a work-group linear local ID less than this work item's + work-group linear local ID. + + If there is no work item in the work-group with a work-group linear + local ID less than this work item's work-group linear local ID then an + identity value `I` is returned. + For *and*, the identity value is `true` (non-zero). + For *or* and *xor*, the identity value is `false` (zero). +|==== + +The <> table describes the OpenCL +C programming language built-in functions that perform bitwise integer +operations across work items in a work-group. 
+These functions must be encountered by all work items in a work-group +executing the kernel, otherwise the behavior is undefined. +For the functions below, the generic type name `gentype` may be one of the +supported built-in scalar data types `int`, `uint`, `long`, and `ulong`. + +[[table-builtin-work-group-bitwise-integer]] +.Built-in Work-group Bitwise Integer Functions +[cols="2a,1",options="header"] +|==== +| Function | Description +|[source,opencl_c] +---- +gentype work_group_reduce_and(gentype value); +gentype work_group_reduce_or(gentype value); +gentype work_group_reduce_xor(gentype value); +---- + | Returns the bitwise *and*, *or*, or *xor* of _value_ for all work items + in the work-group. +|[source,opencl_c] +---- +gentype work_group_scan_inclusive_and(gentype value); +gentype work_group_scan_inclusive_or(gentype value); +gentype work_group_scan_inclusive_xor(gentype value); +---- + | Returns the result of an inclusive scan operation, which is the bitwise + *and*, *or*, or *xor* of _value_ for all work items in the work-group + with a work-group linear local ID less than or equal to this work item's + work-group linear local ID. +|[source,opencl_c] +---- +gentype work_group_scan_exclusive_and(gentype value); +gentype work_group_scan_exclusive_or(gentype value); +gentype work_group_scan_exclusive_xor(gentype value); +---- + | Returns the result of an exclusive scan operation, which is the bitwise + *and*, *or*, or *xor* of _value_ for all work items in the work-group + with a work-group linear local ID less than this work item's work-group + linear local ID. + + If there is no work item in the work-group with a work-group linear + local ID less than this work item's work-group linear local ID then an + identity value `I` is returned. + For *and*, the identity value is `~0` (all bits set). + For *or* and *xor*, the identity value is `0`. 
+|==== + +The <> table describes the OpenCL C +programming language built-in functions that perform multiplicative +operations across work items in a work-group. +These functions must be encountered by all work items in a work-group +executing the kernel, otherwise the behavior is undefined. +For the functions below, the generic type name `gentype` may be one of the +supported built-in scalar data types `int`, `uint`, `long`, `ulong`, +`float`, `double` (if double precision is supported), or `half` (if half +precision is supported). + +[[table-builtin-work-group-multiplicative]] +.Built-in Work-group Multiplicative Functions +[cols="2a,1",options="header"] +|==== +| Function | Description +|[source,opencl_c] +---- +gentype work_group_reduce_mul(gentype value); +---- + | Returns the multiplication of _value_ for all work items in the + work-group. +|[source,opencl_c] +---- +gentype work_group_scan_inclusive_mul(gentype value); +---- + | Returns the result of an inclusive scan operation which is the + multiplication of _value_ for all work items in the work-group with a + work-group linear local ID less than or equal to this work item's + work-group linear local ID. +|[source,opencl_c] +---- +gentype work_group_scan_exclusive_mul(gentype value); +---- + | Returns the result of an exclusive scan operation which is the + multiplication of _value_ for all work items in the work-group with a + work-group linear local ID less than this work item's work-group linear + local ID. + + If there is no work item in the work-group with a work-group linear + local ID less than this work item's work-group linear local ID then the + identity value `1` is returned. +|==== +-- +endif::cl_khr_work_group_uniform_arithmetic[] + + [[pipe-functions]] === Pipe Functions @@ -10135,9 +12978,9 @@ pipe functions listed in the following table. 
[[table-builtin-pipe]] .Built-in Pipe Functions -[cols=",",] +[cols=",",options="header",] |==== -| *Function* | *Description* +| Function | Description | int *read_pipe*(read_only pipe gentype _p_, gentype *_ptr_) | Read packet from pipe _p_ into _ptr_. Returns 0 if *read_pipe* is successful and a negative value if the @@ -10205,9 +13048,9 @@ pipe functions listed in the following table. [[table-builtin-pipe-work-group]] .Built-in Pipe Work-group Functions -[cols=",",] +[cols=",",options="header",] |==== -| *Function* | *Description* +| Function | Description | reserve_id_t *work_group_reserve_read_pipe*(read_only pipe gentype _p_, uint _num_packets_) + reserve_id_t *work_group_reserve_write_pipe*(write_only pipe gentype _p_, @@ -10279,9 +13122,9 @@ For pipe query functions this may be `read_only` or `write_only`. [[table-builtin-pipe-query]] .Built-in Pipe Query Functions -[cols=",",] +[cols=",",options="header",] |==== -| *Function* | *Description* +| Function | Description | uint *get_pipe_num_packets*(_aQual_ pipe gentype _p_) | Returns the number of available entries in the pipe. The number of available entries in a pipe is a dynamic value. @@ -10343,33 +13186,54 @@ single semantic step. The following table describes the list of built-in functions that can be used to enqueue a kernel(s). +ifdef::cl_khr_device_enqueue_local_arg_types[] +When the `<>` extension macro is +supported, the <> and <> described in this section can use any of the built-in OpenCL C +scalar or vector integer or floating-point data types, or any user defined +type built from these scalar and vector data types, as the pointee type of +their arguments. +This is indicated by the generic type name `gentype` in those function +signatures. + +When the `<>` extension macro is +not supported, the pointee type of these functions must be `void`. 
+ +:localArgType: gentype +endif::cl_khr_device_enqueue_local_arg_types[] + +ifndef::cl_khr_device_enqueue_local_arg_types[] +:localArgType: void +endif::cl_khr_device_enqueue_local_arg_types[] + The macro `CLK_NULL_EVENT` refers to an invalid device event. The macro `CLK_NULL_QUEUE` refers to an invalid device queue. -- [[built-in-functions-enqueuing-a-kernel]] -==== Built-in Functions - Enqueuing a kernel +==== Built-in Functions - Enqueuing a Kernel [[table-builtin-kernel-enqueue]] .Built-in Kernel Enqueue Functions -[cols=",",] +[cols=",",options="header",] |==== -| *Built-in Function* | *Description* +| Built-in Function | Description | int **enqueue_kernel**(queue_t _queue_, kernel_enqueue_flags_t _flags_, - const ndrange_t _ndrange_, void (^__block__)(void)) + + const ndrange_t _ndrange_, void (^__block__)(void)) + int **enqueue_kernel**(queue_t _queue_, kernel_enqueue_flags_t _flags_, - const ndrange_t _ndrange_, uint _num_events_in_wait_list_, - const clk_event_t *_event_wait_list_, clk_event_t *_event_ret_, - void (^__block__)(void)) + + const ndrange_t _ndrange_, uint _num_events_in_wait_list_, + const clk_event_t *_event_wait_list_, clk_event_t *_event_ret_, + void (^__block__)(void)) + int **enqueue_kernel**(queue_t _queue_, kernel_enqueue_flags_t _flags_, - const ndrange_t _ndrange_, void (^__block__)(local void *, ...), - uint size0, ...) + + const ndrange_t _ndrange_, void (^__block__)(local {localArgType} *, ...), + uint size0, ...) + int **enqueue_kernel**(queue_t _queue_, kernel_enqueue_flags_t _flags_, - const ndrange_t _ndrange_, uint _num_events_in_wait_list_, - const clk_event_t *_event_wait_list_, clk_event_t *_event_ret_, - void (^__block__)(local void *, ...), uint size0, ...) + const ndrange_t _ndrange_, uint _num_events_in_wait_list_, + const clk_event_t *_event_wait_list_, clk_event_t *_event_ret_, + void (^__block__)(local {localArgType} *, ...), uint size0, ...) | Enqueue the block for execution to _queue_. 
If an event is returned, *enqueue_kernel* performs an implicit retain @@ -10510,7 +13374,7 @@ foo(global int *a, local int *lptr, ...) [[arguments-that-are-a-pointer-type-to-local-address-space]] -==== Arguments that are a pointer type to local address space +==== Arguments That are a Pointer Type to Local Address Space A block passed to enqueue_kernel can have arguments declared to be a pointer to `local` memory. @@ -10645,18 +13509,19 @@ evaluate_dp_work_A(queue_t q,...) [[determining-when-a-child-kernel-begins-execution]] -==== Determining when a child kernel begins execution +==== Determining when a Child Kernel Begins Execution The `kernel_enqueue_flags_t` footnote:[{fn-dse-kernel_enqueue_flags_t}] argument to the `enqueue_kernel` built-in functions can be used to specify when the child kernel begins execution. -Supported values are described in the table below: +Supported values are described in the <>: [[table-kernel-enqueue-flags]] .Kernel Enqueue Flags -[cols=",",] +[cols=",",options="header",] |==== -| `kernel_enqueue_flags_t` *enum* | *Description* +| `kernel_enqueue_flags_t` enum | Description | `CLK_ENQUEUE_FLAGS_NO_WAIT` | Indicates that the enqueued kernels do not need to wait for the parent kernel to finish execution before they begin execution. @@ -10681,7 +13546,7 @@ child kernels can begin execution. [[determining-when-a-parent-kernel-has-finished-execution]] -==== Determining when a parent kernel has finished execution +==== Determining When a Parent Kernel has Finished Execution A parent kernel's execution status is considered to be complete when it and all its child kernels have finished execution. @@ -10708,24 +13573,27 @@ execution. [[built-in-functions-kernel-query-functions]] ==== Built-in Functions - Kernel Query Functions +// Note: the Unicode "zero width space" (​) is used in some places to +// cause long function names to break much more sensibly. +// Probably the asciidoc built-in {zwsp} should be used instead. 
+ [open,refpage='kernelQueryFunctions',desc='Built-in Functions - Kernel Query Functions',type='freeform',spec='clang',anchor='built-in-functions-kernel-query-functions',xrefs='enqueue_kernel',alias='get_kernel_preferred get_kernel_work_group_size'] -- - [[table-builtin-kernel-query]] .Built-in Kernel Query Functions -[cols=",",] +[cols=",",options="header",] |==== -| *Built-in Function* | *Description* +| Built-in Function | Description | uint *get_kernel_work_group_size*(void (^block)(void)) + - uint *get_kernel_work_group_size*(void (^block)(local void *, ...)) + uint *get_kernel_work_group_size*(void (^block)(local {localArgType} *, ...)) | This provides a mechanism to query the maximum work-group size that can be used to execute a block on a specific device given by _device_. _block_ specifies the block to be enqueued. -| uint *get_kernel_preferred_* *work_group_size_multiple*( +| uint *get_kernel_preferred_​work_group_size_multiple*( void (^block)(void)) + - uint *get_kernel_preferred_* *work_group_size_multiple*( - void (^block)(local void *, ...)) + uint *get_kernel_preferred_​work_group_size_multiple*( + void (^block)(local {localArgType} *, ...)) | Returns the preferred multiple of work-group size for launch. This is a performance hint. Specifying a work-group size that is not a multiple of the value @@ -10737,7 +13605,7 @@ execution. [[built-in-functions-queuing-other-commands]] -==== Built-in Functions - Queuing other commands +==== Built-in Functions - Queuing Other Commands [open,refpage='enqueue_marker',desc='Built-in Functions - Queuing Other Commands',type='freeform',spec='clang',anchor='built-in-functions-queuing-other-commands',xrefs='enqueue_kernel'] -- @@ -10747,9 +13615,9 @@ used to enqueue commands such as a marker. 
[[table-builtin-other-enqueue]] .Built-in Other Enqueue Functions -[cols=",",] +[cols=",",options="header",] |==== -| *Built-in Function* | *Description* +| Built-in Function | Description | int *enqueue_marker*(queue_t _queue_, uint _num_events_in_wait_list_, const clk_event_t *_event_wait_list_, clk_event_t *_event_ret_) | Enqueue a marker command to _queue_. @@ -10794,9 +13662,9 @@ events. [[table-builtin-event]] .Built-in Event Functions -[cols=",",] +[cols=",",options="header",] |==== -| *Built-in Function* | *Description* +| Built-in Function | Description | void *retain_event*(clk_event_t _event_) | Increments the event reference count. @@ -10969,9 +13837,9 @@ foo(queue_t q, ...) [[table-builtin-helper]] .Built-in Helper Functions -[cols=",",] +[cols=",",options="header",] |==== -| *Built-in Function* | *Description* +| Built-in Function | Description | queue_t *get_default_queue*(void) | Returns the default device queue. If a default device queue has not been created, `CLK_NULL_QUEUE` is @@ -10998,27 +13866,43 @@ foo(queue_t q, ...) 
|==== -- + [[sub-group-functions]] -=== Sub-group Functions +=== Sub-Group Functions -[open,refpage='subGroupFunctions',desc='Sub-group Functions',type='freeform',spec='clang',anchor='sub-group-functions',xrefs='',alias='sub_group_all sub_group_any sub_group_broadcast sub_group_reduce sub_group_scan_exclusive sub_group_scan_inclusive sub_group_reserve_read_pipe sub_gorup_reserve_write_pipe sub_group_commit_read_pipe sub_group_commit_write_pipe get_kernel_sub_group_count_for_ndrange get_kernel_max_sub_group_size_for_ndrange'] +[open,refpage='subGroupFunctions',desc='Sub-Group Functions',type='freeform',spec='clang',anchor='sub-group-functions',xrefs='',alias='sub_group_all sub_group_any sub_group_broadcast sub_group_reduce sub_group_scan_exclusive sub_group_scan_inclusive sub_group_reserve_read_pipe sub_group_reserve_write_pipe sub_group_commit_read_pipe sub_group_commit_write_pipe get_kernel_sub_group_count_for_ndrange get_kernel_max_sub_group_size_for_ndrange'] -- - NOTE: The functionality described in this section <> -support for OpenCL C 3.0 or newer and the {opencl_c_subgroups} feature. - -The table below describes OpenCL C programming language built-in functions that operate on a sub-group level. -These built-in functions must be encountered by all work-items in the sub-group executing the kernel. +support for +ifdef::cl_khr_subgroups[the `<<cl_khr_subgroups>>` extension macro; or for] +OpenCL C 3.0 or newer and the {opencl_c_subgroups} feature. + +The <> describes OpenCL C +programming language built-in functions that operate on a sub-group level. +These built-in functions must be encountered by all work-items in the +sub-group executing the kernel. For the functions below, the generic type name `gentype` may be the one of the supported built-in scalar data types `int`, `uint`, `long` -footnote:[{fn-int64-supported}], `ulong`, `half` footnote:[{fn-half-supported}], -`float`, and `double` footnote:[{fn-double-supported}].
- -.Built-in Sub-group Collective Functions +footnote:[{fn-int64-supported}], `ulong`, `half` +footnote:[{fn-half-supported}], `float`, and `double` +footnote:[{fn-double-supported}]. + +ifdef::cl_khr_subgroup_extended_types[] +NOTE: If the `<<cl_khr_subgroup_extended_types>>` extension is supported, +the supported `gentype`s also include `char`, `uchar`, `short`, and +`ushort`. +For the `sub_group_broadcast` function, `gentype` may additionally be one of +the supported built-in vector data types `char__n__`, `uchar__n__`, +`short__n__`, `ushort__n__`, `int__n__`, `uint__n__`, `long__n__`, +`ulong__n__`, `float__n__`, `half__n__` footnote:[{fn-half-supported}], or +`double__n__` footnote:[{fn-double-supported}]. +endif::cl_khr_subgroup_extended_types[] + +[[table-collective-functions]] +.Built-in Sub-Group Collective Functions [cols=",",options="header",] |==== -| *Function* -| *Description* +| Function | Description | int *sub_group_all* (int _predicate_) | Evaluates _predicate_ for all work-items in the sub-group and returns a @@ -11085,11 +13969,13 @@ The order of these floating-point operations is also non-deterministic for a giv ==== NOTE: The functionality described in the following table <> support for OpenCL C 3.0 or newer and the {opencl_c_subgroups} -and {opencl_c_pipes} features. +requires>> support for +ifdef::cl_khr_subgroups[the `<<cl_khr_subgroups>>` extension macro; or for] +OpenCL C 3.0 or newer and the {opencl_c_subgroups} and {opencl_c_pipes} +features. -The following table describes built-in pipe functions that operate at a -sub-group level. +The <> describes built-in pipe +functions that operate at a sub-group level. These built-in functions must be encountered by all work-items in a sub-group executing the kernel with the same argument values, otherwise the behavior is undefined.
@@ -11098,11 +13984,11 @@ scalar or vector integer or floating-point data types or any user defined type built from these scalar and vector data types can be used as the type for the arguments to the pipe functions listed in _table 6.29_. -.Built-in Sub-group Pipe Functions +[[table-pipe-functions]] +.Built-in Sub-Group Pipe Functions [cols=",",options="header",] |==== -| *Function* -| *Description* +| Function | Description | reserve_id_t *sub_group_reserve_read_pipe* ( + read_only pipe gentype _pipe_, + @@ -11138,17 +14024,19 @@ The order of sub-group based reservations that belong to different work groups is implementation-defined. NOTE: The functionality described in the following table <> support for OpenCL C 3.0 or newer and the {opencl_c_subgroups} -and {opencl_c_device_enqueue} features. +requires>> support for +ifdef::cl_khr_subgroups[the `<<cl_khr_subgroups>>` extension macro; or for] +OpenCL C 3.0 or newer and the {opencl_c_subgroups} and +{opencl_c_device_enqueue} features. -The following table describes built-in functions to query sub-group -information for a block to be enqueued. +The <> describes built-in +functions to query sub-group information for a block to be enqueued. -.Built-in Sub-group Kernel Query Functions +[[table-kernel-query-functions]] +.Built-in Sub-Group Kernel Query Functions [cols="5,4",options="header",] |==== -| *Built-in Function* -| *Description* +| Built-in Function | Description | uint *get_kernel_sub_group_count_for_ndrange* ( + const ndrange_t _ndrange_, + @@ -11175,6 +14063,1164 @@ information for a block to be enqueued. |==== -- + +ifdef::cl_khr_subgroup_ballot[] +[[sub-group-ballot-functions]] +==== Built-in Sub-Group Ballot Functions + +NOTE: The functionality described in this section <> +support for the `<<cl_khr_subgroup_ballot>>` extension. + +The <> describes OpenCL C +programming language built-in functions to allow work items in a sub-group +to collect and operate on ballots from work items in the sub-group.
+These functions need not be encountered by all work items in a sub-group +executing the kernel. + +For the `sub_group_non_uniform_broadcast` and `sub_group_broadcast_first` +functions, the generic type name `gentype` may be one of the supported +built-in scalar data types `char`, `uchar`, `short`, `ushort`, `int`, +`uint`, `long`, `ulong`, `float`, `half` footnote:[{fn-half-supported}], and +`double` footnote:[{fn-double-supported}]. + +For the `sub_group_non_uniform_broadcast` function, the generic type name +`gentype` may additionally be one of the supported built-in vector data +types `char__n__`, `uchar__n__`, `short__n__`, `ushort__n__`, `int__n__`, +`uint__n__`, `long__n__`, `ulong__n__`, `float__n__`, `half__n__` +footnote:[{fn-half-supported}], or `double__n__` +footnote:[{fn-double-supported}]. + +[[table-ballot-functions]] +.Built-in Sub-Group Ballot Functions +[cols="1a,1",options="header",] +|==== +| Function | Description +|[source,opencl_c] +---- +gentype sub_group_non_uniform_broadcast( + gentype value, + uint index ) +---- + | Returns _value_ for the work item with sub-group local ID equal to + _index_. + + Behavior is undefined when the value of _index_ is not equivalent for + all active work items in the sub-group. + + The return value is undefined if the work item with sub-group local ID + equal to _index_ is inactive or if _index_ is greater than or equal to + the size of the sub-group. +|[source,opencl_c] +---- +gentype sub_group_broadcast_first( + gentype value ) +---- + | Returns _value_ for the work item with the smallest sub-group local ID + among active work items in the sub-group. +|[source,opencl_c] +---- +uint4 sub_group_ballot( + int predicate ) +---- + | Returns a bitfield combining the _predicate_ values from all work items + in the sub-group. 
+ Bit zero of the first vector component represents the sub-group local ID + zero, with higher-order bits and subsequent vector components + representing, in order, increasing sub-group local IDs. + The representative bit in the bitfield is set if the work item is active + and the _predicate_ is non-zero, and is unset otherwise. +|[source,opencl_c] +---- +int sub_group_inverse_ballot( + uint4 value ) +---- + | Returns the predicate value for this work item in the sub-group from the + bitfield _value_ representing predicate values from all work items in + the sub-group. + The predicate return value will be non-zero if the bit in the bitfield + _value_ for this work item is set, and zero otherwise. + + Behavior is undefined when _value_ is not equivalent for all active work + items in the sub-group. + + This is a specialized function that may perform better than the + equivalent `sub_group_ballot_bit_extract` on some implementations. +|[source,opencl_c] +---- +int sub_group_ballot_bit_extract( + uint4 value, + uint index ) +---- + | Returns the predicate value for the work item with sub-group local ID + equal to _index_ from the bitfield _value_ representing predicate values + from all work items in the sub-group. + The predicate return value will be non-zero if the bit in the bitfield + _value_ for the work item with sub-group local ID equal to _index_ is + set, and zero otherwise. + + The predicate return value is undefined if _index_, the sub-group local + ID being queried, is greater than or equal to the size of the + sub-group. +|[source,opencl_c] +---- +uint sub_group_ballot_bit_count( + uint4 value ) +---- + | Returns the number of bits that are set in the bitfield _value_, only + considering the bits in _value_ that represent predicate values + corresponding to sub-group local IDs less than the maximum sub-group + size within the dispatch (as returned by `get_max_sub_group_size`).
+|[source,opencl_c] +---- +uint sub_group_ballot_inclusive_scan( + uint4 value ) +---- + | Returns the number of bits that are set in the bitfield _value_, only + considering the bits in _value_ representing work items with a sub-group + local ID less than or equal to this work item's sub-group local ID. +|[source,opencl_c] +---- +uint sub_group_ballot_exclusive_scan( + uint4 value ) +---- + | Returns the number of bits that are set in the bitfield _value_, only + considering the bits in _value_ representing work items with a sub-group + local ID less than this work item's sub-group local ID. +|[source,opencl_c] +---- +uint sub_group_ballot_find_lsb( + uint4 value ) +---- + | Returns the smallest sub-group local ID with a bit set in the bitfield + _value_, only considering the bits in _value_ that represent predicate + values corresponding to sub-group local IDs less than the maximum + sub-group size within the dispatch (as returned by + `get_max_sub_group_size`). + If no bits representing predicate values from all work items in the + sub-group are set in the bitfield _value_ then the return value is + undefined. +|[source,opencl_c] +---- +uint sub_group_ballot_find_msb( + uint4 value ) +---- + | Returns the largest sub-group local ID with a bit set in the bitfield + _value_, only considering the bits in _value_ that represent predicate + values corresponding to sub-group local IDs less than the maximum + sub-group size within the dispatch (as returned by + `get_max_sub_group_size`). + If no bits representing predicate values from all work items in the + sub-group are set in the bitfield _value_ then the return value is + undefined. +|[source,opencl_c] +---- +uint4 get_sub_group_eq_mask() +---- + | Generates a bitmask where the bit is set in the bitmask if the bit index + equals the sub-group local ID and unset otherwise. 
+ Bit zero of the first vector component represents the sub-group local ID + zero, with higher-order bits and subsequent vector components + representing, in order, increasing sub-group local IDs. +|[source,opencl_c] +---- +uint4 get_sub_group_ge_mask() +---- + | Generates a bitmask where the bit is set in the bitmask if the bit index + is greater than or equal to the sub-group local ID and less than the + maximum sub-group size, and unset otherwise. + Bit zero of the first vector component represents the sub-group local ID + zero, with higher-order bits and subsequent vector components + representing, in order, increasing sub-group local IDs. +|[source,opencl_c] +---- +uint4 get_sub_group_gt_mask() +---- + | Generates a bitmask where the bit is set in the bitmask if the bit index + is greater than the sub-group local ID and less than the maximum + sub-group size, and unset otherwise. + Bit zero of the first vector component represents the sub-group local ID + zero, with higher-order bits and subsequent vector components + representing, in order, increasing sub-group local IDs. +|[source,opencl_c] +---- +uint4 get_sub_group_le_mask() +---- + | Generates a bitmask where the bit is set in the bitmask if the bit index + is less than or equal to the sub-group local ID and unset otherwise. + Bit zero of the first vector component represents the sub-group local ID + zero, with higher-order bits and subsequent vector components + representing, in order, increasing sub-group local IDs. +|[source,opencl_c] +---- +uint4 get_sub_group_lt_mask() +---- + | Generates a bitmask where the bit is set in the bitmask if the bit index + is less than the sub-group local ID and unset otherwise. + Bit zero of the first vector component represents the sub-group local ID + zero, with higher-order bits and subsequent vector components + representing, in order, increasing sub-group local IDs. 
+|==== + +endif::cl_khr_subgroup_ballot[] + + +ifdef::cl_khr_subgroup_clustered_reduce[] +[[sub-group-clustered-reduction-functions]] +==== Built-in Clustered Reduction Functions for Sub-Groups + +NOTE: The functionality described in this section <<unified-spec, requires>> +support for the `<<cl_khr_subgroup_clustered_reduce>>` extension. + +This section describes arithmetic operations that are performed on a subset +of work items in a sub-group, referred to as a cluster. +A cluster is described by a specified cluster size. +Work items in a sub-group are assigned to clusters such that for cluster +size _n_, the _n_ work items in the sub-group with the smallest sub-group +local IDs are assigned to the first cluster, then the _n_ remaining work +items with the smallest sub-group local IDs are assigned to the next +cluster, and so on. +Behavior is undefined if the specified cluster size is not an integer +constant expression, is not a power-of-two, or is greater than the maximum +size of a sub-group within the dispatch. + + +===== Arithmetic Operations + +The table below describes the OpenCL C programming language built-in +functions that perform simple arithmetic operations on a cluster of work +items in a sub-group. +These functions need not be encountered by all work items in a sub-group +executing the kernel. +For the functions below, the generic type name `gentype` may be one of the +supported built-in scalar data types `char`, `uchar`, `short`, `ushort`, +`int`, `uint`, `long`, `ulong`, `float`, `half` +footnote:[{fn-half-supported}], and `double` +footnote:[{fn-double-supported}].
+ +[[table-clustered-reduce-math-functions]] +.Built-in Arithmetic Functions for Sub-Groups +[cols="1a,1",options="header",] +|==== +| Function | Description +|[source,opencl_c] +---- +gentype sub_group_clustered_reduce_add( + gentype value, uint clustersize ) +gentype sub_group_clustered_reduce_mul( + gentype value, uint clustersize ) +gentype sub_group_clustered_reduce_min( + gentype value, uint clustersize ) +gentype sub_group_clustered_reduce_max( + gentype value, uint clustersize ) +---- + | Returns the summation, multiplication, minimum, or maximum of _value_ + for all active work items in the sub-group within a cluster of the + specified _clustersize_. +|==== + +Note: The order of floating-point operations is not guaranteed for the +sub-group clustered reduction built-in functions that operate on +floating-point types, and the order of operations may additionally be +non-deterministic for a given sub-group. + + +===== Bitwise Operations + +The table below describes the OpenCL C programming language built-in +functions to perform simple bitwise integer operations across a cluster of +work items in a sub-group. +These functions need not be encountered by all work items in a sub-group +executing the kernel. +For the functions below, the generic type name `gentype` may be one of +the supported built-in scalar data types `char`, `uchar`, `short`, `ushort`, +`int`, `uint`, `long`, or `ulong`. + +[[table-clustered-reduce-bitwise-functions]] +.Built-in Bitwise Functions for Sub-Groups +[cols="1a,1",options="header",] +|==== +| Function | Description +|[source,opencl_c] +---- +gentype sub_group_clustered_reduce_and( + gentype value, uint clustersize ) +gentype sub_group_clustered_reduce_or( + gentype value, uint clustersize ) +gentype sub_group_clustered_reduce_xor( + gentype value, uint clustersize ) +---- + | Returns the bitwise *and*, *or*, or *xor* of _value_ for all active work + items in the sub-group within a cluster of the specified _clustersize_.
+|==== + + +===== Logical Operations + +The table below describes the OpenCL C programming language built-in +functions to perform simple logical operations across a cluster of work +items in a sub-group. +These functions need not be encountered by all work items in a sub-group +executing the kernel. +For these functions, a non-zero _predicate_ argument or return value is +logically `true` and a zero _predicate_ argument or return value is +logically `false`. + +[[table-clustered-reduce-logical-functions]] +.Built-in Logical Functions for Sub-Groups +[cols="3a,2",options="header",] +|==== +| Function | Description +|[source,opencl_c] +---- +int sub_group_clustered_reduce_logical_and( + int predicate, uint clustersize ) +int sub_group_clustered_reduce_logical_or( + int predicate, uint clustersize ) +int sub_group_clustered_reduce_logical_xor( + int predicate, uint clustersize ) +---- + | Returns the logical *and*, *or*, or *xor* of _predicate_ for all active + work items in the sub-group within a cluster of the specified + _clustersize_. +|==== + +endif::cl_khr_subgroup_clustered_reduce[] + + +ifdef::cl_khr_subgroup_non_uniform_arithmetic[] +==== Built-in Non-Uniform Scan and Reduction Functions for Sub-Groups + +NOTE: The functionality described in this section <<unified-spec, requires>> +support for the `<<cl_khr_subgroup_non_uniform_arithmetic>>` extension. + +===== Arithmetic Operations + +The <<table-non-uniform-math-functions,table below>> describes the +OpenCL C programming language built-in functions that perform simple +arithmetic operations across work items in a sub-group. +These functions need not be encountered by all work items in a sub-group +executing the kernel. +For the functions below, the generic type name `gentype` may be one of the +supported built-in scalar data types `char`, `uchar`, `short`, `ushort`, +`int`, `uint`, `long`, `ulong`, `float`, `half` +footnote:[{fn-half-supported}], and `double` +footnote:[{fn-double-supported}].
+ +[[table-non-uniform-math-functions]] +.Built-in Non-Uniform Arithmetic Functions for Sub-Groups +[cols="3a,2",options="header",] +|==== +| Function | Description +|[source,opencl_c] +---- +gentype sub_group_non_uniform_reduce_add( + gentype value ) +gentype sub_group_non_uniform_reduce_min( + gentype value ) +gentype sub_group_non_uniform_reduce_max( + gentype value ) +gentype sub_group_non_uniform_reduce_mul( + gentype value ) +---- + | Returns the summation, multiplication, minimum, or maximum of _value_ + for all active work items in the sub-group. + + Note: This behavior is the same as the *add*, *min*, and *max* reduction + built-in functions from `<<cl_khr_subgroups>>` and OpenCL 2.1, except + these functions support additional types and need not be encountered by + all work items in the sub-group executing the kernel. +|[source,opencl_c] +---- +gentype sub_group_non_uniform_scan_inclusive_add( + gentype value ) +gentype sub_group_non_uniform_scan_inclusive_min( + gentype value ) +gentype sub_group_non_uniform_scan_inclusive_max( + gentype value ) +gentype sub_group_non_uniform_scan_inclusive_mul( + gentype value ) +---- + | Returns the result of an inclusive scan operation, which is the + summation, multiplication, minimum, or maximum of _value_ for all active + work items in the sub-group with a sub-group local ID less than or equal + to this work item's sub-group local ID. + + Note: This behavior is the same as the *add*, *min*, and *max* inclusive + scan built-in functions from `<<cl_khr_subgroups>>` and OpenCL 2.1, + except these functions support additional types and need not be + encountered by all work items in the sub-group executing the kernel.
+|[source,opencl_c] +---- +gentype sub_group_non_uniform_scan_exclusive_add( + gentype value ) +gentype sub_group_non_uniform_scan_exclusive_min( + gentype value ) +gentype sub_group_non_uniform_scan_exclusive_max( + gentype value ) +gentype sub_group_non_uniform_scan_exclusive_mul( + gentype value ) +---- + | Returns the result of an exclusive scan operation, which is the + summation, multiplication, minimum, or maximum of _value_ for all active + work items in the sub-group with a sub-group local ID less than this + work item's sub-group local ID. + + If there is no active work item in the sub-group with a sub-group local + ID less than this work item's sub-group local ID then an identity value + `I` is returned. + For *add*, the identity value is `0`. + For *min*, the identity value is the largest representable value for + integer types, or `+INF` for floating-point types. + For *max*, the identity value is the minimum representable value for + integer types, or `-INF` for floating-point types. + For *mul*, the identity value is `1`. + + Note: This behavior is the same as the *add*, *min*, and *max* exclusive + scan built-in functions from `<<cl_khr_subgroups>>` and OpenCL 2.1, + except these functions support additional types and need not be + encountered by all work items in the sub-group executing the kernel. +|==== + +Note: The order of floating-point operations is not guaranteed for the +sub-group scan and reduction built-in functions that operate on +floating-point types, and the order of operations may additionally be +non-deterministic for a given sub-group. + + +===== Bitwise Operations + +The table below describes the OpenCL C programming language built-in +functions that perform simple bitwise integer operations across work items +in a sub-group. +These functions need not be encountered by all work items in a sub-group +executing the kernel.
+For the functions below, the generic type name `gentype` may be one of the +supported built-in scalar data types `char`, `uchar`, `short`, `ushort`, +`int`, `uint`, `long`, and `ulong`. + +[[table-non-uniform-bitwise-functions]] +.Built-in Non-Uniform Bitwise Functions for Sub-Groups +[cols="3a,2",options="header",] +|==== +| Function | Description +|[source,opencl_c] +---- +gentype sub_group_non_uniform_reduce_and( + gentype value ) +gentype sub_group_non_uniform_reduce_or( + gentype value ) +gentype sub_group_non_uniform_reduce_xor( + gentype value ) +---- + | Returns the bitwise *and*, *or*, or *xor* of _value_ for all active work + items in the sub-group. +|[source,opencl_c] +---- +gentype sub_group_non_uniform_scan_inclusive_and( + gentype value ) +gentype sub_group_non_uniform_scan_inclusive_or( + gentype value ) +gentype sub_group_non_uniform_scan_inclusive_xor( + gentype value ) +---- + | Returns the result of an inclusive scan operation, which is the bitwise + *and*, *or*, or *xor* of _value_ for all active work items in the + sub-group with a sub-group local ID less than or equal to this work + item's sub-group local ID. +|[source,opencl_c] +---- +gentype sub_group_non_uniform_scan_exclusive_and( + gentype value ) +gentype sub_group_non_uniform_scan_exclusive_or( + gentype value ) +gentype sub_group_non_uniform_scan_exclusive_xor( + gentype value ) +---- + | Returns the result of an exclusive scan operation, which is the bitwise + *and*, *or*, or *xor* of _value_ for all active work items in the + sub-group with a sub-group local ID less than this work item's sub-group + local ID. + + If there is no active work item in the sub-group with a sub-group local + ID less than this work item's sub-group local ID then an identity value + `I` is returned. + For *and*, the identity value is `~0` (all bits set). + For *or* and *xor*, the identity value is `0`. 
+|==== + + +===== Logical Operations + +The table below describes the OpenCL C programming language built-in +functions that perform simple logical operations across work items in a +sub-group. +These functions need not be encountered by all work items in a sub-group +executing the kernel. +For these functions, a non-zero _predicate_ argument or return value is +logically `true` and a zero _predicate_ argument or return value is +logically `false`. + +[[table-non-uniform-logical-functions]] +.Built-in Non-Uniform Logical Functions for Sub-Groups +[cols="2a,1",options="header",] +|==== +| Function | Description +|[source,opencl_c] +---- +int sub_group_non_uniform_reduce_logical_and( + int predicate ) +int sub_group_non_uniform_reduce_logical_or( + int predicate ) +int sub_group_non_uniform_reduce_logical_xor( + int predicate ) +---- + | Returns the logical *and*, *or*, or *xor* of _predicate_ for all active + work items in the sub-group. +|[source,opencl_c] +---- +int sub_group_non_uniform_scan_inclusive_logical_and( + int predicate ) +int sub_group_non_uniform_scan_inclusive_logical_or( + int predicate ) +int sub_group_non_uniform_scan_inclusive_logical_xor( + int predicate ) +---- + | Returns the result of an inclusive scan operation, which is the logical + *and*, *or*, or *xor* of _predicate_ for all active work items in the + sub-group with a sub-group local ID less than or equal to this work + item's sub-group local ID. +|[source,opencl_c] +---- +int sub_group_non_uniform_scan_exclusive_logical_and( + int predicate ) +int sub_group_non_uniform_scan_exclusive_logical_or( + int predicate ) +int sub_group_non_uniform_scan_exclusive_logical_xor( + int predicate ) +---- + | Returns the result of an exclusive scan operation, which is the logical + *and*, *or*, or *xor* of _predicate_ for all active work items in the + sub-group with a sub-group local ID less than this work item's sub-group + local ID. 
+ + If there is no active work item in the sub-group with a sub-group local + ID less than this work item's sub-group local ID then an identity value + `I` is returned. + For *and*, the identity value is `true` (non-zero). + For *or* and *xor*, the identity value is `false` (zero). +|==== + +endif::cl_khr_subgroup_non_uniform_arithmetic[] + + +ifdef::cl_khr_subgroup_non_uniform_vote[] +==== Built-in Non-Uniform Vote Functions for Sub-Groups + +NOTE: The functionality described in this section <<unified-spec, requires>> +support for the `<<cl_khr_subgroup_non_uniform_vote>>` extension. + +The <<table-non-uniform-vote-functions,table below>> describes the +OpenCL C programming language built-in functions to elect a single work item +in a sub-group to perform a task and to collectively vote to determine a +boolean condition for the sub-group. +These functions need not be encountered by all work items in a sub-group +executing the kernel. +For the functions below, the generic type name `gentype` may be one of +the supported built-in scalar data types `char`, `uchar`, `short`, `ushort`, +`int`, `uint`, `long`, `ulong`, `float`, `half` +footnote:[{fn-half-supported}], and `double` +footnote:[{fn-double-supported}]. + +[[table-non-uniform-vote-functions]] +.Built-in Non-Uniform Vote Functions for Sub-Groups +[cols="1a,1",options="header",] +|==== +| Function | Description +|[source,opencl_c] +---- +int sub_group_elect() +---- + | Elects a single work item in the sub-group to perform a task. + + This function will return true (nonzero) for the active work item in the + sub-group with the smallest sub-group local ID, and false (zero) for all + other active work items in the sub-group. +|[source,opencl_c] +---- +int sub_group_non_uniform_all( + int predicate ) +---- + | Examines _predicate_ for all active work items in the sub-group and + returns a non-zero value if _predicate_ is non-zero for all active work + items in the sub-group and zero otherwise.
+ + Note: This behavior is the same as `sub_group_all` from + `<<cl_khr_subgroups>>` and OpenCL 2.1, except this function need not be + encountered by all work items in the sub-group executing the kernel. +|[source,opencl_c] +---- +int sub_group_non_uniform_any( + int predicate ) +---- + | Examines _predicate_ for all active work items in the sub-group and + returns a non-zero value if _predicate_ is non-zero for any active work + item in the sub-group and zero otherwise. + + Note: This behavior is the same as `sub_group_any` from + `<<cl_khr_subgroups>>` and OpenCL 2.1, except this function need not be + encountered by all work items in the sub-group executing the kernel. +|[source,opencl_c] +---- +int sub_group_non_uniform_all_equal( + gentype value ) +---- + | Examines _value_ for all active work items in the sub-group and returns + a non-zero value if _value_ is equivalent for all active work items in + the sub-group and zero otherwise. + + Integer types use a bitwise test for equality. Floating-point types use + an ordered floating-point test for equality. +|==== + +endif::cl_khr_subgroup_non_uniform_vote[] + + +ifdef::cl_khr_subgroup_rotate[] +[[sub-group-rotate-functions]] +==== Built-in Sub-Group Rotation Functions + +NOTE: The functionality described in this section <<unified-spec, requires>> +support for the `<<cl_khr_subgroup_rotate>>` extension. + +The <<table-rotate-functions,table below>> describes specialized +OpenCL C programming language built-in functions that allow work items in a +sub-group to exchange data. +These functions need not be encountered by all work items in a sub-group +executing the kernel. +For the functions below, the generic type name `gentype` may be one of the +supported built-in scalar data types `char`, `uchar`, `short`, `ushort`, +`int`, `uint`, `long`, `ulong`, `float`, `half` +footnote:[{fn-half-supported}], and `double` +footnote:[{fn-double-supported}].
+ +[[table-rotate-functions]] +.Built-in Rotation Functions for Sub-Groups +[cols="1a,1",options="header",] +|==== +| Function | Description + +|[source,opencl_c] +---- +gentype sub_group_rotate( + gentype value, int delta) +---- + | Returns _value_ for the work item with sub-group local ID equal to the + remainder of the division of the sum of this work item's sub-group local + ID and _delta_ by the maximum sub-group size. + + The value of _delta_ is required to be dynamically-uniform for all work + items in the sub-group, otherwise the behavior is undefined. + + The return value is undefined if the work item with sub-group local ID + equal to the calculated index is inactive. +|[source,opencl_c] +---- +gentype sub_group_clustered_rotate( + gentype value, int delta, + uint clustersize) +---- + | Returns _value_ for the work item with sub-group local ID equal to the + sum of, the remainder of the division of the sum of this work item's ID + within the cluster and _delta_ by _clustersize_, and the sub-group local + ID of the first work-item of the cluster to which the work-item + executing the function belongs. + + The value of _delta_ is required to be dynamically-uniform for all work + items in the sub-group, otherwise the behavior is undefined. + + _clustersize_ must be an integer constant expression and a power of two, + smaller than or equal to the maximum sub-group size, otherwise the + behavior is undefined. + + The return value is undefined if the work item with sub-group local ID + equal to the calculated index is inactive. +|==== + +endif::cl_khr_subgroup_rotate[] + + +ifdef::cl_khr_subgroup_shuffle[] +==== Built-in Shuffle Functions for Sub-Groups + +NOTE: The functionality described in this section <<unified-spec, requires>> +support for the `<<cl_khr_subgroup_shuffle>>` extension. + +The <<table-shuffle-functions,table below>> describes the OpenCL C +programming language built-in functions that allow work items in a sub-group +to exchange data.
+These functions need not be encountered by all work items in a sub-group +executing the kernel. +For the functions below, the generic type name `gentype` may be one of the +supported built-in scalar data types `char`, `uchar`, `short`, `ushort`, +`int`, `uint`, `long`, `ulong`, `float`, `half` +footnote:[{fn-half-supported}], and `double` +footnote:[{fn-double-supported}]. + +[[table-shuffle-functions]] +.Built-in Shuffle Functions for Sub-Groups +[cols="1a,1",options="header",] +|==== +| Function | Description +|[source,opencl_c] +---- +gentype sub_group_shuffle( + gentype value, uint index ) +---- + | Returns _value_ for the work item with sub-group local ID equal to + _index_. + The shuffle _index_ need not be the same for all work items in the + sub-group. + + The return value is undefined if the work item with sub-group local ID + equal to _index_ is inactive or if _index_ is greater than or equal to + the size of the sub-group. +|[source,opencl_c] +---- +gentype sub_group_shuffle_xor( + gentype value, uint mask ) +---- + | Returns _value_ for the work item with sub-group local ID equal to + this work item's sub-group local ID xor'd with _mask_. + The shuffle _mask_ need not be the same for all work items in the + sub-group. + + The return value is undefined if the work item with sub-group local ID + equal to the calculated index is inactive or if the calculated index is + greater than or equal to the size of the sub-group. + + This is a specialized function that may perform better than the + equivalent `sub_group_shuffle` on some implementations. +|==== + +endif::cl_khr_subgroup_shuffle[] + + +ifdef::cl_khr_subgroup_shuffle_relative[] +==== Built-in Relative Shuffle Functions for Sub-Groups + +The table below describes specialized OpenCL C programming language built-in +functions that allow work items in a sub-group to exchange data. +These functions need not be encountered by all work items in a sub-group +executing the kernel.
+For the functions below, the generic type name `gentype` may be one of the +supported built-in scalar data types `char`, `uchar`, `short`, `ushort`, +`int`, `uint`, `long`, `ulong`, `float`, `half` +footnote:[{fn-half-supported}], and `double` +footnote:[{fn-double-supported}]. + +[[table-shuffle-relative-functions]] +.Built-in Relative Shuffle Functions for Sub-Groups +[cols="1a,1",options="header",] +|==== +| Function | Description +|[source,opencl_c] +---- +gentype sub_group_shuffle_up( + gentype value, uint delta ) +---- + | Returns _value_ for the work item with sub-group local ID equal to this + work item's sub-group local ID minus _delta_. + The shuffle _delta_ need not be the same for all work items in the + sub-group. + + The return value is undefined if the work item with sub-group local ID + equal to the calculated index is inactive, or _delta_ is greater than + this work item's sub-group local ID. + + This is a specialized function that may perform better than the + equivalent `sub_group_shuffle` on some implementations. +|[source,opencl_c] +---- +gentype sub_group_shuffle_down( + gentype value, uint delta ) +---- + | Returns _value_ for the work item with sub-group local ID equal to this + work item's sub-group local ID plus _delta_. + The shuffle _delta_ need not be the same for all work items in the + sub-group. + + The return value is undefined if the work item with sub-group local ID + equal to the calculated index is inactive, or this work item's sub-group + local ID plus _delta_ is greater than or equal to the size of the + sub-group. + + This is a specialized function that may perform better than the + equivalent `sub_group_shuffle` on some implementations. +|==== +endif::cl_khr_subgroup_shuffle_relative[] + + +[[extended-sub-groups-mapping]] +=== Sub-Groups Function Mapping and Capabilities + +This section describes a possible mapping between OpenCL built-in sub-group functions +and SPIR-V instructions and required SPIR-V capabilities. 
+ +This section is informational and non-normative. + +[cols="1,1,1",options="header"] +|==== +| OpenCL C Function | SPIR-V BuiltIn or Instruction | Enabling SPIR-V Capability + +3+| For OpenCL 2.1 or `<<cl_khr_subgroups>>`: + +| `get_​sub_​group_​size` + | *SubgroupSize* + | *Kernel* +| `get_​max_​sub_​group_​size` + | *SubgroupMaxSize* + | *Kernel* +| `get_​num_​sub_​groups` + | *NumSubgroups* + | *Kernel* +| `get_​enqueued_​num_​sub_​groups` + | *NumEnqueuedSubgroups* + | *Kernel* +| `get_​sub_​group_​id` + | *SubgroupId* + | *Kernel* +| `get_​sub_​group_​local_​id` + | *SubgroupLocalInvocationId* + | *Kernel* + +| `sub_​group_​barrier` + | *OpControlBarrier* + | None Needed + +| `sub_​group_​all` + | *OpGroupAll* + | *Groups* +| `sub_​group_​any` + | *OpGroupAny* + | *Groups* + +| `sub_​group_​broadcast` + | *OpGroupBroadcast* + | *Groups* + +| `sub_​group_​reduce_​add` + | *OpGroupIAdd*, *OpGroupFAdd* + | *Groups* +| `sub_​group_​reduce_​min` + | *OpGroupSMin*, *OpGroupUMin*, *OpGroupFMin* + | *Groups* +| `sub_​group_​reduce_​max` + | *OpGroupSMax*, *OpGroupUMax*, *OpGroupFMax* + | *Groups* + +| `sub_​group_​scan_​exclusive_​add` + | *OpGroupIAdd*, *OpGroupFAdd* + | *Groups* +| `sub_​group_​scan_​exclusive_​min` + | *OpGroupSMin*, *OpGroupUMin*, *OpGroupFMin* + | *Groups* +| `sub_​group_​scan_​exclusive_​max` + | *OpGroupSMax*, *OpGroupUMax*, *OpGroupFMax* + | *Groups* + +| `sub_​group_​scan_​inclusive_​add` + | *OpGroupIAdd*, *OpGroupFAdd* + | *Groups* +| `sub_​group_​scan_​inclusive_​min` + | *OpGroupSMin*, *OpGroupUMin*, *OpGroupFMin* + | *Groups* +| `sub_​group_​scan_​inclusive_​max` + | *OpGroupSMax*, *OpGroupUMax*, *OpGroupFMax* + | *Groups* + +| `sub_​group_​reserve_​read_​pipe` + | *OpGroupReserveReadPipePackets* + | *Pipes* +| `sub_​group_​reserve_​write_​pipe` + | *OpGroupReserveWritePipePackets* + | *Pipes* +| `sub_​group_​commit_​read_​pipe` + | *OpGroupCommitReadPipe* + | *Pipes* +| `sub_​group_​commit_​write_​pipe` + | *OpGroupCommitWritePipe* + | *Pipes* + +|
`get_​kernel_​sub_​group_​count_​for_​ndrange` + | *OpGetKernelNDrangeSubGroupCount* + | *DeviceEnqueue* +| `get_​kernel_​max_​sub_​group_​size_​for_​ndrange` + | *OpGetKernelNDrangeMaxSubGroupSize* + | *DeviceEnqueue* + +ifdef::cl_khr_subgroup_ballot[] +3+| For `<>`: + +| `sub_​group_​non_​uniform_​broadcast` + | *OpGroupNonUniformBroadcast* + | *GroupNonUniformBallot* +| `sub_​group_​broadcast_​first` + | *OpGroupNonUniformBroadcastFirst* + | *GroupNonUniformBallot* + +| `sub_​group_​ballot` + | *OpGroupNonUniformBallot* + | *GroupNonUniformBallot* +| `sub_​group_​inverse_​ballot` + | *OpGroupNonUniformInverseBallot* + | *GroupNonUniformBallot* +| `sub_​group_​ballot_​bit_​extract` + | *OpGroupNonUniformBallotBitExtract* + | *GroupNonUniformBallot* +| `sub_​group_​ballot_​bit_​count` + | *OpGroupNonUniformBallotBitCount* + | *GroupNonUniformBallot* +| `sub_​group_​ballot_​inclusive_​scan` + | *OpGroupNonUniformBallotBitCount* + | *GroupNonUniformBallot* +| `sub_​group_​ballot_​exclusive_​scan` + | *OpGroupNonUniformBallotBitCount* + | *GroupNonUniformBallot* +| `sub_​group_​ballot_​find_​lsb` + | *OpGroupNonUniformBallotFindLSB* + | *GroupNonUniformBallot* +| `sub_​group_​ballot_​find_​msb` + | *OpGroupNonUniformBallotFindMSB* + | *GroupNonUniformBallot* + +| `get_​sub_​group_​eq_​mask` + | *SubgroupEqMask* + | *GroupNonUniformBallot* +| `get_​sub_​group_​ge_​mask` + | *SubgroupGeMask* + | *GroupNonUniformBallot* +| `get_​sub_​group_​gt_​mask` + | *SubgroupGtMask* + | *GroupNonUniformBallot* +| `get_​sub_​group_​le_​mask` + | *SubgroupLeMask* + | *GroupNonUniformBallot* +| `get_​sub_​group_​lt_​mask` + | *SubgroupLtMask* + | *GroupNonUniformBallot* +endif::cl_khr_subgroup_ballot[] + +ifdef::cl_khr_subgroup_clustered_reduce[] +3+| For `<>`: + +| `sub_​group_​clustered_​reduce_​add` + | *OpGroupNonUniformIAdd*, *OpGroupNonUniformFAdd* + | *GroupNonUniformClustered* +| `sub_​group_​clustered_​reduce_​mul` + | *OpGroupNonUniformIMul*, *OpGroupNonUniformFMul* + | 
*GroupNonUniformClustered* +| `sub_​group_​clustered_​reduce_​min` + | *OpGroupNonUniformSMin*, *OpGroupNonUniformUMin*, *OpGroupNonUniformFMin* + | *GroupNonUniformClustered* +| `sub_​group_​clustered_​reduce_​max` + | *OpGroupNonUniformSMax*, *OpGroupNonUniformUMax*, *OpGroupNonUniformFMax* + | *GroupNonUniformClustered* +| `sub_​group_​clustered_​reduce_​and` + | *OpGroupNonUniformBitwiseAnd* + | *GroupNonUniformClustered* +| `sub_​group_​clustered_​reduce_​or` + | *OpGroupNonUniformBitwiseOr* + | *GroupNonUniformClustered* +| `sub_​group_​clustered_​reduce_​xor` + | *OpGroupNonUniformBitwiseXor* + | *GroupNonUniformClustered* +| `sub_​group_​clustered_​reduce_​logical_​and` + | *OpGroupNonUniformLogicalAnd* + | *GroupNonUniformClustered* +| `sub_​group_​clustered_​reduce_​logical_​or` + | *OpGroupNonUniformLogicalOr* + | *GroupNonUniformClustered* +| `sub_​group_​clustered_​reduce_​logical_​xor` + | *OpGroupNonUniformLogicalXor* + | *GroupNonUniformClustered* +endif::cl_khr_subgroup_clustered_reduce[] + +ifdef::cl_khr_subgroup_extended_types[] +3+| For `<>`: + + Note: This extension adds new types to uniform sub-group operations. 
+ +| `sub_​group_​broadcast` + | *OpGroupBroadcast* + | *Groups* + +| `sub_​group_​reduce_​add` + | *OpGroupIAdd*, *OpGroupFAdd* + | *Groups* +| `sub_​group_​reduce_​min` + | *OpGroupSMin*, *OpGroupUMin*, *OpGroupFMin* + | *Groups* +| `sub_​group_​reduce_​max` + | *OpGroupSMax*, *OpGroupUMax*, *OpGroupFMax* + | *Groups* + +| `sub_​group_​scan_​exclusive_​add` + | *OpGroupIAdd*, *OpGroupFAdd* + | *Groups* +| `sub_​group_​scan_​exclusive_​min` + | *OpGroupSMin*, *OpGroupUMin*, *OpGroupFMin* + | *Groups* +| `sub_​group_​scan_​exclusive_​max` + | *OpGroupSMax*, *OpGroupUMax*, *OpGroupFMax* + | *Groups* + +| `sub_​group_​scan_​inclusive_​add` + | *OpGroupIAdd*, *OpGroupFAdd* + | *Groups* +| `sub_​group_​scan_​inclusive_​min` + | *OpGroupSMin*, *OpGroupUMin*, *OpGroupFMin* + | *Groups* +| `sub_​group_​scan_​inclusive_​max` + | *OpGroupSMax*, *OpGroupUMax*, *OpGroupFMax* + | *Groups* +endif::cl_khr_subgroup_extended_types[] + +ifdef::cl_khr_subgroup_non_uniform_arithmetic[] +3+| For `<>`: + +| `sub_​group_​non_​uniform_​reduce_​add` + | *OpGroupNonUniformIAdd*, *OpGroupNonUniformFAdd* + | *GroupNonUniformArithmetic* +| `sub_​group_​non_​uniform_​reduce_​mul` + | *OpGroupNonUniformIMul*, *OpGroupNonUniformFMul* + | *GroupNonUniformArithmetic* +| `sub_​group_​non_​uniform_​reduce_​min` + | *OpGroupNonUniformSMin*, *OpGroupNonUniformUMin*, *OpGroupNonUniformFMin* + | *GroupNonUniformArithmetic* +| `sub_​group_​non_​uniform_​reduce_​max` + | *OpGroupNonUniformSMax*, *OpGroupNonUniformUMax*, *OpGroupNonUniformFMax* + | *GroupNonUniformArithmetic* +| `sub_​group_​non_​uniform_​reduce_​and` + | *OpGroupNonUniformBitwiseAnd* + | *GroupNonUniformArithmetic* +| `sub_​group_​non_​uniform_​reduce_​or` + | *OpGroupNonUniformBitwiseOr* + | *GroupNonUniformArithmetic* +| `sub_​group_​non_​uniform_​reduce_​xor` + | *OpGroupNonUniformBitwiseXor* + | *GroupNonUniformArithmetic* +| `sub_​group_​non_​uniform_​reduce_​logical_​and` + | *OpGroupNonUniformLogicalAnd* + | 
*GroupNonUniformArithmetic* +| `sub_​group_​non_​uniform_​reduce_​logical_​or` + | *OpGroupNonUniformLogicalOr* + | *GroupNonUniformArithmetic* +| `sub_​group_​non_​uniform_​reduce_​logical_​xor` + | *OpGroupNonUniformLogicalXor* + | *GroupNonUniformArithmetic* + +| `sub_​group_​non_​uniform_​scan_​inclusive_​add` + | *OpGroupNonUniformIAdd*, *OpGroupNonUniformFAdd* + | *GroupNonUniformArithmetic* +| `sub_​group_​non_​uniform_​scan_​inclusive_​mul` + | *OpGroupNonUniformIMul*, *OpGroupNonUniformFMul* + | *GroupNonUniformArithmetic* +| `sub_​group_​non_​uniform_​scan_​inclusive_​min` + | *OpGroupNonUniformSMin*, *OpGroupNonUniformUMin*, *OpGroupNonUniformFMin* + | *GroupNonUniformArithmetic* +| `sub_​group_​non_​uniform_​scan_​inclusive_​max` + | *OpGroupNonUniformSMax*, *OpGroupNonUniformUMax*, *OpGroupNonUniformFMax* + | *GroupNonUniformArithmetic* +| `sub_​group_​non_​uniform_​scan_​inclusive_​and` + | *OpGroupNonUniformBitwiseAnd* + | *GroupNonUniformArithmetic* +| `sub_​group_​non_​uniform_​scan_​inclusive_​or` + | *OpGroupNonUniformBitwiseOr* + | *GroupNonUniformArithmetic* +| `sub_​group_​non_​uniform_​scan_​inclusive_​xor` + | *OpGroupNonUniformBitwiseXor* + | *GroupNonUniformArithmetic* +| `sub_​group_​non_​uniform_​scan_​inclusive_​logical_​and` + | *OpGroupNonUniformLogicalAnd* + | *GroupNonUniformArithmetic* +| `sub_​group_​non_​uniform_​scan_​inclusive_​logical_​or` + | *OpGroupNonUniformLogicalOr* + | *GroupNonUniformArithmetic* +| `sub_​group_​non_​uniform_​scan_​inclusive_​logical_​xor` + | *OpGroupNonUniformLogicalXor* + | *GroupNonUniformArithmetic* + +| `sub_​group_​non_​uniform_​scan_​exclusive_​add` + | *OpGroupNonUniformIAdd*, *OpGroupNonUniformFAdd* + | *GroupNonUniformArithmetic* +| `sub_​group_​non_​uniform_​scan_​exclusive_​mul` + | *OpGroupNonUniformIMul*, *OpGroupNonUniformFMul* + | *GroupNonUniformArithmetic* +| `sub_​group_​non_​uniform_​scan_​exclusive_​min` + | *OpGroupNonUniformSMin*, *OpGroupNonUniformUMin*, *OpGroupNonUniformFMin* 
+ | *GroupNonUniformArithmetic* +| `sub_​group_​non_​uniform_​​scan_​exclusive_​max` + | *OpGroupNonUniformSMax*, *OpGroupNonUniformUMax*, *OpGroupNonUniformFMax* + | *GroupNonUniformArithmetic* +| `sub_​group_​non_​uniform_​​scan_​exclusive_​and` + | *OpGroupNonUniformBitwiseAnd* + | *GroupNonUniformArithmetic* +| `sub_​group_​non_​uniform_​​scan_​exclusive_​or` + | *OpGroupNonUniformBitwiseOr* + | *GroupNonUniformArithmetic* +| `sub_​group_​non_​uniform_​​scan_​exclusive_​xor` + | *OpGroupNonUniformBitwiseXor* + | *GroupNonUniformArithmetic* +| `sub_​group_​non_​uniform_​​scan_​exclusive_​logical_​and` + | *OpGroupNonUniformLogicalAnd* + | *GroupNonUniformArithmetic* +| `sub_​group_​non_​uniform_​​scan_​exclusive_​logical_​or` + | *OpGroupNonUniformLogicalOr* + | *GroupNonUniformArithmetic* +| `sub_​group_​non_​uniform_​​scan_​exclusive_​logical_​xor` + | *OpGroupNonUniformLogicalXor* + | *GroupNonUniformArithmetic* +endif::cl_khr_subgroup_non_uniform_arithmetic[] + +ifdef::cl_khr_subgroup_non_uniform_vote[] +3+| For `<>`: + +| `sub_​group_​elect` + | *OpGroupNonUniformElect* + | *GroupNonUniform* +| `sub_​group_​non_​uniform_​all` + | *OpGroupNonUniformAll* + | *GroupNonUniformVote* +| `sub_​group_​non_​uniform_​any` + | *OpGroupNonUniformAny* + | *GroupNonUniformVote* +| `sub_​group_​non_​uniform_​all_​equal` + | *OpGroupNonUniformAllEqual* + | *GroupNonUniformVote* +endif::cl_khr_subgroup_non_uniform_vote[] + +ifdef::cl_khr_subgroup_shuffle[] +3+| For `<>`: + +| `sub_​group_​shuffle` + | *OpGroupNonUniformShuffle* + | *GroupNonUniformShuffle* +| `sub_​group_​shuffle_​xor` + | *OpGroupNonUniformShuffleXor* + | *GroupNonUniformShuffle* +endif::cl_khr_subgroup_shuffle[] + +ifdef::cl_khr_subgroup_shuffle_relative[] +3+| For `<>`: + +| `sub_​group_​shuffle_​up` + | *OpGroupNonUniformShuffleUp* + | *GroupNonUniformShuffleRelative* +| `sub_​group_​shuffle_​down` + | *OpGroupNonUniformShuffleDown* + | *GroupNonUniformShuffleRelative* 
+endif::cl_khr_subgroup_shuffle_relative[] + +|==== + + [[opencl-numerical-compliance]] = OpenCL Numerical Compliance @@ -11184,7 +15230,7 @@ standards that must be supported by all OpenCL compliant devices. This section describes the functionality that must be supported by all OpenCL devices for single precision floating-point numbers. Currently, only single precision floating-point is a requirement. -Double precision floating-point is an optional feature. +Double-precision floating-point is an optional feature. [[rounding-modes-1]] @@ -11200,13 +15246,103 @@ IEEE 754 defines four possible rounding modes: * Round toward zero _Round to nearest_ _even_ is currently the only rounding mode required by the -OpenCL specification for single precision and double precision operations and is +OpenCL specification for single precision and double-precision operations and is therefore the default rounding mode footnote:[{fn-float-required-rounding-mode}]. In addition, only static selection of rounding mode is supported. Dynamically reconfiguring the rounding modes as specified by the IEEE 754 spec is unsupported. +ifdef::cl_khr_fp16[] +If the `<>` extension macro is supported, then +if `CL_FP_ROUND_TO_NEAREST` is supported, the default rounding mode for +half-precision floating-point operations will be round to nearest even; +otherwise the default rounding mode will be round to zero. + +Conversions to half floating-point format must be correctly rounded using +the indicated `convert` operator rounding mode or the default rounding mode +for half-precision floating-point operations if no rounding mode is +specified by the operator, or a C-style cast is used. + +Conversions from half to integer format shall correctly round using the +indicated `convert` operator rounding mode, or towards zero if no rounding +mode is specified by the operator or a C-style cast is used. +All conversions from half to floating-point formats are exact. 
+endif::cl_khr_fp16[] + +ifdef::cl_khr_select_fprounding_mode[] +[open,refpage='SELECT_ROUNDING_MODE',desc='Select rounding mode for a group of instructions',type='freeform',spec='clang',anchor='select-rounding-mode-macro',xrefs='fpMacros'] +-- +[[select-rounding-mode]] + +If the `<<cl_khr_select_fprounding_mode>>` extension macro is supported, the +floating-point rounding mode may be specified using the following *#pragma* +in the OpenCL program source: + +[source,opencl_c] +---- +#pragma OPENCL SELECT_ROUNDING_MODE <rounding-mode> +---- + +The _<rounding-mode>_ may be one of the following values: + + * *rte* - round to nearest even + * *rtz* - round to zero + * *rtp* - round to positive infinity + * *rtn* - round to negative infinity + +If this extension is supported then the OpenCL implementation must support +all four rounding modes for single precision floating-point. + +The *#pragma* sets the rounding mode for all instructions that operate on +floating-point types (scalar or vector types) or produce floating-point +values that follow this pragma in the program source until the next +*#pragma*. +Note that the rounding mode specified for a block of code is known at +compile time. +When inside a compound statement, the pragma takes effect from its +occurrence until another *#pragma* is encountered (including within a nested +compound statement), or until the end of the compound statement; at the end +of a compound statement the state for the pragma is restored to its +condition just before the compound statement. +Except where otherwise documented, the callee functions do not inherit the +rounding mode of the caller function.
+ +If the `<<cl_khr_select_fprounding_mode>>` extension is enabled, the +`\\__ROUNDING_MODE__` preprocessor symbol shall be defined to be one of the +following according to the current rounding mode: + +[source,opencl_c] +---- +#define __ROUNDING_MODE__ rte +#define __ROUNDING_MODE__ rtz +#define __ROUNDING_MODE__ rtp +#define __ROUNDING_MODE__ rtn +---- + +This is intended to enable remapping `foo()` to `foo_rte()` by the +preprocessor by using: + +[source,opencl_c] +---- +#define foo foo ## __ROUNDING_MODE__ +---- + +The default rounding mode is round to nearest even. +The <<math-functions,built-in math functions>>, <<common-functions,common functions>>, and <<geometric-functions,geometric functions>> are +implemented with the round to nearest even rounding mode. +Various built-in conversions and the *vstore_half* and *vstorea_half* +built-in functions that do not specify a rounding mode inherit the current +rounding mode. +Conversions from floating-point to integer type always use `rtz` mode, +except where the user specifically asks for another rounding mode. + +NOTE: The `<<cl_khr_select_fprounding_mode>>` extension was deprecated in +OpenCL 1.1, and its use is not recommended. +-- +endif::cl_khr_select_fprounding_mode[] + [[inf-nan-and-denormalized-numbers]] == INF, NaN and Denormalized Numbers @@ -11270,6 +15406,13 @@ compliant and are therefore correctly rounded. Conversion between floating-point formats and <> must be correctly rounded. +ifdef::cl_khr_fp16[] +If the `<<cl_khr_fp16>>` extension macro is supported, +addition, subtraction, multiplication, fused multiply-add operations on half +types are required to be correctly rounded using the default rounding mode +for half-precision floating-point operations. +endif::cl_khr_fp16[] + The ULP is defined as follows: ==== @@ -11301,10 +15444,10 @@ floating-point number that would be representable after the finite maximum, if there was sufficient range, meets ULP error tolerance.
[[table-ulp-float-math]] -.ULP values for single precision built-in math functions -[cols=",",] +.ULP Values for Single-Precision Built-in Math Functions +[cols=",",options="header",] |==== -| *Function* | *Min Accuracy - ULP values* +| Function | Min Accuracy - ULP values | _x_ + _y_ | Correctly rounded | _x_ - _y_ | Correctly rounded | _x_ * _y_ | Correctly rounded @@ -11487,10 +15630,10 @@ is the infinitely precise result. 0 ulp is used for math functions that do not require rounding. [[table-ulp-embedded]] -.ULP values for the embedded profile -[cols=",",] +.ULP Values for the Embedded Profile +[cols=",",options="header",] |==== -| *Function* | *Min Accuracy - ULP values* +| Function | Min Accuracy - ULP values | _x_ + _y_ | Correctly rounded | _x_ - _y_ | Correctly rounded | _x_ * _y_ | Correctly rounded @@ -11633,11 +15776,10 @@ operations and builtins with `-cl-unsafe-math-optimizations` <> support for OpenCL C 2.0 or newer. [[table-float-ulp-relaxed]] -.ULP values for single precision built-in math functions with unsafe math optimizations in the full and embedded profiles -[cols="3,7",] +.ULP Values for Single-Precision Built-in Math Functions With Unsafe Math Optimizations in the Full and Embedded Profiles +[cols="3,7",options="header",] |==== -| *Function* -| *Minimum Accuracy* +| Function | Minimum Accuracy | 1.0 / _x_ | {leq} 2.5 ulp for _x_ in the domain of 2^-126^ to 2^126^ for the full @@ -11823,17 +15965,17 @@ requires>> support for OpenCL C 2.0 or newer. an add both of which are correctly rounded. |==== -The following table describes the minimum accuracy of double precision +The following table describes the minimum accuracy of double-precision floating-point arithmetic operations given as ULP values. The reference value used to compute the ULP value of an arithmetic operation is the infinitely precise result. 0 ulp is used for math functions that do not require rounding. 
[[table-ulp-double]] -.ULP values for double precision built-in math functions -[cols=",",] +.ULP Values for Double-Precision Built-in Math Functions +[cols=",",options="header",] |==== -| *Function* | *Min Accuracy - ULP values* +| Function | Min Accuracy - ULP values | _x_ + _y_ | Correctly rounded | _x_ - _y_ | Correctly rounded | _x_ * _y_ | Correctly rounded @@ -11962,6 +16104,166 @@ is the infinitely precise result. |==== +ifdef::cl_khr_fp16[] +If the `<>` extension macro is supported, +the following table describes the minimum accuracy of half-precision +floating-point arithmetic operations given as ULP values. +The reference value used to compute the ULP value of an arithmetic operation +is the infinitely precise result. +0 ulp is used for math functions that do not require rounding. + +[[table-ulp-half-math]] +.ULP Values for Half-Precision Floating-Point Arithmetic Operations +[cols=",,",options="header",] +|==== +| Function | Min Accuracy - Full Profile | Min Accuracy - Embedded Profile +| *_x_ + _y_* | Correctly rounded | Correctly rounded +| *_x_ - _y_* | Correctly rounded | Correctly rounded +| *_x_ * _y_* | Correctly rounded | Correctly rounded +| *1.0 / _x_* | Correctly rounded | \<= 1 ulp +| *_x_ / _y_* | Correctly rounded | \<= 1 ulp +| | | +| *acos* | \<= 2 ulp | \<= 3 ulp +| *acosh* | \<= 2 ulp | \<= 3 ulp +| *acospi* | \<= 2 ulp | \<= 3 ulp +| *asin* | \<= 2 ulp | \<= 3 ulp +| *asinh* | \<= 2 ulp | \<= 3 ulp +| *asinpi* | \<= 2 ulp | \<= 3 ulp +| *atan* | \<= 2 ulp | \<= 3 ulp +| *atanh* | \<= 2 ulp | \<= 3 ulp +| *atanpi* | \<= 2 ulp | \<= 3 ulp +| *atan2* | \<= 2 ulp | \<= 3 ulp +| *atan2pi* | \<= 2 ulp | \<= 3 ulp +| *cbrt* | \<= 2 ulp | \<= 2 ulp +| *ceil* | Correctly rounded | Correctly rounded +| *clamp* | 0 ulp | 0 ulp +| *copysign* | 0 ulp | 0 ulp +| *cos* | \<= 2 ulp | \<= 2 ulp +| *cosh* | \<= 2 ulp | \<= 3 ulp +| *cospi* | \<= 2 ulp | \<= 2 ulp + +// 3 operations from the 2 multiplications and 1 subtraction per component +| *cross* + 
| absolute error tolerance of 'max * max * (3 * HALF_EPSILON)' per vector + component, where _max_ is the maximum input operand magnitude + | Implementation-defined +| *degrees* | \<= 2 ulp | \<= 2 ulp + +// 0.5 ULP error in sqrt +// 1.5 * n cumulative error for multiplications +// 0.5 * (n-1) cumulative error for additions +// +// = 0.5 + (1.5 * n) + (0.5 * (n - 1)) +// = 0.5 + 1.5n + (0.5n - 0.5) +// = 2n +| *distance* + | \<= 2n ulp, for gentype with vector width _n_ + | Implementation-defined + +// n + n-1 Number of operations from n multiples and (n-1) additions +// 2n - 1 +| *dot* + | absolute error tolerance of 'max * max * (2n - 1) * HALF_EPSILON', for + vector width _n_ and maximum input operand magnitude _max_ across all + vector components + | Implementation-defined + +| *erfc* | \<= 4 ulp | \<= 4 ulp +| *erf* | \<= 4 ulp | \<= 4 ulp +| *exp* | \<= 2 ulp | \<= 3 ulp +| *exp2* | \<= 2 ulp | \<= 3 ulp +| *exp10* | \<= 2 ulp | \<= 3 ulp +| *expm1* | \<= 2 ulp | \<= 3 ulp +| *fabs* | 0 ulp | 0 ulp +| *fdim* | Correctly rounded | Correctly rounded +| *floor* | Correctly rounded | Correctly rounded +| *fma* | Correctly rounded | Correctly rounded +| *fmax* | 0 ulp | 0 ulp +| *fmin* | 0 ulp | 0 ulp +| *fmod* | 0 ulp | 0 ulp +| *fract* | Correctly rounded | Correctly rounded +| *frexp* | 0 ulp | 0 ulp +| *hypot* | \<= 2 ulp | \<= 3 ulp +| *ilogb* | 0 ulp | 0 ulp +| *ldexp* | Correctly rounded | Correctly rounded + +// 0.5 ULP error in sqrt +// 0.5 effect on e of taking sqrt(x + e) +// 0.5 * n cumulative error for multiplications +// 0.5 * (n-1) cumulative error for additions +// +// = (0.5 + 0.5 * ((0.5 * n) + (0.5 * (n - 1)))) +// = 0.5 + 0.5 * (n - 0.5) +// = 0.25 + 0.5n +| *length* + | \<= 0.25 + 0.5n ulp, for gentype with vector width _n_ + | Implementation-defined +| *log* | \<= 2 ulp | \<= 3 ulp +| *log2* | \<= 2 ulp | \<= 3 ulp +| *log10* | \<= 2 ulp | \<= 3 ulp +| *log1p* | \<= 2 ulp | \<= 3 ulp +| *logb* | 0 ulp | 0 ulp +| *mad* | Implementation-defined 
| Implementation-defined +| *max* | 0 ulp | 0 ulp +| *maxmag* | 0 ulp | 0 ulp +| *min* | 0 ulp | 0 ulp +| *minmag* | 0 ulp | 0 ulp +| *mix* | Implementation-defined | Implementation-defined +| *modf* | 0 ulp | 0 ulp +| *nan* | 0 ulp | 0 ulp +| *nextafter* | 0 ulp | 0 ulp + +// 1.5 error in rsqrt + error in multiply +// 0.5 * n cumulative error for multiplications +// 0.5 * (n-1) cumulative error for additions +// +// = 1.5 + (0.5 * n) + (0.5 * (n - 1)) +// = 1.5 + 0.5n + (0.5n - 0.5) +// = 1.0 + n +| *normalize* + | \<= 1 + n ulp, for gentype with vector width _n_ + | Implementation-defined +| *pow(x, y)* | \<= 4 ulp | \<= 5 ulp +| *pown(x, y)* | \<= 4 ulp | \<= 5 ulp +| *powr(x, y)* | \<= 4 ulp | \<= 5 ulp +| *radians* | \<= 2 ulp | \<= 2 ulp +| *remainder* | 0 ulp | 0 ulp +| *remquo* + | 0 ulp for the remainder, at least the lower 7 bits of the integral + quotient + | 0 ulp for the remainder, at least the lower 7 bits of the integral + quotient +| *rint* | Correctly rounded | Correctly rounded +| *rootn* | \<= 4 ulp | \<= 5 ulp +| *round* | Correctly rounded | Correctly rounded +| *rsqrt* | \<=1 ulp | \<=1 ulp +| *sign* | 0 ulp | 0 ulp +| *sin* | \<= 2 ulp | \<= 2 ulp +| *sincos* + | \<= 2 ulp for sine and cosine values + | \<= 2 ulp for sine and cosine values +| *sinh* | \<= 2 ulp | \<= 3 ulp +| *sinpi* | \<= 2 ulp | \<= 2 ulp +| *smoothstep* | Implementation-defined | Implementation-defined +| *sqrt* | Correctly rounded | \<= 1 ulp +| *step* | 0 ulp | 0 ulp +| *tan* | \<= 2 ulp | \<= 3 ulp +| *tanh* | \<= 2 ulp | \<= 3 ulp +| *tanpi* | \<= 2 ulp | \<= 3 ulp +| *tgamma* | \<= 4 ulp | \<= 4 ulp +| *trunc* | Correctly rounded | Correctly rounded +|==== + +NOTE: _Implementations may perform floating-point operations on_ `half` +_scalar or vector data types by converting the_ `half` _values to single +precision floating-point values and performing the operation in single +precision floating-point. 
+In this case, the implementation will use the_ `half` _scalar or vector data +type as a storage only format_. + +endif::cl_khr_fp16[] + + [[edge-case-behavior]] == Edge Case Behavior @@ -12010,7 +16312,7 @@ For example, *sin*({plusmn}0) = {plusmn}0 shall be interpreted to mean * *atan2pi*(_y_, {plusmn}0) returns 0.5 for _y_ > 0. * *atan2pi*({plusmn}_y_, -{inf}) returns {plusmn}1 for finite _y_ > 0. * *atan2pi*({plusmn}_y_, +{inf}) returns {plusmn}0 for finite _y_ > 0. -* *atan2pi*({plusmn}{inf}, _x_) returns {plusmn}0.5 for finite _x._ +* *atan2pi*({plusmn}{inf}, _x_) returns {plusmn}0.5 for finite _x_. * *atan2pi*({plusmn}{inf}, -{inf}) returns {plusmn}0.75. * *atan2pi*({plusmn}{inf}, +{inf}) returns {plusmn}0.25. * *ceil*(-1 < _x_ < 0) returns -0. @@ -12064,7 +16366,7 @@ for (i = 0; i < sizeof(v) / sizeof(v[0]); i++) * *powr*({plusmn}0, _y_) is +{inf} for finite _y_ < 0. * *powr*({plusmn}0, -{inf}) is +{inf}. * *powr*({plusmn}0, _y_) is +0 for _y_ > 0. -* *powr*(+1, _y_) is 1 for finite _y._ +* *powr*(+1, _y_) is 1 for finite _y_. * *powr*(_x_, _y_) returns NaN for _x_ < 0. * *powr*({plusmn}0, {plusmn}0) returns NaN. * *powr*(+{inf}, {plusmn}0) returns NaN. @@ -12117,7 +16419,7 @@ if the caller is in some other rounding mode. [[edge-case-behavior-in-flush-to-zero-mode]] -=== Edge Case Behavior in Flush To Zero Mode +=== Edge Case Behavior in Flush to Zero Mode If denormals are flushed to zero, then a function may return one of four results: @@ -12218,9 +16520,9 @@ The following table describes the address_mode function. [[table-address-modes-texel-location]] .Addressing modes to generate texel location -[cols=",",] +[cols=",",options="header",] |==== -| *Addressing Mode* | *Result of address_mode(coord)* +| Addressing Mode | Result of address_mode(coord) | `CLK_ADDRESS_CLAMP_TO_EDGE` | clamp (coord, 0, size - 1) | `CLK_ADDRESS_CLAMP` | clamp (coord, -1, size) | `CLK_ADDRESS_NONE` | coord @@ -12573,14 +16875,14 @@ and writing images in a kernel. 
[[conversion-rules-for-normalized-integer-channel-data-types]] -=== Conversion rules for normalized integer channel data types +=== Conversion Rules for Normalized Integer Channel Data Types In this section we discuss converting normalized integer channel data types to floating-point values and vice-versa. [[converting-normalized-integer-channel-data-types-to-floating-point-values]] -==== Converting normalized integer channel data types to floating-point values +==== Converting Normalized Integer Channel Data Types to Floating-point Values For images created with image channel data type of `CL_UNORM_INT8` and `CL_UNORM_INT16`, *read_imagef* will convert the channel values from an @@ -12620,7 +16922,7 @@ These conversions are performed as follows: * normalized `float` value = *max*(`-1.0f`, `(float)c / 32767.0f`) The precision of the above conversions is \<= 1.5 ulp except for the -following cases. +following cases: For `CL_UNORM_INT8` @@ -12655,8 +16957,87 @@ For `CL_SNORM_INT16` * 32767 must convert to `1.0f` +ifdef::cl_khr_fp16[] +[[converting-normalized-integer-channel-data-types-to-half-precision-floating-point-values]] +==== Converting Normalized Integer Channel Data Types to Half-Precision Floating-Point Values + +If the `<>` extension is supported, then +for images created with image channel data type of `CL_UNORM_INT8` and +`CL_UNORM_INT16`, *read_imageh* will convert the channel values from an +8-bit or 16-bit unsigned integer to normalized half-precision floating-point +values in the range [`0.0h`, `1.0h`]. + +For images created with image channel data type of `CL_SNORM_INT8` and +`CL_SNORM_INT16`, *read_imageh* will convert the channel values from an +8-bit or 16-bit signed integer to normalized half-precision floating-point +values in the range [`-1.0h`, `1.0h`]. 
+ +These conversions are performed as follows: + +`CL_UNORM_INT8` (8-bit unsigned integer) {rightarrow} `half` + +[none] +* normalized `half` value = `round_to_half(c / 255)` + +`CL_UNORM_INT_101010` (10-bit unsigned integer) {rightarrow} `half` + +[none] +* normalized `half` value = `round_to_half(c / 1023)` + +`CL_UNORM_INT16` (16-bit unsigned integer) {rightarrow} `half` + +[none] +* normalized `half` value = `round_to_half(c / 65535)` + +`CL_SNORM_INT8` (8-bit signed integer) {rightarrow} `half` + +[none] +* normalized `half` value = *max*(`-1.0h`, `round_to_half(c / 127)`) + +`CL_SNORM_INT16` (16-bit signed integer) {rightarrow} `half` + +[none] +* normalized `half` value = *max*(`-1.0h`, `round_to_half(c / 32767)`) + +The precision of the above conversions is \<= 1.5 ulp except for the +following cases: + +For `CL_UNORM_INT8` + +[none] +* 0 must convert to `0.0h` and +* 255 must convert to `1.0h` + +For `CL_UNORM_INT_101010` + +[none] +* 0 must convert to `0.0h` and +* 1023 must convert to `1.0h` + +For `CL_UNORM_INT16` + +[none] +* 0 must convert to `0.0h` and +* 65535 must convert to `1.0h` + +For `CL_SNORM_INT8` + +[none] +* -128 and -127 must convert to `-1.0h`, +* 0 must convert to `0.0h` and +* 127 must convert to `1.0h` + +For `CL_SNORM_INT16` + +[none] +* -32768 and -32767 must convert to `-1.0h`, +* 0 must convert to `0.0h` and +* 32767 must convert to `1.0h` +endif::cl_khr_fp16[] + + [[converting-floating-point-values-to-normalized-integer-channel-data-types]] -==== Converting floating-point values to normalized integer channel data types +==== Converting Floating-Point Values to Normalized Integer Channel Data Types For images created with image channel data type of `CL_UNORM_INT8` and `CL_UNORM_INT16`, *write_imagef* will convert the floating-point color value @@ -12743,8 +17124,71 @@ the result produced by the round to nearest even rounding mode must be {leq} * *fabs*(f~preferred~ - f~approx~) must be \<= 0.6 +ifdef::cl_khr_fp16[] 
+[[converting-half-precision-floating-point-values-to-normalized-integer-channel-data-types]] +==== Converting Half-Precision Floating-point Values to Normalized Integer Channel Data Types + +If the `<<cl_khr_fp16>>` extension is supported, then +for images created with image channel data type of `CL_UNORM_INT8` and +`CL_UNORM_INT16`, *write_imageh* will convert the floating-point color value +to an 8-bit or 16-bit unsigned integer. + +For images created with image channel data type of `CL_SNORM_INT8` and +`CL_SNORM_INT16`, *write_imageh* will convert the floating-point color value +to an 8-bit or 16-bit signed integer. + +The preferred conversion uses the round to nearest even (`_rte`) rounding +mode, but OpenCL implementations may choose to approximate the rounding mode +used in the conversions described below. +When approximate rounding is used instead of the preferred rounding, the +result of the conversion must satisfy the bound given below. + +`half` {rightarrow} `CL_UNORM_INT8` (8-bit unsigned integer) + +[none] + * Let f~exact~ = *max*(`0`, *min*(`f * 255`, `255`)) + * Let f~preferred~ = *convert_uchar_sat_rte*(`f * 255.0f`) + * Let f~approx~ = *convert_uchar_sat_<impl-rounding-mode>*(`f * 255.0f`) + * *fabs*(f~exact~ - f~approx~) must be \<= 0.6 + +`half` {rightarrow} `CL_UNORM_INT_101010` (10-bit unsigned integer) + +[none] + * Let f~exact~ = *max*(`0`, *min*(`f * 1023`, `1023`)) + * Let f~preferred~ = *min*(*convert_ushort_sat_rte*(`f * 1023.0f`), + `1023`) + * Let f~approx~ = *convert_ushort_sat_<impl-rounding-mode>*(`f * 1023.0f`) + * *fabs*(f~exact~ - f~approx~) must be \<= 0.6 + +`half` {rightarrow} `CL_UNORM_INT16` (16-bit unsigned integer) + +[none] + * Let f~exact~ = *max*(`0`, *min*(`f * 65535`, `65535`)) + * Let f~preferred~ = *convert_ushort_sat_rte*(`f * 65535.0f`) + * Let f~approx~ = *convert_ushort_sat_<impl-rounding-mode>*(`f * + 65535.0f`) + * *fabs*(f~exact~ - f~approx~) must be \<= 0.6 + +`half` {rightarrow} `CL_SNORM_INT8` (8-bit signed integer) + +[none] + * Let f~exact~ = *max*(`-128`, *min*(`f * 127`,
`127`)) + * Let f~preferred~ = *convert_char_sat_rte*(`f * 127.0f`) + * Let f~approx~ = *convert_char_sat_<impl-rounding-mode>*(`f * 127.0f`) + * *fabs*(f~exact~ - f~approx~) must be \<= 0.6 + +`half` {rightarrow} `CL_SNORM_INT16` (16-bit signed integer) + +[none] + * Let f~exact~ = *max*(`-32768`, *min*(`f * 32767`, `32767`)) + * Let f~preferred~ = *convert_short_sat_rte*(`f * 32767.0f`) + * Let f~approx~ = *convert_short_sat_<impl-rounding-mode>*(`f * 32767.0f`) + * *fabs*(f~exact~ - f~approx~) must be \<= 0.6 +endif::cl_khr_fp16[] + + [[conversion-rules-for-half-precision-floating-point-channel-data-type]] -=== Conversion rules for half precision floating-point channel data type +=== Conversion Rules for Half-Precision Floating-Point Channel Data Type For images created with a channel data type of `CL_HALF_FLOAT`, the conversions from `half` to `float` are lossless (as described in @@ -12759,7 +17203,7 @@ type. [[conversion-rules-for-floating-point-channel-data-type]] -=== Conversion rules for floating-point channel data type +=== Conversion Rules for Floating-Point Channel Data Type The following rules apply for reading and writing images created with channel data type of `CL_FLOAT`. @@ -12770,7 +17214,7 @@ channel data type of `CL_FLOAT`. [[conversion-rules-for-signed-and-unsigned-8-bit-16-bit-and-32-bit-integer-channel-data-types]] -=== Conversion rules for signed and unsigned 8-bit, 16-bit and 32-bit integer channel data types +=== Conversion Rules for Signed and Unsigned 8-Bit, 16-Bit and 32-Bit Integer Channel Data Types Calls to *read_imagei* with channel data type values of `CL_SIGNED_INT8`, `CL_SIGNED_INT16` and `CL_SIGNED_INT32` return the unmodified integer values @@ -12818,7 +17262,7 @@ The conversions described in this section must be correctly saturated.
[[conversion-rules-for-srgba-and-sbgra-images]] -=== Conversion rules for sRGBA and sBGRA images +=== Conversion Rules for sRGBA and sBGRA Images Standard RGB data, which roughly displays colors in a linear ramp of luminosity levels such that an average observer, under average viewing @@ -12895,7 +17339,7 @@ implementation produces and being checked for conformance. [[selecting-an-image-from-an-image-array]] -== Selecting an Image from an Image Array +== Selecting an Image From an Image Array Let (_u_,_v_,_w_) represent the unnormalized image coordinate values for reading from and/or writing to a 2D image in a 2D image array. @@ -12978,7 +17422,7 @@ one of the integers 0, 1, ... h~t~ - 1. include::c/appendix_a.asciidoc[] -// This is generatig asciidoctor errors: +// This is generating asciidoctor errors: // OpenCL_C.txt: Failed to load AsciiDoc document - undefined method `+' for nil:NilClass // Disabling acknowledgements for now. We have them in the API spec already. //<<< diff --git a/OpenCL_Ext.txt b/OpenCL_Ext.txt index a2e983ea..68e4295b 100644 --- a/OpenCL_Ext.txt +++ b/OpenCL_Ext.txt @@ -47,75 +47,9 @@ include::copyrights.txt[] include::ext/introduction.asciidoc[] -include::ext/cl_khr_icd.asciidoc[] -include::ext/cl_khr_byte_addressable_store.asciidoc[] -include::ext/cl_khr_3d_image_writes.asciidoc[] -include::ext/cl_khr_fp16.asciidoc[] -include::ext/cl_khr_fp64.asciidoc[] -include::ext/cl_khr_int32_atomics.asciidoc[] -include::ext/cl_khr_int64_atomics.asciidoc[] -include::ext/cl_khr_select_fprounding_mode.asciidoc[] -include::ext/cl_khr_gl_sharing__context.asciidoc[] -include::ext/cl_khr_gl_sharing__memobjs.asciidoc[] - -include::ext/cl_khr_gl_event.asciidoc[] -include::ext/cl_khr_d3d10_sharing.asciidoc[] - -include::ext/cl_khr_d3d11_sharing.asciidoc[] -include::ext/cl_khr_dx9_media_sharing.asciidoc[] -include::ext/cl_khr_depth_images.asciidoc[] -include::ext/cl_khr_gl_depth_images.asciidoc[] -include::ext/cl_khr_gl_msaa_sharing.asciidoc[] 
-include::ext/cl_khr_egl_event.asciidoc[] -include::ext/cl_khr_egl_image.asciidoc[] -include::ext/cl_khr_image2d_from_buffer.asciidoc[] -include::ext/cl_khr_initialize_memory.asciidoc[] -include::ext/cl_khr_terminate_context.asciidoc[] -include::ext/cl_khr_spir.asciidoc[] -include::ext/cl_khr_il_program.asciidoc[] -include::ext/cl_khr_create_command_queue.asciidoc[] - -include::ext/cl_khr_device_enqueue_local_arg_types.asciidoc[] -include::ext/cl_khr_subgroups.asciidoc[] -include::ext/cl_khr_mipmap_image.asciidoc[] -include::ext/cl_khr_srgb_image_writes.asciidoc[] - -include::ext/cl_khr_priority_hints.asciidoc[] -include::ext/cl_khr_throttle_hints.asciidoc[] - -include::ext/cl_khr_subgroup_named_barrier.asciidoc[] - -include::ext/cl_khr_extended_async_copies.asciidoc[] -include::ext/cl_khr_async_work_group_copy_fence.asciidoc[] - -include::ext/cl_khr_device_uuid.asciidoc[] -include::ext/cl_khr_extended_versioning.asciidoc[] - -include::ext/cl_khr_subgroup_extensions.asciidoc[] - -include::ext/cl_khr_pci_bus_info.asciidoc[] - -include::ext/cl_khr_extended_bit_ops.asciidoc[] -include::ext/cl_khr_suggested_local_work_size.asciidoc[] - -include::ext/cl_khr_integer_dot_product.asciidoc[] - -include::ext/cl_khr_semaphore.asciidoc[] - -include::ext/cl_khr_external_semaphore.asciidoc[] -include::ext/cl_khr_external_memory.asciidoc[] - -include::ext/cl_khr_command_buffer.asciidoc[] -include::ext/cl_khr_expect_assume.asciidoc[] - -include::ext/cl_khr_subgroup_rotate.asciidoc[] -include::ext/cl_khr_work_group_uniform_arithmetic.asciidoc[] - -include::ext/cl_khr_command_buffer_mutable_dispatch.asciidoc[] -include::ext/cl_khr_command_buffer_multi_device.asciidoc[] - -// NOTE: To keep meaningful section numbers, new -// extension documents should be added above here! +// khr extension specifications must now be written as conditionally +// included markup in the OpenCL API and C Language Specifications, rather +// than being included here as separate documents. 
// These are SPIR-V Extensions: diff --git a/README.adoc b/README.adoc index 410abb0b..0ef04fb0 100644 --- a/README.adoc +++ b/README.adoc @@ -142,6 +142,48 @@ build is very fast, even for the whole Specification, but PDF builds may take several minutes. +[[building-extensions]] +== Building With Extensions Included + +Invoking 'make' with no extra arguments will build the OpenCL API and OpenCL +C Language specifications with only the core APIs and functionality +included. +To build versions of these specifications with extension language included, +you should use the `makeSpec` script. +`makeSpec` is a Python script accepting the following arguments: + + * -spec _variant_ - _variant_ is `core`, `khr`, or `all`, building + specifications with only core, core + all khr extensions and core + all + extensions, respectively. + At present, `all` is equivalent to `khr` as only `khr` extensions are + included in the specification source. + * -ext _name_ - add the specified extension _name_ and its dependencies to + the build. + * -clean - clean generated files before building. + * -registry _path_ - use specified API XML instead of the default + `xml/cl.xml`. + * -v - verbose, print actions before executing them. + * -n - dry-run, print actions instead of executing them. + * Unrecognized options are passed on to `make`, so must be valid Makefile + targets or `make` options such as `-j`. + +Any other options reported by `makeSpec --help` are not yet functional, and +should not be used. + +For example, to build with all `khr` extensions, you could use + +[source,sh] +---- +$ makeSpec -clean -spec khr -j html refpages +---- + +`makeSpec` is a wrapper which constructs options and invokes `make` +when building with extensions included, which only affects building the API +(including reference pages) and C language specifications. +`makeSpec` determines extension dependencies from the metadata in the +specified registry XML path. 
+ + [[refpage-install]] == Reference Page Installation @@ -688,6 +730,7 @@ by Khronos. [[history]] == Revision History + * 2024-04-07 - Add `makeSpec` instructions. * 2023-11-05 - Add hexapdf, remove ghostscript * 2020-03-13 - Updated package versions to match Travis build. * 2019-06-20 - Add directions for publishing OpenCL 2.2 reference pages, diff --git a/api/acknowledgements.asciidoc b/api/acknowledgements.asciidoc index 6df49ab0..72fec436 100644 --- a/api/acknowledgements.asciidoc +++ b/api/acknowledgements.asciidoc @@ -1,6 +1,5 @@ -// Copyright 2016-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ +// Copyright 2016-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 = Acknowledgements diff --git a/api/appendix_a.asciidoc b/api/appendix_a.asciidoc index c2fc908d..0078df97 100644 --- a/api/appendix_a.asciidoc +++ b/api/appendix_a.asciidoc @@ -1,6 +1,5 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ +// Copyright 2017-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 [appendix] = Host environment and thread safety @@ -73,7 +72,7 @@ The OpenCL implementation should be able to create multiple command-queues for a given OpenCL context and multiple OpenCL contexts in an application running on the host processor. -== Global constructors and destructors +== Global Constructors and Destructors The execution order of global constructors and destructors is left undefined by the C and C++ standards. It is therefore not possible to know the relative diff --git a/api/appendix_b.asciidoc b/api/appendix_b.asciidoc index 2759903f..c5698515 100644 --- a/api/appendix_b.asciidoc +++ b/api/appendix_b.asciidoc @@ -1,6 +1,5 @@ -// Copyright 2016-2024 The Khronos Group. 
This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ +// Copyright 2016-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 [appendix] = Portability @@ -242,12 +241,12 @@ vector machines OpenCL could have made it illegal to do a conversion free cast that changes the number of elements in the name of portability. However, while OpenCL provides a common set of operators drawing from the -set that are typically found on vector machines, it can not provide access +set that are typically found on vector machines, it cannot provide access to everything every ISA may offer in a consistent uniform portable manner. Many vector ISAs provide special purpose instructions that greatly accelerate specific operations such as DCT, SAD, or 3D geometry. It is not intended for OpenCL to be so heavy handed that time-critical -performance sensitive algorithms can not be written by knowledgeable +performance sensitive algorithms cannot be written by knowledgeable developers to perform at near peak performance. Developers willing to throw away portability should be able to use the platform-specific instructions in their code. diff --git a/api/appendix_c.asciidoc b/api/appendix_c.asciidoc index a8b236c2..2531dbb2 100644 --- a/api/appendix_c.asciidoc +++ b/api/appendix_c.asciidoc @@ -1,6 +1,5 @@ -// Copyright 2016-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ +// Copyright 2016-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 [appendix] [[data-types]] @@ -140,7 +139,7 @@ so their use should be accompanied by a check of the corresponding preprocessor symbol. 
-=== Named vector components notation +=== Named Vector Components Notation Vector data type components may be accessed using the `.sN`, `.sn` or `.xyzw` field naming convention, similar to how they are used within the @@ -198,7 +197,7 @@ foo.s7 // illegal - no component s7 ---- -=== High/Low vector component notation +=== High/Low Vector Component Notation Vector data type components may be accessed using the `.hi` and `.lo` notation similar to that supported within the language types. @@ -217,7 +216,7 @@ For example: ---- -=== Native vector type notation +=== Native Vector Type Notation Certain native vector types are defined for providing a mapping of vector types to architecturally built-in vector types. @@ -270,14 +269,14 @@ Explicit casting of native vector types (`+__cl_typen+`) is defined by the external compiler. -== Other operators and functions +== Other Operators and Functions The behavior of standard operators and function on both application vector types (`cl_typen`) and native vector types (`+__cl_typen+`) is defined by the external compiler. -== Application constant definitions +== Application Constant Definitions In addition to the above application type definitions, the following literal definitions are also available. @@ -390,54 +389,54 @@ include::{generated}/api/version-notes/CL_FLT_EPSILON.asciidoc[] | {CL_DBL_DIG_anchor} include::{generated}/api/version-notes/CL_DBL_DIG.asciidoc[] -Also see extension *cl_khr_fp64*. +Also see `<<cl_khr_fp64>>`. | Number of decimal digits of precision for the type {cl_double_TYPE} | {CL_DBL_MANT_DIG_anchor} include::{generated}/api/version-notes/CL_DBL_MANT_DIG.asciidoc[] -Also see extension *cl_khr_fp64*. +Also see `<<cl_khr_fp64>>`. | Number of digits in the mantissa of type {cl_double_TYPE} | {CL_DBL_MAX_10_EXP_anchor} include::{generated}/api/version-notes/CL_DBL_MAX_10_EXP.asciidoc[] -Also see extension *cl_khr_fp64*. +Also see `<<cl_khr_fp64>>`.
| Maximum positive integer such that 10 raised to this power minus one can be represented as a normalized floating-point number of type {cl_double_TYPE} | {CL_DBL_MAX_EXP_anchor} include::{generated}/api/version-notes/CL_DBL_MAX_EXP.asciidoc[] -Also see extension *cl_khr_fp64*. +Also see `<<cl_khr_fp64>>`. | Maximum exponent value of type {cl_double_TYPE} | {CL_DBL_MIN_10_EXP_anchor} include::{generated}/api/version-notes/CL_DBL_MIN_10_EXP.asciidoc[] -Also see extension *cl_khr_fp64*. +Also see `<<cl_khr_fp64>>`. | Minimum negative integer such that 10 raised to this power minus one can be represented as a normalized floating-point number of type {cl_double_TYPE} | {CL_DBL_MIN_EXP_anchor} include::{generated}/api/version-notes/CL_DBL_MIN_EXP.asciidoc[] -Also see extension *cl_khr_fp64*. +Also see `<<cl_khr_fp64>>`. | Minimum exponent value of type {cl_double_TYPE} | {CL_DBL_RADIX_anchor} include::{generated}/api/version-notes/CL_DBL_RADIX.asciidoc[] -Also see extension *cl_khr_fp64*. +Also see `<<cl_khr_fp64>>`. | Base value of type {cl_double_TYPE} | {CL_DBL_MAX_anchor} include::{generated}/api/version-notes/CL_DBL_MAX.asciidoc[] -Also see extension *cl_khr_fp64*. +Also see `<<cl_khr_fp64>>`. | Maximum value of type {cl_double_TYPE} | {CL_DBL_MIN_anchor} include::{generated}/api/version-notes/CL_DBL_MIN.asciidoc[] -Also see extension *cl_khr_fp64*. +Also see `<<cl_khr_fp64>>`. | Minimum value of type {cl_double_TYPE} | {CL_DBL_EPSILON_anchor} include::{generated}/api/version-notes/CL_DBL_EPSILON.asciidoc[] -Also see extension *cl_khr_fp64*. +Also see `<<cl_khr_fp64>>`. | Minimum positive floating-point number of type {cl_double_TYPE} such that `1.0 {plus} {CL_DBL_EPSILON} != 1` is true. | {CL_NAN_anchor} diff --git a/api/appendix_d.asciidoc b/api/appendix_d.asciidoc index aad002ee..7ae631b0 100644 --- a/api/appendix_d.asciidoc +++ b/api/appendix_d.asciidoc @@ -1,6 +1,5 @@ -// Copyright 2017-2024 The Khronos Group.
This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ +// Copyright 2017-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 [appendix] [[check-copy-overlap]] diff --git a/api/appendix_e.asciidoc b/api/appendix_e.asciidoc index 8cc59317..2bec7aeb 100644 --- a/api/appendix_e.asciidoc +++ b/api/appendix_e.asciidoc @@ -1,6 +1,5 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ +// Copyright 2017-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 [appendix] [[changes_to_opencl]] @@ -12,7 +11,7 @@ versions are summarized below. // (Jon) Are these section and table numbers for the current spec, in which // case they should turn into asciidoctor xrefs, or to older specs? -== Summary of changes from OpenCL 1.0 to OpenCL 1.1 +== Summary of Changes from OpenCL 1.0 to OpenCL 1.1 The following features are added to the OpenCL 1.1 platform layer and runtime (_sections 4 and 5_): @@ -75,11 +74,11 @@ The following features are added to the OpenCL C programming language _section 6.12.3_. ** *async_work_group_strided_copy* defined in section _6.15.11_. ** *vec_step*, *shuffle* and *shuffle2* defined in section _6.15.13_. - * *cl_khr_byte_addressable_store* extension is a core feature. - * *cl_khr_global_int32_base_atomics*, - *cl_khr_global_int32_extended_atomics*, - *cl_khr_local_int32_base_atomics* and - *cl_khr_local_int32_extended_atomics* extensions are core features. + * `<<cl_khr_byte_addressable_store>>` extension is a core feature. + * `<<cl_khr_global_int32_base_atomics>>`, + `<<cl_khr_global_int32_extended_atomics>>`, + `<<cl_khr_local_int32_base_atomics>>` and + `<<cl_khr_local_int32_extended_atomics>>` extensions are core features. The built-in atomic function names are changed to use the *atomic_* prefix instead of *atom_*. * Macros `CL_VERSION_1_0` and `CL_VERSION_1_1`.
@@ -98,20 +97,20 @@ The following features in OpenCL 1.0 are deprecated (see glossary) in OpenCL * The `-cl-strict-aliasing` build option has been deprecated. It is no longer required after defining type-based aliasing rules. // Bugzilla 5593 and 6068 - * The *cl_khr_select_fprounding_mode* extension is deprecated and its + * The `<<cl_khr_select_fprounding_mode>>` extension is deprecated and its use is no longer recommended. The following new extensions are added to _section 9_ in OpenCL 1.1: - * *cl_khr_gl_event* for creating a CL event object from a GL sync object. - * *cl_khr_d3d10_sharing* for sharing memory objects with Direct3D 10. + * `<<cl_khr_gl_event>>` for creating a CL event object from a GL sync object. + * `<<cl_khr_d3d10_sharing>>` for sharing memory objects with Direct3D 10. The following modifications are made to the OpenCL ES Profile described in _section 10_ in OpenCL 1.1: * 64-bit integer support is optional. -== Summary of changes from OpenCL 1.1 to OpenCL 1.2 +== Summary of Changes from OpenCL 1.1 to OpenCL 1.2 The following features are added to the OpenCL 1.2 platform layer and runtime (_sections 4 and 5_): @@ -147,7 +146,7 @@ runtime (_sections 4 and 5_): a kernel. * {clEnqueueMarkerWithWaitList} and {clEnqueueBarrierWithWaitList} APIs. * {clUnloadPlatformCompiler} to request that a single platform's compiler is - unloaded. This is compatible with the *cl_khr_icd* extension if that is + unloaded. This is compatible with the `<<cl_khr_icd>>` extension if that is supported, unlike {clUnloadCompiler}. The following features are added to the OpenCL C programming language @@ -184,7 +183,7 @@ The following APIs in OpenCL 1.1 are deprecated (see glossary) in OpenCL // Bugzilla 5391 - cl_khr_icd specification * {clUnloadCompiler} and {clGetExtensionFunctionAddress} APIs are deprecated. The {clUnloadPlatformCompiler} and {clGetExtensionFunctionAddressForPlatform} - APIs provide equivalent functionality are compatible with the *cl_khr_icd* + APIs provide equivalent functionality and are compatible with the `<<cl_khr_icd>>` extension.
The following queries are deprecated (see glossary) in OpenCL 1.2: @@ -194,7 +193,7 @@ The following queries are deprecated (see glossary) in OpenCL 1.2: The minimum data type alignment can be derived from {CL_DEVICE_MEM_BASE_ADDR_ALIGN}. -== Summary of changes from OpenCL 1.2 to OpenCL 2.0 +== Summary of Changes from OpenCL 1.2 to OpenCL 2.0 The following features are added to the OpenCL 2.0 platform layer and runtime (_sections 4 and 5_): @@ -289,7 +288,7 @@ The following queries are deprecated (see glossary) in OpenCL 2.0: The deprecated functions are still described in section 6.15.12.8 of this specification. -== Summary of changes from OpenCL 2.0 to OpenCL 2.1 +== Summary of Changes from OpenCL 2.0 to OpenCL 2.1 The following features are added to the OpenCL 2.1 platform layer and runtime (_sections 4 and 5_): @@ -328,7 +327,7 @@ runtimes. The SPIR-V and OpenCL SPIR-V Environment specifications have been added. -== Summary of changes from OpenCL 2.1 to OpenCL 2.2 +== Summary of Changes from OpenCL 2.1 to OpenCL 2.2 The following changes have been made to the OpenCL 2.2 execution model (section 3) @@ -354,7 +353,7 @@ runtime (section 4 and 5): Added definition of Deprecation and Specialization constants to the glossary. -== Summary of changes from OpenCL 2.2 to OpenCL 3.0 +== Summary of Changes from OpenCL 2.2 to OpenCL 3.0 OpenCL 3.0 is a major revision that breaks backwards compatibility with previous versions of OpenCL, see @@ -458,7 +457,7 @@ conformance process: * {CL_DEVICE_LATEST_CONFORMANCE_VERSION_PASSED} -== Summary of changes from OpenCL 3.0 +== Summary of Changes from OpenCL 3.0 The first non-provisional version of the OpenCL 3.0 specifications was *v3.0.5*. @@ -466,14 +465,14 @@ Changes from *v3.0.5*: * Fixed the calculation in "mapping work-items onto an ND-range". 
* Added new extensions: - ** `cl_khr_extended_versioning` - ** `cl_khr_subgroup_extended_types` - ** `cl_khr_subgroup_non_uniform_vote` - ** `cl_khr_subgroup_ballot` - ** `cl_khr_subgroup_non_uniform_arithmetic` - ** `cl_khr_subgroup_shuffle` - ** `cl_khr_subgroup_shuffle_relative` - ** `cl_khr_subgroup_clustered_reduce` + ** `<<cl_khr_extended_versioning>>` + ** `<<cl_khr_subgroup_extended_types>>` + ** `<<cl_khr_subgroup_non_uniform_vote>>` + ** `<<cl_khr_subgroup_ballot>>` + ** `<<cl_khr_subgroup_non_uniform_arithmetic>>` + ** `<<cl_khr_subgroup_shuffle>>` + ** `<<cl_khr_subgroup_shuffle_relative>>` + ** `<<cl_khr_subgroup_clustered_reduce>>` Changes from *v3.0.6*: @@ -482,11 +481,11 @@ Changes from *v3.0.6*: * Clarified the table structure in the backwards compatibility appendix. * Clarified that `-cl-unsafe-math-optimizations` also implies `-cl-denorms-are-zero`. * Added new extensions: - ** `cl_khr_extended_bit_ops` - ** `cl_khr_pci_bus_info` - ** `cl_khr_spirv_extended_debug_info` - ** `cl_khr_spirv_linkonce_odr` - ** `cl_khr_suggested_local_work_size` + ** `<<cl_khr_extended_bit_ops>>` + ** `<<cl_khr_pci_bus_info>>` + ** `<<cl_khr_spirv_extended_debug_info>>` + ** `<<cl_khr_spirv_linkonce_odr>>` + ** `<<cl_khr_suggested_local_work_size>>` Changes from *v3.0.7*: @@ -494,7 +493,7 @@ Changes from *v3.0.7*: * Removed unnecessary phrase from sub-group mask function descriptions. * Added _input_slice_pitch_ error condition for read and write image APIs. * Added new extension: - ** `cl_khr_integer_dot_product` + ** `<<cl_khr_integer_dot_product>>` Changes from *v3.0.8*: @@ -502,18 +501,18 @@ Changes from *v3.0.8*: * Clarified requirements for {CL_DEVICE_DOUBLE_FP_CONFIG} prior to OpenCL 2.0. * Clarified the behavior of ballot operations for remainder sub-groups.
* Added new extensions: - ** `cl_khr_integer_dot_product` (version 2) - ** `cl_khr_semaphore` (provisional) - ** `cl_khr_external_semaphore` (provisional) - ** `cl_khr_external_semaphore_dx_fence` (provisional) - ** `cl_khr_external_semaphore_opaque_fd` (provisional) - ** `cl_khr_external_semaphore_sync_fd` (provisional) - ** `cl_khr_external_semaphore_win32` (provisional) - ** `cl_khr_external_memory` (provisional) - ** `cl_khr_external_memory_dma_buf` (provisional) - ** `cl_khr_external_memory_dx` (provisional) - ** `cl_khr_external_memory_opaque_fd` (provisional) - ** `cl_khr_external_memory_win32` (provisional) + ** `<>` (version 2) + ** `<>` (provisional) + ** `<>` (provisional) + ** `<>` (provisional) + ** `<>` (provisional) + ** `<>` (provisional) + ** `<>` (provisional) + ** `<>` (provisional) + ** `<>` (provisional) + ** `<>` (provisional) + ** `<>` (provisional) + ** `<>` (provisional) Changes from *v3.0.9*: @@ -522,10 +521,10 @@ Changes from *v3.0.9*: * Clarified that {clCompileProgram} is valid for programs created from SPIR. * Documented the possible state of a kernel object after a failed call to {clSetKernelArg}. * Added new extensions: - ** `cl_khr_async_copy_fence` (final) - ** `cl_khr_extended_async_copies` (final) - ** `cl_khr_expect_assume` - ** `cl_khr_command_buffer` (provisional) + ** `<>` (final) + ** `<>` (final) + ** `<>` + ** `<>` (provisional) Changes from *v3.0.10*: @@ -538,8 +537,8 @@ Changes from *v3.0.10*: * Clarified that the extended versioning extension is a core OpenCL 3.0 feature. * Clarified sub-group clustered reduction behavior when the cluster size is not an integer constant or a power of two. * Added new extensions: - ** `cl_khr_subgroup_rotate` - ** `cl_khr_work_group_uniform_arithmetic` + ** `<>` + ** `<>` Changes from *v3.0.11*: @@ -547,29 +546,29 @@ Changes from *v3.0.11*: * Added a maximum limit for the number of arguments supported by a kernel. 
* Clarified requirements for comparability and uniqueness of object handles. * Clarified behavior for invalid device-side enqueue `clk_event_t` handles. - * Clarified `cl_khr_command_buffer` interactions with other extensions. + * Clarified `<>` interactions with other extensions. * Specified error behavior when a command buffer is finalized multiple times. * Added new extension: - ** `cl_khr_command_buffer_mutable_dispatch` (provisional) + ** `<>` (provisional) Changes from *v3.0.12*: * Fixed the accuracy requirements description for half-precision math functions (those prefixed by `half_`). * Clarified that the semaphore type must always be provided when creating a semaphore. * Removed an unnecessary and contradictory error condition when creating a semaphore. - * Added an issue regarding non-linear image import to the `cl_khr_external_memory` extension. - * Added missing calls to {clBuildProgram} to the `cl_khr_command_buffer` and `cl_khr_command_buffer_mutable_dispatch` sample code. + * Added an issue regarding non-linear image import to the `<>` extension. + * Added missing calls to {clBuildProgram} to the `<>` and `<>` sample code. * Fixed a copy-paste error in the extensions quick reference appendix. * Fixed typos and improved formatting consistency in the extensions spec. Changes from *v3.0.13*: - * Corrected the precision for `cross` and `dot` to be based on `HALF_EPSILON` in `cl_khr_fp16`, see {khronos-opencl-pr}/893[#893]. - * Added a context query for command-buffers to `cl_khr_command_buffer`, see {khronos-opencl-pr}/899[#899]. - * Updated the semaphore wait and signal rules for binary semaphores in `cl_khr_semaphore`, see {khronos-opencl-pr}/882[#882]. - * Removed redundant error conditions from `cl_khr_external_semaphore` and `cl_khr_external_memory`, see {khronos-opencl-pr}/903[#903] and {khronos-opencl-pr}/904[#904]. + * Corrected the precision for `cross` and `dot` to be based on `HALF_EPSILON` in `<>`, see {khronos-opencl-pr}/893[#893]. 
+ * Added a context query for command-buffers to `<>`, see {khronos-opencl-pr}/899[#899]. + * Updated the semaphore wait and signal rules for binary semaphores in `<>`, see {khronos-opencl-pr}/882[#882]. + * Removed redundant error conditions from `<>` and `<>`, see {khronos-opencl-pr}/903[#903] and {khronos-opencl-pr}/904[#904]. * Added new extension: - ** `cl_khr_command_buffer_multi_device` (provisional) + ** `<>` (provisional) Changes from *v3.0.14*: @@ -579,24 +578,23 @@ Changes from *v3.0.14*: * Clarified that {clSetCommandQueueProperty} is only required for OpenCL 1.0 devices and may return an error otherwise, see {khronos-opencl-pr}/980[#980]. * Clarified that the application must ensure the free function passed to {clEnqueueSVMFree} is thread safe, see {khronos-opencl-pr}/1016[#1016]. * Clarified that the application must ensure the user function passed to {clEnqueueNativeKernel} is thread safe, see {khronos-opencl-pr}/1026[#1026]. - * `cl_khr_command_buffers` (provisional): + * `<>` (provisional): ** Removed the "invalid" command buffer state, see {khronos-opencl-pr}/885[#885]. ** Added support for recording SVM memory copies and memory fills in a command buffer, see {khronos-opencl-pr}/915[#915]. - * `cl_khr_command_buffer_multi_device` (provisional): + * `<>` (provisional): ** Clarified that the sync devices query should only return root devices, see {khronos-opencl-pr}/925[#925]. - * `cl_khr_external_memory` (provisional): + * `<>` (provisional): ** Disallowed specifying a device handle list without also specifying an external memory handle, see {khronos-opencl-pr}/922[#922]. ** Added a query to determine the handle types an implementation will assume have a linear memory layout, see {khronos-opencl-pr}/940[#940]. ** Added an external memory-specific device handle list enum, see {khronos-opencl-pr}/956[#956]. 
** Clarified that implementations may acquire information about an image from an external memory handle when the image is created, see {khronos-opencl-pr}/970[#970]. - * `cl_khr_external_semaphore` (provisional): + * `<>` (provisional): ** Added the ability to re-import "sync fd" handles into an existing semaphore, see {khronos-opencl-pr}/939[#939]. - ** Clarified that a semaphore may only export one handle type, and that a semaphore created from an external handle cannot also export a handle, see {khronos-opencl-pr}/975[#975]. - ** Clarified that `cl_khr_external_semaphore` requires support for `cl_khr_semaphore`, see {khronos-opencl-pr}/976[#976]. + ** Clarified that a semaphore may only export one handle type, and that a semaphore created from an external handle cannot also export a handle, see {khronos-opencl-pr}/975[#975]. + ** Clarified that `<>` requires support for `<>`, see {khronos-opencl-pr}/976[#976]. ** Added a query to determine if a semaphore may export an external handle, see {khronos-opencl-pr}/997[#997]. - * `cl_khr_semaphore` (provisional): + * `<>` (provisional): ** Added an semaphore-specific device handle list enum, see {khronos-opencl-pr}/956[#956]. ** Restricted semaphores to a single associated device, see {khronos-opencl-pr}/996[#996]. - * `cl_khr_subgroup_rotate`: + * `<>`: ** Clarified that only rotating within a subgroup is supported, see {khronos-opencl-pr}/967[#967]. - diff --git a/api/appendix_extensions.asciidoc b/api/appendix_extensions.asciidoc new file mode 100644 index 00000000..2e5ceccb --- /dev/null +++ b/api/appendix_extensions.asciidoc @@ -0,0 +1,37 @@ +// Copyright 2023-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +[appendix] + +[[extensions]] += OpenCL Extensions (Informative) + +Extensions to the OpenCL API can be defined by authors, groups of authors, +and the Khronos OpenCL Working Group. 
+The online Registry of extensions is available at URL + +https://registry.khronos.org/OpenCL + +It is possible to generate versions of the API Specification incorporating +different extensions. +At present only a subset of defined extensions can be incorporated in this +fashion. + +The remainder of this appendix documents a set of extensions chosen when +this document was built. + +Extensions are grouped as Khronos `khr`, multivendor `ext`, and then +alphabetically by author ID. +Within each group, extensions are listed in alphabetical order by their +names. + +== Extension Dependencies + +Extensions which have dependencies on specific core versions or on other +extensions will list such dependencies. + +All extensions implicitly require support for OpenCL 1.0. + +include::{generated}/meta/current_extensions_appendix.txt[] +include::{generated}/meta/provisional_extensions_appendix.txt[] +include::{generated}/meta/deprecated_extensions_appendix.txt[] diff --git a/api/appendix_f.asciidoc b/api/appendix_f.asciidoc index 57b6c243..f2bb39c6 100644 --- a/api/appendix_f.asciidoc +++ b/api/appendix_f.asciidoc @@ -1,6 +1,5 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ +// Copyright 2017-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 [appendix] [[error_codes]] @@ -333,4 +332,11 @@ include::{generated}/api/version-notes/CL_MAX_SIZE_RESTRICTION_EXCEEDED.asciidoc include::{generated}/api/version-notes/CL_PROFILING_INFO_NOT_AVAILABLE.asciidoc[] | Returned by {clGetEventProfilingInfo} when the command associated with the specified event was not enqueued into a command-queue with {CL_QUEUE_PROFILING_ENABLE}. +ifdef::cl_khr_icd[] +| {CL_PLATFORM_NOT_FOUND_KHR_anchor} + +include::{generated}/api/version-notes/CL_PLATFORM_NOT_FOUND_KHR.asciidoc[] + | Returned by {clGetPlatformIDs} when no platforms are available. 
+endif::cl_khr_icd[] + |==== diff --git a/api/appendix_g.asciidoc b/api/appendix_g.asciidoc index fe0850ce..1c470372 100644 --- a/api/appendix_g.asciidoc +++ b/api/appendix_g.asciidoc @@ -1,6 +1,5 @@ -// Copyright 2019-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ +// Copyright 2019-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 [appendix] [[error_other_misc_enums]] diff --git a/api/appendix_h.asciidoc b/api/appendix_h.asciidoc index eda297c9..9e9ec6ea 100644 --- a/api/appendix_h.asciidoc +++ b/api/appendix_h.asciidoc @@ -1,6 +1,4 @@ -// Copyright 2020-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ +// Copyright 2020-2024 The Khronos Group Inc. [appendix] [[opencl-3.0-backwards-compatibility]] @@ -298,7 +296,7 @@ When read-write images are not supported: OpenCL C compilers supporting read-write images will define the feature macro `+__opencl_c_read_write_images+`. -== Creating 2D Images from Buffers +== Creating 2D Images From Buffers Creating a 2D image from a buffer is optional for devices supporting OpenCL 3.0. When creating a 2D image from a buffer is not supported: @@ -315,7 +313,7 @@ When creating a 2D image from a buffer is not supported: | {clGetDeviceInfo}, passing + {CL_DEVICE_EXTENSIONS} -| Will not describe support for the `cl_khr_image2d_from_buffer` extension if _device_ does not support creating a 2D image from a buffer. +| Will not describe support for the `<<cl_khr_image2d_from_buffer>>` extension if _device_ does not support creating a 2D image from a buffer. | {clCreateImage} or + {clCreateImageWithProperties}, passing +
| {clCreateImage} or + {clCreateImageWithProperties}, passing + @@ -426,7 +424,7 @@ When sub-groups are not supported: | {clGetDeviceInfo}, passing + {CL_DEVICE_EXTENSIONS} -| Will not describe support for the `cl_khr_subgroups` extension if _device_ does not support sub-groups. +| Will not describe support for the `<>` extension if _device_ does not support sub-groups. | {clGetKernelSubGroupInfo} | Returns {CL_INVALID_OPERATION} if _device_ does not support sub-groups. @@ -470,7 +468,7 @@ When writing to 3D image objects is not supported: | {clGetDeviceInfo}, passing + {CL_DEVICE_EXTENSIONS} -| Will not describe support for the `cl_khr_3d_image_writes` extension if _device_ does not support writing to 3D image objects. +| Will not describe support for the `<>` extension if _device_ does not support writing to 3D image objects. | {clGetSupportedImageFormats}, passing + {CL_MEM_OBJECT_IMAGE3D} and one of + @@ -546,7 +544,7 @@ OpenCL C compilers supporting the generic address space will define the feature // *** `get_local_linear_id` // ** `work_group_barrier` (as a synonym for `barrier`) -== Language Features that Were Already Optional +== Language Features That Were Already Optional Some OpenCL C language features were already optional before OpenCL 3.0, the API mechanisms for querying these have not changed. diff --git a/api/cl_khr_3d_image_writes.asciidoc b/api/cl_khr_3d_image_writes.asciidoc new file mode 100644 index 00000000..7c980404 --- /dev/null +++ b/api/cl_khr_3d_image_writes.asciidoc @@ -0,0 +1,26 @@ +// Copyright 2017-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_3d_image_writes.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2020-04-21 +*IP Status*:: + No known IP claims. +*Contributors*:: + TBD + +=== Description + +`cl_khr_3d_image_writes` adds built-in OpenCL C functions that allow a +kernel to write to 3D image objects in addition to 2D image objects. 
+ +See the link:{OpenCLCSpecURL}#cl_khr_3d_image_writes[3D Image Writes] +section of the OpenCL C specification for more information. + +=== Version History + + * Revision 1.0.0, 2020-04-21 + ** First assigned version. diff --git a/api/cl_khr_async_work_group_copy_fence.asciidoc b/api/cl_khr_async_work_group_copy_fence.asciidoc new file mode 100644 index 00000000..3ec9923c --- /dev/null +++ b/api/cl_khr_async_work_group_copy_fence.asciidoc @@ -0,0 +1,29 @@ +// Copyright 2017-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_async_work_group_copy_fence.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2021-11-10 +*IP Status*:: + No known IP claims. +*Contributors*:: + TBD + +=== Description + +`cl_khr_async_work_group_copy_fence` adds a new built-in OpenCL C function +to establish a memory synchronization ordering of asynchronous copies. + +See the link:{OpenCLCSpecURL}#cl_khr_async_work_group_copy_fence[Async +Work-group Copy Fence] section of the OpenCL C specification for more +information. + +=== Version History + + * Revision 0.9.0, 2020-04-21 + ** First assigned version (provisional). + * Revision 1.0.0, 2021-11-10 + ** First non-provisional version. diff --git a/api/cl_khr_byte_addressable_store.asciidoc b/api/cl_khr_byte_addressable_store.asciidoc new file mode 100644 index 00000000..56a32e6e --- /dev/null +++ b/api/cl_khr_byte_addressable_store.asciidoc @@ -0,0 +1,33 @@ +// Copyright 2017-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_byte_addressable_store.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2020-04-21 +*Interactions and External Dependencies*:: + - Promoted to OpenCL 1.1 core +*IP Status*:: + No known IP claims. 
+*Contributors*:: + TBD + +=== Description + +`cl_khr_byte_addressable_store` relaxes restrictions on pointers to `char`, +`uchar`, `char2`, `uchar2`, `short`, `ushort` and `half` that were present +in _Section 6.8m: Restrictions_ of the OpenCL 1.0 specification. +With this extension, applications are able to read from and write to +pointers to these types. + +This extension became a core feature in OpenCL 1.1. + +See the link:{OpenCLCSpecURL}#cl_khr_byte_addressable_store[Byte-Addressable +Storage] section of the OpenCL C specification for more information. + +=== Version History + + * Revision 1.0.0, 2020-04-21 + ** First assigned version. diff --git a/api/cl_khr_command_buffer.asciidoc b/api/cl_khr_command_buffer.asciidoc new file mode 100644 index 00000000..6099d772 --- /dev/null +++ b/api/cl_khr_command_buffer.asciidoc @@ -0,0 +1,436 @@ +// Copyright 2018-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_command_buffer.txt[] + +// *Revision*:: +// 0.9.4 +// *Extension and Version Dependencies*:: +// This extension requires OpenCL 1.2 or later. +// Buffering of SVM commands requires OpenCL 2.0 or later. + +=== Other Extension Metadata + +*Last Modified Date*:: + 2023-03-31 +*IP Status*:: + No known IP claims. +*Contributors*:: + - Ewan Crawford, Codeplay Software Ltd. + - Gordon Brown, Codeplay Software Ltd. + - Kenneth Benzie, Codeplay Software Ltd. + - Alastair Murray, Codeplay Software Ltd. + - Jack Frankland, Codeplay Software Ltd. + - Balaji Calidas, Qualcomm Technologies Inc. + - Joshua Kelly, Qualcomm Technologies, Inc. + - Kevin Petit, Arm Ltd. + - Aharon Abramson, Intel. + - Ben Ashbaugh, Intel. + - Boaz Ouriel, Intel. + - Chris Gearing, Intel. 
+ - Pekka Jääskeläinen, Tampere University and Intel + - Jan Solanti, Tampere University + - Nikhil Joshi, NVIDIA + - James Price, Google + - Brice Videau, Argonne National Laboratory + +=== Description + +`cl_khr_command_buffer` adds the ability to record and replay buffers of +OpenCL commands. + +Command-buffers enable a reduction in overhead when enqueuing the same +workload multiple times. By separating the command-queue setup from dispatch, +the ability to replay a set of previously created commands is introduced. + +Device-side _cl_sync_point_khr_ synchronization-points can be used within +command-buffers to define command dependencies. This allows the commands of a +command-buffer to execute out-of-order on a single <<compatible, compatible>> +command-queue. The command-buffer itself has no inherent in-order/out-of-order +property; this ordering is inferred from the command-queue used on command +recording. Out-of-order enqueues without event dependencies of both regular +commands, such as {clEnqueueFillBuffer}, and command-buffers are allowed to +execute concurrently, and it is up to the user to express any dependencies using +events. + +The command-queues a command-buffer will be executed on can be set on replay via +parameters to {clEnqueueCommandBufferKHR}, provided they are +<<compatible, compatible>> with the command-queues used on command-buffer +recording. + +==== Background + +On embedded devices where building a command stream accounts for a significant +expenditure of resources and where workloads are often required to be pipelined, +a solution that minimizes driver overhead can significantly improve the +utilization of accelerators by removing a bottleneck in repeated command stream +generation. + +An additional motivator is lowering task execution latency, as devices can be +kept occupied with work by repeated submissions, without having to wait on +the host to construct commands again for a similar workload.
+ +==== Rationale + +The command-buffer abstraction over the generation of command streams is a +proven approach which facilitates a significant reduction in driver overhead in +existing real-world applications with repetitive pipelined workloads which are +built on top of Vulkan, DirectX 12, and Metal. + +A primary goal is for a command-buffer to avoid any interaction with +application code after being enqueued until all recorded commands have +completed. As such, any command which maps or migrates memory objects; reads +or writes memory objects; or enqueues a native kernel, is not available for +command-buffer recording. Finally, commands recorded into a command buffer do +not wait for or return event objects; these are instead replaced with +device-side synchronization-point identifiers which enable out-of-order +execution when enqueued on <<compatible, compatible>> command-queues. + +Adding new entry-points for individual commands, rather than recording existing +command-queue APIs with begin/end markers, was a design decision made for the +following reasons: + +* Individually specified entry points make it clearer to the user what's + supported, as opposed to adding a large number of error conditions + throughout the specification with all the restrictions. + +* Prevents code forking in existing entry points for the implementer, as + otherwise separate paths in each entry point need to be maintained for both + the recording and normal cases. + +* Allows the definition of a new device-side synchronization primitive rather + than overloading {cl_event_TYPE}, as use of {cl_event_TYPE} in individual commands + allows host interaction from callback and user-events, as well as introducing + complexities when a command-buffer is enqueued multiple times regarding + profiling and execution status. + +* New entry points facilitate returning handles to individual commands, allowing + those commands to be modified between enqueues of the command buffer.
Not all + command handles are used in this extension, but providing them facilitates + other extensions layered on top to take advantage of them to provide additional + mutable functionality. + +==== Simultaneous Use + +The optional simultaneous use capability was added to the extension so that +vendors can support pipelined workflows, where command-buffers are repeatedly +enqueued without blocking in user code. However, simultaneous use may result in +command-buffers being more expensive to enqueue than in a sequential model, so +the capability is optional to enable optimizations on command-buffer recording. + +=== Interactions With Other Extensions + +The introduction of the command-buffer abstraction enables functionality +beyond what the `cl_khr_command_buffer` extension currently provides, i.e. +the recording of immutable commands to a single queue which can then be +executed without commands synchronizing outside the command-buffer. It is +intended that extra functionality expanding on this will be provided as layered +extensions on top of `cl_khr_command_buffer`. + +Having `cl_khr_command_buffer` as a minimal base specification means that the +API defines mechanisms for functionality that is not enabled by this extension, +these are described in the following sub-sections. `cl_khr_command_buffer` will +retain its provisional extension status until other layered extensions are +released, as these may reveal modifications needed to the base specification to +support their intended use cases. + +==== ND-range Kernel Command Properties + +The {clCommandNDRangeKernelKHR} entry-point defines a `properties` parameter of +new type {cl_ndrange_kernel_command_properties_khr_TYPE}. No properties are defined +in `cl_khr_command_buffer`, but the parameter is intended to enable future +functionality that would change the characteristics of the kernel command. 
+ +==== Command Handles + +All command recording entry-points define a {cl_mutable_command_khr_TYPE} output +parameter which provides a handle to the specific command being recorded. Use of +these output handles is not enabled by the `cl_khr_command_buffer` extension, +but the handles will allow individual commands in a command-buffer to be +referenced by the user. In particular, the capability for an application to use +these handles to modify commands between enqueues of a command-buffer is +envisaged. + +==== List of Queues + +Only a single command-queue can be associated with a command-buffer in the +`cl_khr_command_buffer` extension, but the API is designed with the intention +that a future extension will allow commands to be recorded across multiple +queues in the same command-buffer, providing replay of heterogeneous task +graphs. + +Using multiple queue functionality will result in an error without any layered +extensions to relax usage of the following API features: + +* When a command-buffer is created the API enables passing a list of queues + that the command-buffer will record commands to. Only a single queue is + permitted in `cl_khr_command_buffer`. + +* Individual command recording entry-points define a {cl_command_queue_TYPE} + parameter for which of the queues set on command-buffer creation that command + should be recorded to. This must be passed as NULL in `cl_khr_command_buffer`. + +* {clEnqueueCommandBufferKHR} takes a list of queues for command-buffer execution, + corresponding to those set on creation. Only a single queue is permitted in + `cl_khr_command_buffer`. + +// The 'New ...'
section can be auto-generated + +=== New Commands + + * {clCreateCommandBufferKHR} + * {clRetainCommandBufferKHR} + * {clReleaseCommandBufferKHR} + * {clFinalizeCommandBufferKHR} + * {clEnqueueCommandBufferKHR} + * {clCommandBarrierWithWaitListKHR} + * {clCommandCopyBufferKHR} + * {clCommandCopyBufferRectKHR} + * {clCommandCopyBufferToImageKHR} + * {clCommandCopyImageKHR} + * {clCommandCopyImageToBufferKHR} + * {clCommandFillBufferKHR} + * {clCommandFillImageKHR} + * {clCommandNDRangeKernelKHR} + * {clGetCommandBufferInfoKHR} + * The following SVM entry points are supported only with at least OpenCL 2.0, + and starting from version 0.9.4 of this extension + ** {clCommandSVMMemcpyKHR} + ** {clCommandSVMMemFillKHR} + +=== New Structures + + * {cl_command_buffer_khr} + * {cl_mutable_command_khr} + +=== New Types + + * {cl_device_command_buffer_capabilities_khr_TYPE} + * {cl_command_buffer_khr_TYPE} + * {cl_sync_point_khr_TYPE} + * {cl_command_buffer_info_khr_TYPE} + * {cl_command_buffer_state_khr_TYPE} + * {cl_command_buffer_properties_khr_TYPE} + * {cl_command_buffer_flags_khr_TYPE} + * {cl_ndrange_kernel_command_properties_khr_TYPE} + * {cl_mutable_command_khr_TYPE} + +=== New Enums + + * {cl_device_info_TYPE} + ** {CL_DEVICE_COMMAND_BUFFER_CAPABILITIES_KHR} + ** {CL_DEVICE_COMMAND_BUFFER_REQUIRED_QUEUE_PROPERTIES_KHR} + * {cl_device_command_buffer_capabilities_khr - bitfield_TYPE} + ** {CL_COMMAND_BUFFER_CAPABILITY_KERNEL_PRINTF_KHR} + ** {CL_COMMAND_BUFFER_CAPABILITY_DEVICE_SIDE_ENQUEUE_KHR} + ** {CL_COMMAND_BUFFER_CAPABILITY_SIMULTANEOUS_USE_KHR} + ** {CL_COMMAND_BUFFER_CAPABILITY_OUT_OF_ORDER_KHR} + * {cl_command_buffer_properties_khr_TYPE} + ** {CL_COMMAND_BUFFER_FLAGS_KHR} + * {cl_command_buffer_flags_khr - bitfield_TYPE} + ** {CL_COMMAND_BUFFER_SIMULTANEOUS_USE_KHR} + * {Error codes_TYPE} + ** {CL_INVALID_COMMAND_BUFFER_KHR} + ** {CL_INVALID_SYNC_POINT_WAIT_LIST_KHR} + ** {CL_INCOMPATIBLE_COMMAND_QUEUE_KHR} + * {cl_command_buffer_info_khr_TYPE} + ** 
{CL_COMMAND_BUFFER_QUEUES_KHR} + ** {CL_COMMAND_BUFFER_NUM_QUEUES_KHR} + ** {CL_COMMAND_BUFFER_REFERENCE_COUNT_KHR} + ** {CL_COMMAND_BUFFER_STATE_KHR} + ** {CL_COMMAND_BUFFER_PROPERTIES_ARRAY_KHR} + ** {CL_COMMAND_BUFFER_CONTEXT_KHR} + * {cl_command_buffer_state_khr_TYPE} + ** {CL_COMMAND_BUFFER_STATE_RECORDING_KHR} + ** {CL_COMMAND_BUFFER_STATE_EXECUTABLE_KHR} + ** {CL_COMMAND_BUFFER_STATE_PENDING_KHR} + * {cl_command_type_TYPE} + ** {CL_COMMAND_COMMAND_BUFFER_KHR} + +=== Sample Code + +[source] +---- + #define CL_CHECK(ERROR) \ + if (ERROR) { \ + std::cerr << "OpenCL error: " << ERROR << "\n"; \ + return ERROR; \ + } + + int main() { + cl_platform_id platform; + CL_CHECK(clGetPlatformIDs(1, &platform, nullptr)); + cl_device_id device; + CL_CHECK(clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 1, &device, nullptr)); + + cl_int error; + cl_context context = + clCreateContext(nullptr, 1, &device, nullptr, nullptr, &error); + CL_CHECK(error); + + const char* code = R"OpenCLC( + kernel void vector_addition(global int* tile1, global int* tile2, + global int* res) { + size_t index = get_global_id(0); + res[index] = tile1[index] + tile2[index]; + } + )OpenCLC"; + const size_t length = std::strlen(code); + + cl_program program = + clCreateProgramWithSource(context, 1, &code, &length, &error); + CL_CHECK(error); + + CL_CHECK(clBuildProgram(program, 1, &device, nullptr, nullptr, nullptr)); + + cl_kernel kernel = clCreateKernel(program, "vector_addition", &error); + CL_CHECK(error); + + constexpr size_t frame_count = 60; + constexpr size_t frame_elements = 1024; + constexpr size_t frame_size = frame_elements * sizeof(cl_int); + + constexpr size_t tile_count = 16; + constexpr size_t tile_elements = frame_elements / tile_count; + constexpr size_t tile_size = tile_elements * sizeof(cl_int); + + cl_mem buffer_tile1 = + clCreateBuffer(context, CL_MEM_READ_ONLY, tile_size, nullptr, &error); + CL_CHECK(error); + cl_mem buffer_tile2 = + clCreateBuffer(context, CL_MEM_READ_ONLY, 
tile_size, nullptr, &error); + CL_CHECK(error); + cl_mem buffer_res = + clCreateBuffer(context, CL_MEM_WRITE_ONLY, tile_size, nullptr, &error); + CL_CHECK(error); + + CL_CHECK(clSetKernelArg(kernel, 0, sizeof(buffer_tile1), &buffer_tile1)); + CL_CHECK(clSetKernelArg(kernel, 1, sizeof(buffer_tile2), &buffer_tile2)); + CL_CHECK(clSetKernelArg(kernel, 2, sizeof(buffer_res), &buffer_res)); + + cl_command_queue command_queue = + clCreateCommandQueue(context, device, + CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &error); + CL_CHECK(error); + + cl_command_buffer_khr command_buffer = + clCreateCommandBufferKHR(1, &command_queue, nullptr, &error); + CL_CHECK(error); + + cl_mem buffer_src1 = + clCreateBuffer(context, CL_MEM_READ_ONLY, frame_size, nullptr, &error); + CL_CHECK(error); + cl_mem buffer_src2 = + clCreateBuffer(context, CL_MEM_READ_ONLY, frame_size, nullptr, &error); + CL_CHECK(error); + cl_mem buffer_dst = + clCreateBuffer(context, CL_MEM_WRITE_ONLY, frame_size, nullptr, &error); + CL_CHECK(error); + + cl_sync_point_khr tile_sync_point = 0; + for (size_t tile_index = 0; tile_index < tile_count; tile_index++) { + std::array<cl_sync_point_khr, 2> copy_sync_points; + CL_CHECK(clCommandCopyBufferKHR(command_buffer, + command_queue, buffer_src1, buffer_tile1, tile_index * tile_size, 0, + tile_size, tile_sync_point ? 1 : 0, + tile_sync_point ? &tile_sync_point : nullptr, &copy_sync_points[0], + nullptr)); + CL_CHECK(clCommandCopyBufferKHR(command_buffer, + command_queue, buffer_src2, buffer_tile2, tile_index * tile_size, 0, + tile_size, tile_sync_point ? 1 : 0, + tile_sync_point ?
&tile_sync_point : nullptr, &copy_sync_points[1], + nullptr)); + + cl_sync_point_khr nd_sync_point; + CL_CHECK(clCommandNDRangeKernelKHR(command_buffer, + command_queue, nullptr, kernel, 1, nullptr, &tile_elements, nullptr, + copy_sync_points.size(), copy_sync_points.data(), &nd_sync_point, + nullptr)); + + CL_CHECK(clCommandCopyBufferKHR(command_buffer, + command_queue, buffer_res, buffer_dst, 0, tile_index * tile_size, + tile_size, 1, &nd_sync_point, &tile_sync_point, nullptr)); + } + + CL_CHECK(clFinalizeCommandBufferKHR(command_buffer)); + + std::random_device random_device; + std::mt19937 random_engine{random_device()}; + std::uniform_int_distribution<cl_int> random_distribution{ + 0, std::numeric_limits<cl_int>::max() / 2}; + auto random_generator = [&]() { return random_distribution(random_engine); }; + + for (size_t frame_index = 0; frame_index < frame_count; frame_index++) { + std::array<cl_event, 2> write_src_events; + std::vector<cl_int> src1(frame_elements); + std::generate(src1.begin(), src1.end(), random_generator); + CL_CHECK(clEnqueueWriteBuffer(command_queue, buffer_src1, CL_FALSE, 0, + frame_size, src1.data(), 0, nullptr, + &write_src_events[0])); + std::vector<cl_int> src2(frame_elements); + std::generate(src2.begin(), src2.end(), random_generator); + CL_CHECK(clEnqueueWriteBuffer(command_queue, buffer_src2, CL_FALSE, 0, + frame_size, src2.data(), 0, nullptr, + &write_src_events[1])); + + CL_CHECK(clEnqueueCommandBufferKHR(0, NULL, command_buffer, 2, + write_src_events.data(), nullptr)); + + CL_CHECK(clFinish(command_queue)); + + CL_CHECK(clReleaseEvent(write_src_events[0])); + CL_CHECK(clReleaseEvent(write_src_events[1])); + } + + CL_CHECK(clReleaseCommandBufferKHR(command_buffer)); + CL_CHECK(clReleaseCommandQueue(command_queue)); + + CL_CHECK(clReleaseMemObject(buffer_src1)); + CL_CHECK(clReleaseMemObject(buffer_src2)); + CL_CHECK(clReleaseMemObject(buffer_dst)); + + CL_CHECK(clReleaseMemObject(buffer_tile1)); + CL_CHECK(clReleaseMemObject(buffer_tile2)); +
CL_CHECK(clReleaseMemObject(buffer_res)); + + CL_CHECK(clReleaseKernel(kernel)); + CL_CHECK(clReleaseProgram(program)); + CL_CHECK(clReleaseContext(context)); + + return 0; + } +---- + +=== Issues + +. Introduce a `clCloneCommandBufferKHR` entry-point for cloning a + command-buffer. ++ +-- +*UNRESOLVED* +-- +. Enable detached command-buffer execution, where command-buffers are executed + on their own internal queue to prevent locking user created queues for the + duration of their execution. ++ +-- +*UNRESOLVED* +-- + +include::provisional_notice.asciidoc[] + +=== Version History + + * Revision 0.9.0, 2021-11-10 + ** First assigned version (provisional). + * 0.9.1, 2022-08-24 + ** Specify an error if a command-buffer is finalized multiple times + (provisional). + * 0.9.2, 2023-03-31 + ** Introduce context query {CL_COMMAND_BUFFER_CONTEXT_KHR} (provisional). + * 0.9.3, 2023-04-04 + ** Remove Invalid command-buffer state (provisional). + * 0.9.4, 2023-05-11 + ** Add clCommandSVMMemcpyKHR and clCommandSVMMemFillKHR command entries + (provisional). + diff --git a/api/cl_khr_command_buffer_multi_device.asciidoc b/api/cl_khr_command_buffer_multi_device.asciidoc new file mode 100644 index 00000000..9d3d87c3 --- /dev/null +++ b/api/cl_khr_command_buffer_multi_device.asciidoc @@ -0,0 +1,316 @@ +// Copyright 2018-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_command_buffer_multi_device.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2023-04-30 +*IP Status*:: + No known IP claims. +*Contributors*:: + - Ewan Crawford, Codeplay Software Ltd. + - Gordon Brown, Codeplay Software Ltd. + - Kenneth Benzie, Codeplay Software Ltd. + - Alastair Murray, Codeplay Software Ltd. + - Jack Frankland, Codeplay Software Ltd. + - Balaji Calidas, Qualcomm Technologies Inc. + - Joshua Kelly, Qualcomm Technologies, Inc. + - Kevin Petit, Arm Ltd. + - Aharon Abramson, Intel. + - Ben Ashbaugh, Intel. 
+ - Boaz Ouriel, Intel. + - Pekka Jääskeläinen, Tampere University and Intel. + - Jan Solanti, Tampere University + - Nikhil Joshi, NVIDIA + - James Price, Google + +=== Description + +The `cl_khr_command_buffer` extension separates command construction from +enqueue by providing a mechanism to record a set of commands which can then +be repeatedly enqueued. +However, the commands in a command-buffer can only be recorded to a single +command-queue specified on command-buffer creation. + +`cl_khr_command_buffer_multi_device` extends the scope of a command-buffer +to allow commands to be recorded across multiple queues in the same +command-buffer, providing execution of heterogeneous task graphs from +command-queues associated with different devices. + +The ability for a user to deep copy an existing command-buffer so that the +commands target a different device is also made possible by +`cl_khr_command_buffer_multi_device`. +Depending on platform support the mapping of commands to the new target +device can be done either explicitly by the user, or automatically by the +OpenCL runtime. 
+ +=== New Types + +Bitfield for querying command-buffer capabilities of an OpenCL Platform with +{clGetPlatformInfo}, see the <<platform-queries-table, Platform Queries table>>: + + * {cl_platform_command_buffer_capabilities_khr_TYPE} + +=== New Commands + + * {clRemapCommandBufferKHR} + +=== New Enums + +Enums for querying device command-buffer capabilities with +{clGetDeviceInfo}, see the <<device-queries-table, Device Queries table>>: + + * {cl_device_info_TYPE} + ** {CL_DEVICE_COMMAND_BUFFER_NUM_SYNC_DEVICES_KHR} + ** {CL_DEVICE_COMMAND_BUFFER_SYNC_DEVICES_KHR} + * {cl_device_command_buffer_capabilities_khr_TYPE} + ** {CL_COMMAND_BUFFER_CAPABILITY_MULTIPLE_QUEUE_KHR} + * {cl_command_buffer_flags_khr_TYPE} + ** {CL_COMMAND_BUFFER_DEVICE_SIDE_SYNC_KHR} + * {cl_platform_info_TYPE} + ** {CL_PLATFORM_COMMAND_BUFFER_CAPABILITIES_KHR} + * {cl_platform_command_buffer_capabilities_khr_TYPE} + ** {CL_COMMAND_BUFFER_PLATFORM_UNIVERSAL_SYNC_KHR} + ** {CL_COMMAND_BUFFER_PLATFORM_REMAP_QUEUES_KHR} + ** {CL_COMMAND_BUFFER_PLATFORM_AUTOMATIC_REMAP_KHR} + +=== Sample Code + +[source,opencl] +---- +#define CL_CHECK(ERROR) \ + if (ERROR) { \ + std::cerr << "OpenCL error: " << ERROR << "\n"; \ + return ERROR; \ + } + +int main() { + cl_platform_id platform; + CL_CHECK(clGetPlatformIDs(1, &platform, nullptr)); + cl_platform_command_buffer_capabilities_khr platform_caps; + CL_CHECK(clGetPlatformInfo(platform, + CL_PLATFORM_COMMAND_BUFFER_CAPABILITIES_KHR, + sizeof(platform_caps), &platform_caps, NULL)); + if (!(platform_caps & CL_COMMAND_BUFFER_PLATFORM_AUTOMATIC_REMAP_KHR)) { + std::cerr << "Command-buffer remapping not supported but used in example, " + "skipping\n"; + return 0; + } + + cl_uint num_devices = 0; + CL_CHECK(clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 0, NULL, &num_devices)); + std::vector<cl_device_id> devices(num_devices); + CL_CHECK( + clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, num_devices, devices.data(), nullptr)); + + // Checks omitted for brevity that either a) the platform supports + // CL_COMMAND_BUFFER_PLATFORM_UNIVERSAL_SYNC_KHR or b) each device is listed + // in
the others' CL_DEVICE_COMMAND_BUFFER_SYNC_DEVICES_KHR + + cl_int error; + cl_context context = + clCreateContext(NULL, num_devices, devices.data(), NULL, NULL, &error); + CL_CHECK(error); + + std::vector<cl_command_queue> queues(num_devices); + for (cl_uint i = 0; i < num_devices; i++) { + queues[i] = clCreateCommandQueue(context, devices[i], 0, &error); + CL_CHECK(error); + } + + const char *code = R"OpenCLC( + kernel void vector_addition(global int* tile1, global int* tile2, + global int* res) { + size_t index = get_global_id(0); + res[index] = tile1[index] + tile2[index]; + } + )OpenCLC"; + const size_t length = std::strlen(code); + + cl_program program = + clCreateProgramWithSource(context, 1, &code, &length, &error); + CL_CHECK(error); + + CL_CHECK( + clBuildProgram(program, num_devices, devices.data(), NULL, NULL, NULL)); + + cl_kernel kernel = clCreateKernel(program, "vector_addition", &error); + CL_CHECK(error); + + constexpr size_t frame_count = 60; + constexpr size_t frame_elements = 1024; + constexpr size_t frame_size = frame_elements * sizeof(cl_int); + + constexpr size_t tile_count = 16; + constexpr size_t tile_elements = frame_elements / tile_count; + constexpr size_t tile_size = tile_elements * sizeof(cl_int); + + cl_mem buffer_tile1 = + clCreateBuffer(context, CL_MEM_READ_ONLY, tile_size, NULL, &error); + CL_CHECK(error); + + cl_mem buffer_tile2 = + clCreateBuffer(context, CL_MEM_READ_ONLY, tile_size, NULL, &error); + CL_CHECK(error); + + cl_mem buffer_res = + clCreateBuffer(context, CL_MEM_WRITE_ONLY, tile_size, NULL, &error); + CL_CHECK(error); + + CL_CHECK(clSetKernelArg(kernel, 0, sizeof(buffer_tile1), &buffer_tile1)); + CL_CHECK(clSetKernelArg(kernel, 1, sizeof(buffer_tile2), &buffer_tile2)); + CL_CHECK(clSetKernelArg(kernel, 2, sizeof(buffer_res), &buffer_res)); + + cl_command_buffer_khr original_cmdbuf = + clCreateCommandBufferKHR(num_devices, queues.data(), nullptr, &error); + CL_CHECK(error); + + cl_mem buffer_src1 = + clCreateBuffer(context,
CL_MEM_READ_ONLY, frame_size, NULL, &error); + CL_CHECK(error); + + cl_mem buffer_src2 = + clCreateBuffer(context, CL_MEM_READ_ONLY, frame_size, NULL, &error); + CL_CHECK(error); + + cl_mem buffer_dst = + clCreateBuffer(context, CL_MEM_READ_WRITE, frame_size, NULL, &error); + CL_CHECK(error); + + cl_sync_point_khr tile_sync_point = 0; + for (size_t tile_index = 0; tile_index < tile_count; tile_index++) { + cl_sync_point_khr copy_sync_points[2]; + CL_CHECK(clCommandCopyBufferKHR( + original_cmdbuf, queues[tile_index % num_devices], buffer_src1, + buffer_tile1, tile_index * tile_size, 0, tile_size, + tile_sync_point ? 1 : 0, tile_sync_point ? &tile_sync_point : NULL, + &copy_sync_points[0], NULL)); + + CL_CHECK(clCommandCopyBufferKHR( + original_cmdbuf, queues[tile_index % num_devices], buffer_src2, + buffer_tile2, tile_index * tile_size, 0, tile_size, + tile_sync_point ? 1 : 0, + tile_sync_point ? &tile_sync_point : nullptr, + &copy_sync_points[1], NULL)); + + cl_sync_point_khr nd_sync_point; + CL_CHECK(clCommandNDRangeKernelKHR( + original_cmdbuf, queues[tile_index % num_devices], NULL, kernel, 1, + NULL, &tile_elements, NULL, 2, copy_sync_points, &nd_sync_point, NULL)); + + CL_CHECK(clCommandCopyBufferKHR( + original_cmdbuf, queues[tile_index % num_devices], buffer_res, + buffer_dst, 0, tile_index * tile_size, tile_size, 1, &nd_sync_point, + &tile_sync_point, NULL)); + } + + CL_CHECK(clFinalizeCommandBufferKHR(original_cmdbuf)); + + std::random_device random_device; + std::mt19937 random_engine{random_device()}; + std::uniform_int_distribution<cl_int> random_distribution{ + 0, std::numeric_limits<cl_int>::max() / 2}; + auto random_generator = [&]() { return random_distribution(random_engine); }; + + auto enqueue_frame = [&](cl_command_buffer_khr command_buffer) { + for (size_t frame_index = 0; frame_index < frame_count; frame_index++) { + std::array<cl_event, 3> enqueue_events; + std::vector<cl_int> src1(frame_elements); + std::generate(src1.begin(), src1.end(), random_generator); +
CL_CHECK(clEnqueueWriteBuffer(queues[0], buffer_src1, CL_FALSE, 0, + frame_size, src1.data(), 0, nullptr, + &enqueue_events[0])); + std::vector<cl_int> src2(frame_elements); + std::generate(src2.begin(), src2.end(), random_generator); + CL_CHECK(clEnqueueWriteBuffer(queues[0], buffer_src2, CL_FALSE, 0, + frame_size, src2.data(), 0, nullptr, + &enqueue_events[1])); + + CL_CHECK(clEnqueueCommandBufferKHR(0, NULL, command_buffer, 2, + enqueue_events.data(), + &enqueue_events[2])); + + CL_CHECK(clWaitForEvents(1, &enqueue_events[2])); + + for (auto e : enqueue_events) { + CL_CHECK(clReleaseEvent(e)); + } + } + return 0; + }; + + error = enqueue_frame(original_cmdbuf); + CL_CHECK(error); + + // Remap from N queues to 1 queue and run again + cl_command_buffer_khr remapped_cmdbuf = clRemapCommandBufferKHR( + original_cmdbuf, CL_TRUE, 1, queues.data(), 0, NULL, NULL, &error); + CL_CHECK(error); + + error = enqueue_frame(remapped_cmdbuf); + CL_CHECK(error); + + for (unsigned i = 0; i < num_devices; ++i) { + CL_CHECK(clReleaseCommandQueue(queues[i])); + } + CL_CHECK(clReleaseMemObject(buffer_src1)); + CL_CHECK(clReleaseMemObject(buffer_src2)); + CL_CHECK(clReleaseMemObject(buffer_dst)); + + CL_CHECK(clReleaseMemObject(buffer_tile1)); + CL_CHECK(clReleaseMemObject(buffer_tile2)); + CL_CHECK(clReleaseMemObject(buffer_res)); + + CL_CHECK(clReleaseCommandBufferKHR(original_cmdbuf)); + CL_CHECK(clReleaseCommandBufferKHR(remapped_cmdbuf)); + + CL_CHECK(clReleaseKernel(kernel)); + CL_CHECK(clReleaseProgram(program)); + CL_CHECK(clReleaseContext(context)); + + return 0; +} +---- + +=== Issues + +. In cl_event profiling info for a command-buffer running across the queues for + several devices, how do we know what the first & last commands executed + are if there is concurrent execution across devices?
++ +-- +*RESOLVED*: Allowed an implementation to fallback to +{CL_PROFILING_COMMAND_SUBMIT} and {CL_PROFILING_COMMAND_COMPLETE} when +reporting {CL_PROFILING_COMMAND_START} & {CL_PROFILING_COMMAND_END}. +-- +. Is an atomic constraint required? This would forbid regular clEnqueue* commands, + from interleaving execution on a queue which a command-buffer is being + executed on. ++ +-- +*RESOLVED*: This behavior can block parallelism, and constraint is +expressible by the user through existing synchronization mechanisms if they +require it. +-- +. It is currently an error if a set of command-queues passed to + {clEnqueueCommandBufferKHR} aren't compatible with those set on recording. + Should we relax this as an optional capability that allows an + implementation to do a more expensive command-buffer enqueue for this + case? ++ +-- +*RESOLVED*: Added as an optional feature. +-- + +include::provisional_notice.asciidoc[] + +=== Version History + + * Revision 0.9.0, 2023-04-14 + ** First assigned version (provisional). + * Revision 0.9.1, 2023-04-30 + ** Added clCommandSVMMemcpyKHR and clCommandSVMMemFillKHR as affected + functions (provisional). diff --git a/api/cl_khr_command_buffer_mutable_dispatch.asciidoc b/api/cl_khr_command_buffer_mutable_dispatch.asciidoc new file mode 100644 index 00000000..beda74ae --- /dev/null +++ b/api/cl_khr_command_buffer_mutable_dispatch.asciidoc @@ -0,0 +1,378 @@ +// Copyright 2018-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_command_buffer_mutable_dispatch.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2022-08-31 +*IP Status*:: + No known IP claims. +*Contributors*:: + - Ewan Crawford, Codeplay Software Ltd. + - Gordon Brown, Codeplay Software Ltd. + - Kenneth Benzie, Codeplay Software Ltd. + - Alastair Murray, Codeplay Software Ltd. + - Jack Frankland, Codeplay Software Ltd. + - Balaji Calidas, Qualcomm Technologies Inc. 
+ - Joshua Kelly, Qualcomm Technologies, Inc. + - Kevin Petit, Arm Ltd. + - Aharon Abramson, Intel. + - Ben Ashbaugh, Intel. + - Boaz Ouriel, Intel. + - Pekka Jääskeläinen, Tampere University + - Jan Solanti, Tampere University + - Nikhil Joshi, NVIDIA + - James Price, Google + +=== Description + +The `<<cl_khr_command_buffer>>` extension separates command construction +from enqueue by providing a mechanism to record a set of commands which can +then be repeatedly enqueued. +However, the commands recorded to the command-buffer are immutable between +enqueues. + +`cl_khr_command_buffer_mutable_dispatch` removes this restriction. +In particular, this extension allows the configuration of a kernel execution +command in a command-buffer, called a _mutable-dispatch_, to be modified. +This allows inputs and outputs to the kernel, as well as work-item sizes and +offsets, to change without having to re-record the entire command sequence +in a new command-buffer. + +=== Interactions With Other Extensions + +The {cl_command_buffer_structure_type_khr_TYPE} type has been added to this +extension for the purpose of allowing expansion of mutable functionality in +future extensions layered on top of +`cl_khr_command_buffer_mutable_dispatch`. +Any parameter that is a structure containing a `void* next` member *must* +have a value of `next` that is either `NULL`, or is a pointer to a valid +structure defined by `cl_khr_command_buffer_mutable_dispatch` or an +extension layered on top. +To be a valid structure in the pointer chain the first member of the +structure *must* be a {cl_command_buffer_structure_type_khr_TYPE} identifier +for the structure being iterated through, and the second member a `void* +next` pointer to the next structure in the chain. + +[NOTE] +==== +This approach is based on structure pointer chains in Vulkan; for more +details see the "`Valid Usage for Structure Pointer Chains`" section of the +Vulkan specification.
+==== + +This is designed so that another extension layered on +`cl_khr_command_buffer_mutable_dispatch` could allow modification of +commands recorded to a command-buffer other than kernel execution commands. +As all command recording entry-points return a {cl_mutable_command_khr_TYPE} +handle, and aspects like which {cl_mem_TYPE} object a command uses could +also be updated between enqueues of the command-buffer. + +=== New Types + + * {cl_mutable_dispatch_fields_khr_TYPE} + * {cl_mutable_command_info_khr_TYPE} + * {cl_command_buffer_structure_type_khr_TYPE} + * {cl_mutable_base_config_khr_TYPE} + * {cl_mutable_dispatch_asserts_khr_TYPE} + * {cl_mutable_dispatch_config_khr_TYPE} + * {cl_mutable_dispatch_exec_info_khr_TYPE} + * {cl_mutable_dispatch_arg_khr_TYPE} + +=== New Commands + + * {clUpdateMutableCommandsKHR} + * {clGetMutableCommandInfoKHR} + +=== New Enums + + * {cl_device_info_TYPE} + ** {CL_DEVICE_MUTABLE_DISPATCH_CAPABILITIES_KHR} + * {cl_ndrange_kernel_command_properties_khr_TYPE} + ** {CL_MUTABLE_DISPATCH_ASSERTS_KHR} + ** {CL_MUTABLE_DISPATCH_UPDATABLE_FIELDS_KHR} + * {cl_mutable_dispatch_asserts_khr_TYPE} + ** {CL_MUTABLE_DISPATCH_ASSERT_NO_ADDITIONAL_WORK_GROUPS_KHR} + * {cl_mutable_dispatch_fields_khr_TYPE} + ** {CL_MUTABLE_DISPATCH_GLOBAL_OFFSET_KHR} + ** {CL_MUTABLE_DISPATCH_GLOBAL_SIZE_KHR} + ** {CL_MUTABLE_DISPATCH_LOCAL_SIZE_KHR} + ** {CL_MUTABLE_DISPATCH_ARGUMENTS_KHR} + ** {CL_MUTABLE_DISPATCH_EXEC_INFO_KHR} + * {cl_mutable_command_info_khr_TYPE} + ** {CL_MUTABLE_COMMAND_COMMAND_QUEUE_KHR} + ** {CL_MUTABLE_COMMAND_COMMAND_BUFFER_KHR} + ** {CL_MUTABLE_DISPATCH_PROPERTIES_ARRAY_KHR} + ** {CL_MUTABLE_DISPATCH_KERNEL_KHR} + ** {CL_MUTABLE_DISPATCH_DIMENSIONS_KHR} + ** {CL_MUTABLE_DISPATCH_GLOBAL_WORK_OFFSET_KHR} + ** {CL_MUTABLE_DISPATCH_GLOBAL_WORK_SIZE_KHR} + ** {CL_MUTABLE_DISPATCH_LOCAL_WORK_SIZE_KHR} + ** {CL_MUTABLE_COMMAND_COMMAND_TYPE_KHR} + * {cl_command_buffer_flags_khr_TYPE} + ** {CL_COMMAND_BUFFER_MUTABLE_KHR} + * 
{cl_command_buffer_properties_khr_TYPE} + ** {CL_COMMAND_BUFFER_MUTABLE_DISPATCH_ASSERTS_KHR} + * {cl_command_buffer_structure_type_khr_TYPE} + ** {CL_STRUCTURE_TYPE_MUTABLE_BASE_CONFIG_KHR} + ** {CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR} + * New Error Codes + ** {CL_INVALID_MUTABLE_COMMAND_KHR} + +=== Sample Code + +==== Sample Application Updating the Arguments to a Mutable-dispatch Between Command-buffer Submissions + +[source,opencl] +---- +#define CL_CHECK(ERROR) \ + if (ERROR) { \ + std::cerr << "OpenCL error: " << ERROR << "\n"; \ + return ERROR; \ + } + +int main() { + cl_platform_id platform; + CL_CHECK(clGetPlatformIDs(1, &platform, nullptr)); + cl_device_id device; + CL_CHECK(clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 1, &device, nullptr)); + + cl_mutable_dispatch_fields_khr mutable_capabilities; + CL_CHECK(clGetDeviceInfo(device, CL_DEVICE_MUTABLE_DISPATCH_CAPABILITIES_KHR, + sizeof(mutable_capabilities), &mutable_capabilities, + nullptr)); + if (!(mutable_capabilities & CL_MUTABLE_DISPATCH_ARGUMENTS_KHR)) { + std::cerr + << "Device does not support update arguments to a mutable-dispatch, " + "skipping example.\n"; + return 0; + } + + cl_int error; + cl_context context = + clCreateContext(nullptr, 1, &device, nullptr, nullptr, &error); + CL_CHECK(error); + + const char* code = R"OpenCLC( +kernel void vector_addition(global int* tile1, global int* tile2, + global int* res) { + size_t index = get_global_id(0); + res[index] = tile1[index] + tile2[index]; +} +)OpenCLC"; + const size_t length = std::strlen(code); + + cl_program program = + clCreateProgramWithSource(context, 1, &code, &length, &error); + CL_CHECK(error); + + CL_CHECK(clBuildProgram(program, 1, &device, nullptr, nullptr, nullptr)); + + cl_kernel kernel = clCreateKernel(program, "vector_addition", &error); + CL_CHECK(error); + + // Set the parameters of the frames + constexpr size_t iterations = 60; + constexpr size_t elem_size = sizeof(cl_int); + constexpr size_t frame_width = 32; + constexpr
size_t frame_count = frame_width * frame_width; + constexpr size_t frame_size = frame_count * elem_size; + + cl_mem input_A_buffers[2] = {nullptr, nullptr}; + cl_mem input_B_buffers[2] = {nullptr, nullptr}; + cl_mem output_buffers[2] = {nullptr, nullptr}; + + // Create the buffer to swap between even and odd kernel iterations + for (size_t i = 0; i < 2; i++) { + input_A_buffers[i] = + clCreateBuffer(context, CL_MEM_READ_ONLY, frame_size, nullptr, &error); + CL_CHECK(error); + + input_B_buffers[i] = + clCreateBuffer(context, CL_MEM_READ_ONLY, frame_size, nullptr, &error); + CL_CHECK(error); + + output_buffers[i] = + clCreateBuffer(context, CL_MEM_WRITE_ONLY, frame_size, nullptr, &error); + CL_CHECK(error); + } + + cl_command_queue command_queue = + clCreateCommandQueue(context, device, 0, &error); + CL_CHECK(error); + + // Create command-buffer with mutable flag so we can update it + cl_command_buffer_properties_khr properties[3] = { + CL_COMMAND_BUFFER_FLAGS_KHR, CL_COMMAND_BUFFER_MUTABLE_KHR, 0}; + cl_command_buffer_khr command_buffer = + clCreateCommandBufferKHR(1, &command_queue, properties, &error); + CL_CHECK(error); + + CL_CHECK(clSetKernelArg(kernel, 0, sizeof(cl_mem), &input_A_buffers[0])); + CL_CHECK(clSetKernelArg(kernel, 1, sizeof(cl_mem), &input_B_buffers[0])); + CL_CHECK(clSetKernelArg(kernel, 2, sizeof(cl_mem), &output_buffers[0])); + + // Instruct the nd-range command to allow for mutable kernel arguments + cl_ndrange_kernel_command_properties_khr mutable_properties[] = { + CL_MUTABLE_DISPATCH_UPDATABLE_FIELDS_KHR, + CL_MUTABLE_DISPATCH_ARGUMENTS_KHR, 0}; + + // Create command handle for mutating nd-range command + cl_mutable_command_khr command_handle = nullptr; + + // Add the nd-range kernel command + error = clCommandNDRangeKernelKHR( + command_buffer, command_queue, mutable_properties, kernel, 1, nullptr, + &frame_count, nullptr, 0, nullptr, nullptr, &command_handle); + CL_CHECK(error); + + CL_CHECK(clFinalizeCommandBufferKHR(command_buffer)); + 
+ // Prepare for random input generation + std::random_device random_device; + std::mt19937 random_engine{random_device()}; + std::uniform_int_distribution<cl_int> random_distribution{ + std::numeric_limits<cl_int>::min() / 2, + std::numeric_limits<cl_int>::max() / 2}; + + // Iterate over each frame + for (size_t i = 0; i < iterations; i++) { + // Set the buffers for the current frame + cl_mem input_A_buffer = input_A_buffers[i % 2]; + cl_mem input_B_buffer = input_B_buffers[i % 2]; + cl_mem output_buffer = output_buffers[i % 2]; + + // Generate input A data + std::vector<cl_int> input_a(frame_count); + std::generate(std::begin(input_a), std::end(input_a), + [&]() { return random_distribution(random_engine); }); + + // Write the generated data to the input A buffer + error = + clEnqueueWriteBuffer(command_queue, input_A_buffer, CL_FALSE, 0, + frame_size, input_a.data(), 0, nullptr, nullptr); + CL_CHECK(error); + + // Generate input B data + std::vector<cl_int> input_b(frame_count); + std::generate(std::begin(input_b), std::end(input_b), + [&]() { return random_distribution(random_engine); }); + + // Write the generated data to the input B buffer + error = + clEnqueueWriteBuffer(command_queue, input_B_buffer, CL_FALSE, 0, + frame_size, input_b.data(), 0, nullptr, nullptr); + CL_CHECK(error); + + // If not executing the first frame + if (i != 0) { + // Configure the mutable configuration to update the kernel arguments + cl_mutable_dispatch_arg_khr arg_0{0, sizeof(cl_mem), &input_A_buffer}; + cl_mutable_dispatch_arg_khr arg_1{1, sizeof(cl_mem), &input_B_buffer}; + cl_mutable_dispatch_arg_khr arg_2{2, sizeof(cl_mem), &output_buffer}; + cl_mutable_dispatch_arg_khr args[] = {arg_0, arg_1, arg_2}; + cl_mutable_dispatch_config_khr dispatch_config{ + CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR, + nullptr, + command_handle, + 3 /* num_args */, + 0 /* num_svm_arg */, + 0 /* num_exec_infos */, + 0 /* work_dim - 0 means no change to dimensions */, + args /* arg_list */, + nullptr /* arg_svm_list - nullptr means no
change*/, + nullptr /* exec_info_list */, + nullptr /* global_work_offset */, + nullptr /* global_work_size */, + nullptr /* local_work_size */}; + cl_mutable_base_config_khr mutable_config{ + CL_STRUCTURE_TYPE_MUTABLE_BASE_CONFIG_KHR, nullptr, 1, + &dispatch_config}; + + // Update the command buffer with the mutable configuration + error = clUpdateMutableCommandsKHR(command_buffer, &mutable_config); + CL_CHECK(error); + } + + // Enqueue the command buffer + error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, nullptr, + nullptr); + CL_CHECK(error); + + // Allocate memory for the output data + std::vector<cl_int> output(frame_count); + + // Read the output data from the output buffer + error = clEnqueueReadBuffer(command_queue, output_buffer, CL_TRUE, 0, + frame_size, output.data(), 0, nullptr, nullptr); + CL_CHECK(error); + + // Flush and execute the read buffer + error = clFinish(command_queue); + CL_CHECK(error); + + // Verify the results of the frame + for (size_t i = 0; i < frame_count; ++i) { + const cl_int result = input_a[i] + input_b[i]; + if (output[i] != result) { + std::cerr << "Error: Incorrect result at index " << i << " - Expected " + << result << " was " << output[i] << std::endl; + std::exit(1); + } + } + } + + std::cout << "Result verified\n"; + + CL_CHECK(clReleaseCommandBufferKHR(command_buffer)); + for (size_t i = 0; i < 2; i++) { + CL_CHECK(clReleaseMemObject(input_A_buffers[i])); + CL_CHECK(clReleaseMemObject(input_B_buffers[i])); + CL_CHECK(clReleaseMemObject(output_buffers[i])); + } + CL_CHECK(clReleaseCommandQueue(command_queue)); + CL_CHECK(clReleaseKernel(kernel)); + CL_CHECK(clReleaseProgram(program)); + CL_CHECK(clReleaseContext(context)); + CL_CHECK(clReleaseDevice(device)); + return 0; +} +---- + +=== Issues + +. Include simpler, more user friendly, entry-points for updating kernel + arguments?
++ +-- +*RESOLVED*: Can be implemented in the ecosystem as a layer on top; if that +layer proves popular then it can be introduced, possibly as another extension +on top. +-- + +. Add a command-buffer clone entry-point for deep copying a command-buffer? + Arguments could then be updated and both command-buffers used. + Useful for techniques like double buffering. ++ +-- +*RESOLVED*: In the use-case we're targeting a user would only have a handle +to the original command-buffer, but not the clone, which may limit the +usefulness of this capability. +Additionally, an implementation could be complicated by non-trivial deep +copying of the underlying objects contained in the command-buffer. +As a result of this new entry-point being an additive change to the +specification it is omitted, and if its functionality has demand later, it +may be introduced as a standalone extension. +-- + +include::provisional_notice.asciidoc[] + +=== Version History + + * Revision 0.9.0, 2022-08-31 + ** First assigned version (provisional). + * Revision 0.9.1, 2023-11-07 + ** Add type {cl_mutable_dispatch_asserts_khr_TYPE} and its possible values + (provisional). diff --git a/api/cl_khr_create_command_queue.asciidoc b/api/cl_khr_create_command_queue.asciidoc new file mode 100644 index 00000000..fe6bf0c0 --- /dev/null +++ b/api/cl_khr_create_command_queue.asciidoc @@ -0,0 +1,58 @@ +// Copyright 2017-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_create_command_queue.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2020-04-21 +*IP Status*:: + No known IP claims. +*Contributors*:: + TBD + +=== Description + +`cl_khr_create_command_queue` allows OpenCL 1.x devices to support an +equivalent of the {clCreateCommandQueueWithProperties} API that was added in +OpenCL 2.0.
+This allows OpenCL 1.x devices to support other optional extensions or +features that use the {clCreateCommandQueueWithProperties} API to specify +additional command-queue properties that cannot be specified using the +OpenCL 1.x {clCreateCommandQueue} API. + +No new command-queue properties are required by this extension. +Applications may use the existing {CL_DEVICE_QUEUE_PROPERTIES} query to +determine command-queue properties that are supported by the device. + +OpenCL 2.x devices may support this extension for compatibility. +In this scenario, the function added by this extension will have the same +capabilities as the core {clCreateCommandQueueWithProperties} API. +Applications that only target OpenCL 2.x devices should use the core OpenCL +2.x {clCreateCommandQueueWithProperties} API instead of this extension API. + +NOTE: The type of the property value passed as {CL_QUEUE_PROPERTIES} to +{clCreateCommandQueueWithPropertiesKHR} is specified as +{cl_bitfield_TYPE} while the type passed to +{clCreateCommandQueueWithProperties} is +{cl_command_queue_properties_TYPE}. +While this is not a promotion in terms of the suffixing, both types +are aliased to {cl_ulong_TYPE}, so no ABI or compiler issues should +result if the extension and core APIs are supported. + + +=== New Commands + + * {clCreateCommandQueueWithPropertiesKHR} + +=== New Types + + * {cl_queue_properties_khr_TYPE} + +//@ TODO Missing bitfield values allowed for CL_QUEUE_PROPERTIES? + +=== Version History + + * Revision 1.0.0, 2020-04-21 + ** First assigned version. diff --git a/api/cl_khr_d3d10_sharing.asciidoc b/api/cl_khr_d3d10_sharing.asciidoc new file mode 100644 index 00000000..ace32dbe --- /dev/null +++ b/api/cl_khr_d3d10_sharing.asciidoc @@ -0,0 +1,136 @@ +// Copyright 2017-2024 The Khronos Group Inc. 
+// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_d3d10_sharing.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2020-04-21 +*IP Status*:: + No known IP claims. +*Contributors*:: + TBD + +=== Description + +`cl_khr_d3d10_sharing` provides interoperability between OpenCL and Direct3D 10. + +=== New Types + + * {cl_d3d10_device_source_khr_TYPE} + * {cl_d3d10_device_set_khr_TYPE} + +=== New Commands + + * {clGetDeviceIDsFromD3D10KHR} + * {clCreateFromD3D10BufferKHR} + * {clCreateFromD3D10Texture2DKHR} + * {clCreateFromD3D10Texture3DKHR} + * {clEnqueueAcquireD3D10ObjectsKHR} + * {clEnqueueReleaseD3D10ObjectsKHR} + +=== New Tokens + + * {cl_d3d10_device_source_khr_TYPE} + ** {CL_D3D10_DEVICE_KHR} + ** {CL_D3D10_DXGI_ADAPTER_KHR} + * {cl_d3d10_device_set_khr_TYPE} + ** {CL_PREFERRED_DEVICES_FOR_D3D10_KHR} + ** {CL_ALL_DEVICES_FOR_D3D10_KHR} + * {cl_context_properties_TYPE} + ** {CL_CONTEXT_D3D10_DEVICE_KHR} + * {cl_context_info_TYPE} + ** {CL_CONTEXT_D3D10_PREFER_SHARED_RESOURCES_KHR} + * {cl_mem_info_TYPE} + ** {CL_MEM_D3D10_RESOURCE_KHR} + * {cl_image_info_TYPE} + ** {CL_IMAGE_D3D10_SUBRESOURCE_KHR} + * {cl_event_info_TYPE} + ** {CL_COMMAND_ACQUIRE_D3D10_OBJECTS_KHR} + ** {CL_COMMAND_RELEASE_D3D10_OBJECTS_KHR} + * New Error Codes + ** {CL_INVALID_D3D10_DEVICE_KHR} + ** {CL_INVALID_D3D10_RESOURCE_KHR} + ** {CL_D3D10_RESOURCE_ALREADY_ACQUIRED_KHR} + ** {CL_D3D10_RESOURCE_NOT_ACQUIRED_KHR} + + +=== Issues + + . Should this extension be KHR or EXT? ++ +-- +*PROPOSED*: KHR. +If this extension is to be approved by Khronos then it should be KHR, +otherwise EXT. +Not all platforms can support this extension, but that is also true of +OpenGL interop. + +*RESOLVED*: KHR. +-- + + . Requiring SharedHandle on ID3D10Resource ++ +-- +Requiring this can largely simplify things at the DDI level and make some +implementations faster. 
+However, the DirectX spec only defines the shared handle for a subset of the +resources we would like to support: + + * `D3D10_RESOURCE_MISC_SHARED` - Enables the sharing of resource data + between two or more Direct3D devices. + The only resources that can be shared are 2D non-mipmapped textures. + +*PROPOSED*: A: Add wording to the spec about some implementations needing +the resource setup as shared: + +Some implementations may require the resource to be shared on the D3D10 side +of the API. + +If we do that, do we need another enum to describe this failure case? + +*PROPOSED*: B: Require that all implementations support both shared and +non-shared resources. +The restrictions prohibiting multisample textures and the flag +D3D10_USAGE_IMMUTABLE guarantee software access to all shareable resources. + +*RESOLVED*: Require that implementations support both +D3D10_RESOURCE_MISC_SHARED being set and not set. +Add the query for {CL_CONTEXT_D3D10_PREFER_SHARED_RESOURCES_KHR} to +determine on a per-context basis which method will be faster. +-- + + . Texture1D support ++ +-- +There is not a matching CL type, so do we want to support this and map to +buffer or Texture2D? + +*RESOLVED*: We will not add support for ID3D10Texture1D objects unless a +corresponding OpenCL 1D Image type is created. +-- + + . CL/D3D10 queries ++ +-- +The GL interop has {clGetGLObjectInfo} and {clGetGLTextureInfo}. +It is unclear if these are needed on the D3D10 interop side since the D3D10 +spec makes these queries trivial on the D3D10 object itself. +Also, not all of the semantics of the GL call map across. + +*PROPOSED*: Add the {clGetMemObjectInfo} and {clGetImageInfo} parameter +names {CL_MEM_D3D10_RESOURCE_KHR} and {CL_IMAGE_D3D10_SUBRESOURCE_KHR} to +query the D3D10 resource from which a {cl_mem_TYPE} was created. +From this data, any D3D10 side information may be queried using the D3D10 +API. 
+ +*RESOLVED*: We will use {clGetMemObjectInfo} and {clGetImageInfo} to access +this information. +-- + + +=== Version History + + * Revision 1.0.0, 2020-04-21 + ** First assigned version. diff --git a/api/cl_khr_d3d11_sharing.asciidoc b/api/cl_khr_d3d11_sharing.asciidoc new file mode 100644 index 00000000..884044ed --- /dev/null +++ b/api/cl_khr_d3d11_sharing.asciidoc @@ -0,0 +1,56 @@ +// Copyright 2017-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_d3d11_sharing.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2020-04-21 +*IP Status*:: + No known IP claims. +*Contributors*:: + TBD + +=== Description + +`cl_khr_d3d11_sharing` provides interoperability between OpenCL and Direct3D 11. + +=== New Commands + + * {clGetDeviceIDsFromD3D11KHR} + * {clCreateFromD3D11BufferKHR} + * {clCreateFromD3D11Texture2DKHR} + * {clCreateFromD3D11Texture3DKHR} + * {clEnqueueAcquireD3D11ObjectsKHR} + * {clEnqueueReleaseD3D11ObjectsKHR} + +=== New Tokens + + * {cl_d3d11_device_source_khr_TYPE} + ** {CL_D3D11_DEVICE_KHR} + ** {CL_D3D11_DXGI_ADAPTER_KHR} + * {cl_d3d11_device_set_khr_TYPE} + ** {CL_PREFERRED_DEVICES_FOR_D3D11_KHR} + ** {CL_ALL_DEVICES_FOR_D3D11_KHR} + * {cl_context_properties_TYPE} + ** {CL_CONTEXT_D3D11_DEVICE_KHR} + * {cl_context_info_TYPE} + ** {CL_CONTEXT_D3D11_PREFER_SHARED_RESOURCES_KHR} + * {cl_mem_info_TYPE} + ** {CL_MEM_D3D11_RESOURCE_KHR} + * {cl_image_info_TYPE} + ** {CL_IMAGE_D3D11_SUBRESOURCE_KHR} + * {cl_event_info_TYPE} + ** {CL_COMMAND_ACQUIRE_D3D11_OBJECTS_KHR} + ** {CL_COMMAND_RELEASE_D3D11_OBJECTS_KHR} + * New Error Codes + ** {CL_INVALID_D3D11_DEVICE_KHR} + ** {CL_INVALID_D3D11_RESOURCE_KHR} + ** {CL_D3D11_RESOURCE_ALREADY_ACQUIRED_KHR} + ** {CL_D3D11_RESOURCE_NOT_ACQUIRED_KHR} + +=== Version History + + * Revision 1.0.0, 2020-04-21 + ** First assigned version. 
diff --git a/api/cl_khr_depth_images.asciidoc b/api/cl_khr_depth_images.asciidoc new file mode 100644 index 00000000..73469eec --- /dev/null +++ b/api/cl_khr_depth_images.asciidoc @@ -0,0 +1,25 @@ +// Copyright 2017-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_depth_images.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2020-04-21 +*IP Status*:: + No known IP claims. +*Contributors*:: + TBD + +=== Description + +`cl_khr_depth_images` adds OpenCL C support for depth images. + +See the link:{OpenCLCSpecURL}#cl_khr_depth_images[Depth Images] section of +the OpenCL C specification for more information. + +=== Version History + + * Revision 1.0.0, 2020-04-21 + ** First assigned version. diff --git a/api/cl_khr_device_enqueue_local_arg_types.asciidoc b/api/cl_khr_device_enqueue_local_arg_types.asciidoc new file mode 100644 index 00000000..ee3acb41 --- /dev/null +++ b/api/cl_khr_device_enqueue_local_arg_types.asciidoc @@ -0,0 +1,29 @@ +// Copyright 2017-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_device_enqueue_local_arg_types.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2020-04-21 +*IP Status*:: + No known IP claims. +*Contributors*:: + TBD + +=== Description + +`cl_khr_device_enqueue_local_arg_types` allows arguments to blocks that are +passed to the *enqueue_kernel* built-in OpenCL C function to be pointers to +any type (built-in or user-defined) in local memory, instead of requiring +arguments to blocks to be pointers to void in local memory. + +See the link:{OpenCLCSpecURL}#cl_khr_device_enqueue_local_arg_types[Device +Enqueue Local Argument Types] section of the OpenCL C specification for more +information. + +=== Version History + + * Revision 1.0.0, 2020-04-21 + ** First assigned version. 
diff --git a/api/cl_khr_device_uuid.asciidoc b/api/cl_khr_device_uuid.asciidoc new file mode 100644 index 00000000..023b34df --- /dev/null +++ b/api/cl_khr_device_uuid.asciidoc @@ -0,0 +1,40 @@ +// Copyright 2018-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_device_uuid.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + DateTBD +*IP Status*:: + No known IP claims. +*Contributors*:: + TBD + +=== Description + +`cl_khr_device_uuid` adds the ability to query a universally unique +identifier (UUID) for an OpenCL driver and OpenCL device. +The UUIDs returned by the query may be used to identify drivers and devices +across processes or APIs. + +=== New Enums + +Accepted values for the _param_name_ parameter to {clGetDeviceInfo}: + + * {cl_device_info_TYPE} + ** {CL_DEVICE_UUID_KHR} + ** {CL_DRIVER_UUID_KHR} + ** {CL_DEVICE_LUID_VALID_KHR} + ** {CL_DEVICE_LUID_KHR} + ** {CL_DEVICE_NODE_MASK_KHR} + * Constants describing the size of the driver and device UUIDs, and the + device LUID: + ** {CL_UUID_SIZE_KHR} + ** {CL_LUID_SIZE_KHR} + +=== Version History + + * Revision 1.0.0, 2020-08-27 + ** First assigned version. diff --git a/api/cl_khr_dx9_media_sharing.asciidoc b/api/cl_khr_dx9_media_sharing.asciidoc new file mode 100644 index 00000000..92e8ed51 --- /dev/null +++ b/api/cl_khr_dx9_media_sharing.asciidoc @@ -0,0 +1,65 @@ +// Copyright 2017-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_dx9_media_sharing.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2020-04-21 +*IP Status*:: + No known IP claims. +*Contributors*:: + TBD + +=== Description + +`cl_khr_dx9_media_sharing` allows applications to use media surfaces as +OpenCL memory objects. +This allows efficient sharing of data between OpenCL and selected adapter +APIs (only DX9 for now).
+If this extension is supported, an OpenCL image object can be created from a +media surface and the OpenCL API can be used to execute kernels that read +and/or write memory objects that are media surfaces. +Note that OpenCL memory objects may be created from the adapter media +surface if and only if the OpenCL context has been created from that +adapter. + +=== New Commands + + * {clGetDeviceIDsFromDX9MediaAdapterKHR} + * {clCreateFromDX9MediaSurfaceKHR} + * {clEnqueueAcquireDX9MediaSurfacesKHR} + * {clEnqueueReleaseDX9MediaSurfacesKHR} + +=== New Tokens + + * {cl_dx9_media_adapter_type_khr_TYPE} + ** {CL_ADAPTER_D3D9_KHR} + ** {CL_ADAPTER_D3D9EX_KHR} + ** {CL_ADAPTER_DXVA_KHR} + * {cl_dx9_media_adapter_set_khr_TYPE} + ** {CL_PREFERRED_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR} + ** {CL_ALL_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR} + * {cl_context_info_TYPE} + ** {CL_CONTEXT_ADAPTER_D3D9_KHR} + ** {CL_CONTEXT_ADAPTER_D3D9EX_KHR} + ** {CL_CONTEXT_ADAPTER_DXVA_KHR} + * {cl_mem_info_TYPE} + ** {CL_MEM_DX9_MEDIA_ADAPTER_TYPE_KHR} + ** {CL_MEM_DX9_MEDIA_SURFACE_INFO_KHR} + * {cl_image_info_TYPE} + ** {CL_IMAGE_DX9_MEDIA_PLANE_KHR} + * {cl_event_info_TYPE} + ** {CL_COMMAND_ACQUIRE_DX9_MEDIA_SURFACES_KHR} + ** {CL_COMMAND_RELEASE_DX9_MEDIA_SURFACES_KHR} + * New Error Codes + ** {CL_INVALID_DX9_MEDIA_ADAPTER_KHR} + ** {CL_INVALID_DX9_MEDIA_SURFACE_KHR} + ** {CL_DX9_MEDIA_SURFACE_ALREADY_ACQUIRED_KHR} + ** {CL_DX9_MEDIA_SURFACE_NOT_ACQUIRED_KHR} + +=== Version History + + * Revision 1.0.0, 2020-04-21 + ** First assigned version. diff --git a/api/cl_khr_egl_event.asciidoc b/api/cl_khr_egl_event.asciidoc new file mode 100644 index 00000000..86b5fbb3 --- /dev/null +++ b/api/cl_khr_egl_event.asciidoc @@ -0,0 +1,72 @@ +// Copyright 2017-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_egl_event.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2020-04-21 +*IP Status*:: + No known IP claims. 
+*Contributors*:: + TBD + +=== Description + +`cl_khr_egl_event` allows creating OpenCL event objects linked to EGL fence +sync objects, potentially improving efficiency of sharing images and buffers +between the two APIs. +The companion `EGL_KHR_cl_event` extension provides the complementary +functionality of creating an EGL sync object from an OpenCL event object. + +=== New Commands + + * {clCreateEventFromEGLSyncKHR} + +=== New Tokens + + * New Error Codes + ** {CL_INVALID_EGL_OBJECT_KHR} + ** {CL_COMMAND_EGL_FENCE_SYNC_OBJECT_KHR} + +=== Issues + +Most issues are shared with `<<cl_khr_gl_event>>` and are resolved as +described in that extension. + + . Should we support implicit synchronization? ++ +-- +*RESOLVED*: No, as this may be very difficult since the synchronization +would not be with EGL, it would be with currently bound EGL client APIs. +It would be necessary to know which client APIs might be bound, to validate +that they're associated with the `EGLDisplay` associated with the OpenCL +context, and to reach into each such context. +-- + + . Do we need to have typedefs to use EGL handles in OpenCL? ++ +-- +*RESOLVED*: Using typedefs for EGL handles. +-- + + . Should we restrict which CL APIs can be used with this cl_event? ++ +-- +*RESOLVED*: Use is limited to {clEnqueueAcquire}*** calls only. +-- + + . What is the desired behaviour for this extension when EGLSyncKHR is of a + type other than `EGL_SYNC_FENCE_KHR`? ++ +-- +*RESOLVED*: This extension only requires support for `EGL_SYNC_FENCE_KHR`. +Support of other types is an implementation choice, and will result in +{CL_INVALID_EGL_OBJECT_KHR} if unsupported. +-- + +=== Version History + + * Revision 1.0.0, 2020-04-21 + ** First assigned version. diff --git a/api/cl_khr_egl_image.asciidoc b/api/cl_khr_egl_image.asciidoc new file mode 100644 index 00000000..d324637a --- /dev/null +++ b/api/cl_khr_egl_image.asciidoc @@ -0,0 +1,103 @@ +// Copyright 2017-2024 The Khronos Group Inc.
+// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_egl_image.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2020-04-21 +*IP Status*:: + No known IP claims. +*Contributors*:: + TBD + +=== Description + +`cl_khr_egl_image` provides a mechanism for creating OpenCL memory objects +from EGLImages. + +=== New Commands + + * {clCreateFromEGLImageKHR} + * {clEnqueueAcquireEGLObjectsKHR} + * {clEnqueueReleaseEGLObjectsKHR} + * {cl_event_info_TYPE} + ** {CL_COMMAND_ACQUIRE_EGL_OBJECTS_KHR} + ** {CL_COMMAND_RELEASE_EGL_OBJECTS_KHR} + * New Error Codes + ** {CL_EGL_RESOURCE_NOT_ACQUIRED_KHR} + ** {CL_INVALID_EGL_OBJECT_KHR} + +=== Issues + + . This extension does not support reference counting of the images, so the + onus is on the application to behave sensibly and not release the + underlying {cl_mem_TYPE} object while the `EGLImage` is still being + used. + . In order to ensure data integrity, the application is responsible for + synchronizing access to shared CL/EGL image objects by their respective + APIs. + Failure to provide such synchronization may result in race conditions + and other undefined behavior. + This may be accomplished by calling {clWaitForEvents} with the event + objects returned by any OpenCL commands which use the shared image + object or by calling {clFinish}. + . Currently {CL_MEM_READ_ONLY} is the only supported flag for _flags_. ++ +-- +*RESOLVED*: Implementation will now return an error if writing to a shared +object that is not supported rather than disallowing it entirely. +-- + . Currently restricted to 2D image objects. + . What should happen for YUV color-space conversion, multi plane images, + and chroma-siting, and channel mapping? ++ +-- +*RESOLVED*: YUV is no longer explicitly described in this extension. +Before this removal the behavior was dependent on the platform. +This extension explicitly leaves the YUV layout to the platform and `EGLImage` +source extension (i.e.
is implementation specific). +Colorspace conversion must be applied by the application using a color +conversion matrix. + +The expected extension path if YUV color-space conversion is to be supported +is to introduce a YUV image type and provide overloaded versions of the +read_image built-in functions. + +Getting image information for a YUV image should return the original image +size (non quantized size) when all of Y U and V are present in the image. +If the planes have been separated then the actual dimensionality of the +separated plane should be reported. +For example with YUV 4:2:0 (NV12) with a YUV image of 256x256, the Y only +image would return 256x256 whereas the UV only image would return 128x128. +-- + . Should an attribute list be used instead? ++ +-- +*RESOLVED*: function has been changed to use an attribute list. +-- + . What should happen for `EGLImage` extensions which introduce formats + without a mapping to an OpenCL image channel data type or channel order? ++ +-- +*RESOLVED*: This extension does not define those formats. +It is expected that as additional EGL extensions are added to create EGL +images from other sources, an extension to CL will be introduced where +needed to represent those image types. +-- + . What are the guarantees to synchronization behavior provided by the + implementation? ++ +-- +The basic portable form of synchronization is to use a {clFinish}, as is the +case for GL interop. +In addition, implementations which support the synchronization extensions +`<<cl_khr_egl_event>>` and `EGL_KHR_cl_event` can interoperate more +efficiently as described in those extensions. +-- + +=== Version History + + * Revision 1.0.0, 2020-04-21 + ** First assigned version. diff --git a/api/cl_khr_expect_assume.asciidoc b/api/cl_khr_expect_assume.asciidoc new file mode 100644 index 00000000..c5559a13 --- /dev/null +++ b/api/cl_khr_expect_assume.asciidoc @@ -0,0 +1,75 @@ +// Copyright 2017-2024 The Khronos Group Inc.
+// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_expect_assume.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2021-11-10 +*Interactions and External Dependencies*:: + The initial version of this extension extends the OpenCL SPIR-V + environment to support new instructions. + Please refer to the OpenCL SPIR-V Environment Specification that + describes how this extension modifies the OpenCL SPIR-V environment. +*IP Status*:: + No known IP claims. +*Contributors*:: + TBD + +=== Description + +`cl_khr_expect_assume` adds mechanisms to provide information to the +compiler that may improve the performance of some kernels. +Specifically, this extension adds the ability to: + + * Tell the compiler the _expected_ value of a variable. + * Allow the compiler to _assume_ a condition is true. + +These functions are not required for functional correctness. + +The initial version of this extension extends the OpenCL SPIR-V environment +to support new instructions for offline compilation tool chains. +Similar functionality may be provided by some OpenCL C online compilation +tool chains, but formal support in OpenCL C is not required by the initial +version of the extension. + +=== Sample Code + +Although this extension does not formally extend OpenCL C, the ability to +provide _expect_ and _assume_ information is supported by many OpenCL C +compiler tool chains. +The sample code below describes how to test for and provide _expect_ and +_assume_ information to compilers based on Clang: + +[source,opencl_c] +---- +// __has_builtin is an optional compiler feature that is supported by Clang. +// If this feature is not supported, we will assume the builtin is not present. +#ifndef __has_builtin +#define __has_builtin(x) 0 +#endif + +kernel void test(global int* dst, global int* src) +{ + int value = src[get_global_id(0)]; + + // Tell the compiler that the most likely source value is zero. 
+#if __has_builtin(__builtin_expect) + value = __builtin_expect(value, 0); +#endif + + // Tell the compiler that the source value is non-negative. + // Behavior is undefined if the source value is actually negative. +#if __has_builtin(__builtin_assume) + __builtin_assume(value >= 0); +#endif + + dst[get_global_id(0)] = value % 4; +} +---- + +=== Version History + + * Revision 1.0.0, 2021-11-10 + ** First assigned version. diff --git a/api/cl_khr_extended_async_copies.asciidoc b/api/cl_khr_extended_async_copies.asciidoc new file mode 100644 index 00000000..0fac1890 --- /dev/null +++ b/api/cl_khr_extended_async_copies.asciidoc @@ -0,0 +1,33 @@ +// Copyright 2017-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_extended_async_copies.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2021-11-10 +*IP Status*:: + No known IP claims. +*Contributors*:: + TBD + +=== Description + +`cl_khr_extended_async_copies` augments built-in OpenCL C asynchronous copy +functions to support more patterns: + + . For async copy between 2D source and 2D destination. + . For async copy between 3D source and 3D destination. + +See the link:{OpenCLCSpecURL}#cl_khr_extended_async_copies[Extended Async +Copy Functions] section of the OpenCL C specification for more information. + +=== Version History + + * Revision 0.9.0, 2020-04-21 + ** First assigned version (provisional). + * Revision 0.9.1, 2021-09-06 + ** Elements-based proposal update. + * Revision 1.0.0, 2021-11-10 + ** First non-provisional version. diff --git a/api/cl_khr_extended_bit_ops.asciidoc b/api/cl_khr_extended_bit_ops.asciidoc new file mode 100644 index 00000000..b516f1a2 --- /dev/null +++ b/api/cl_khr_extended_bit_ops.asciidoc @@ -0,0 +1,33 @@ +// Copyright 2018-2024 The Khronos Group Inc. 
+// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_extended_bit_ops.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2021-04-22 +*IP Status*:: + No known IP claims. +*Contributors*:: + TBD + +=== Description + +`cl_khr_extended_bit_ops` adds built-in OpenCL C functions for performing +extended bit operations. +Specifically, the following functions are added: + + * bitfield insert: insert bits from one source operand into another source + operand. + * bitfield extract: extract bits from a source operand, with sign- or + zero-extension. + * bit reverse: reverse the bits of a source operand. + +See the link:{OpenCLCSpecURL}#cl_khr_extended_bit_ops[Extended Bit +Operations] section of the OpenCL C specification for more information. + +=== Version History + + * Revision 1.0.0, 2021-04-22 + ** Initial version. diff --git a/api/cl_khr_extended_versioning.asciidoc b/api/cl_khr_extended_versioning.asciidoc new file mode 100644 index 00000000..4cf053be --- /dev/null +++ b/api/cl_khr_extended_versioning.asciidoc @@ -0,0 +1,158 @@ +// Copyright 2019-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_extended_versioning.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2020-02-12 +*IP Status*:: + No known IP claims. +*Contributors*:: + - Kévin Petit, Arm Ltd. + - Ben Ashbaugh, Intel + - Alastair Murray, Codeplay Software Ltd. + - Einar Hov, Arm Ltd. + +=== Description + +The `cl_khr_extended_versioning` extension introduces new platform and +device queries that return detailed version information to applications. +It makes it possible to return the exact revision of the specification or +intermediate languages supported by an implementation. 
+It also enables implementations to communicate a version number for each of +the extensions they support and remove the requirement for applications to +process strings to test for the presence of an extension or intermediate +language or built-in kernel. + +Extended versioning was promoted to a core feature in OpenCL 3.0. +However, the query for {CL_DEVICE_OPENCL_C_NUMERIC_VERSION_KHR} was replaced +by the query for {CL_DEVICE_OPENCL_C_ALL_VERSIONS}. +With the exception of this query, all types, structures, enums, and macro +names defined by this extension are equivalent to the corresponding core +name (with the `_KHR` or `_khr` suffix removed). + +The version number encoding scheme is described in the <> section. + +=== New Types + + * {cl_version_khr_TYPE} + +=== New Structures + + * {cl_name_version_khr_TYPE} + * {CL_NAME_VERSION_MAX_NAME_SIZE_KHR_anchor} + +=== New Macro Names + + * {CL_VERSION_MAJOR_BITS_KHR_anchor} + * {CL_VERSION_MINOR_BITS_KHR_anchor} + * {CL_VERSION_PATCH_BITS_KHR_anchor} + * `CL_VERSION_MAJOR_MASK_KHR` + * `CL_VERSION_MINOR_MASK_KHR` + * `CL_VERSION_PATCH_MASK_KHR` + * `CL_VERSION_MAJOR_KHR` + * `CL_VERSION_MINOR_KHR` + * `CL_VERSION_PATCH_KHR` + * `CL_MAKE_VERSION_KHR` + +=== New Enums + + * {cl_device_info_TYPE} + ** {CL_DEVICE_NUMERIC_VERSION_KHR} + ** {CL_DEVICE_OPENCL_C_NUMERIC_VERSION_KHR} + ** {CL_DEVICE_EXTENSIONS_WITH_VERSION_KHR} + ** {CL_DEVICE_ILS_WITH_VERSION_KHR} + ** {CL_DEVICE_BUILT_IN_KERNELS_WITH_VERSION_KHR} + * {cl_platform_info_TYPE} + ** {CL_PLATFORM_NUMERIC_VERSION_KHR} + ** {CL_PLATFORM_EXTENSIONS_WITH_VERSION_KHR} + +=== Conformance Tests + +. Each of the new queries described in this extension must be attempted and + succeed. +. It must be verified that the information returned by all queries that + extend existing queries is consistent with the information returned by + existing queries. +. Some of the queries introduced by this extension impose uniqueness constraints + on the list of returned values. 
+ It must be verified that these constraints are satisfied. + +=== Issues + +. What compatibility policy should we define? e.g. a _revision_ has to be + backwards-compatible with previous ones ++ +-- +*RESOLVED*: No general rules as that wouldn't be testable. +Here's a recommended policy: + + - Patch version bump: only clarifications and small/obvious bugfixes. + - Minor version bump: backwards-compatible changes only. + - Major version bump: backwards compatibility may break. +-- + +. Do we want versioning for built-in kernels as returned by {CL_DEVICE_BUILT_IN_KERNELS}? ++ +-- +*RESOLVED*: No immediate use-case for versioning but being able to get a + list of individual kernels without parsing a string is desirable. + Adding {CL_DEVICE_BUILT_IN_KERNELS_WITH_VERSION_KHR}. +-- + +. What is the behaviour of the queries that return an array of structures when +there are no elements to return? ++ +-- +*RESOLVED*: The query succeeds and the size returned is zero. +-- + +. What value should be returned when version information is not available? ++ +-- +*RESOLVED*: If a patch version is not available, it should be reported as 0. + If no version information is available, 0.0.0 should be + reported. + These values have been chosen as they are guaranteed to be lower + than or equal to any other version. +-- + +. Should we add a query to report SPIR-V extended instruction sets? ++ +-- +*RESOLVED*: It is unlikely that we will introduce many SPIR-V extended + instruction sets without an accompanying API extension. + Decided not to do this. +-- + +. Should the queries for which the old-style query doesn't exist in a given + OpenCL version be present (e.g. + {CL_DEVICE_ILS_WITH_VERSION_KHR} prior to OpenCL 2.1 or + without support for `<<cl_khr_il_program>>` or + {CL_DEVICE_OPENCL_C_NUMERIC_VERSION_KHR} on OpenCL 1.0)? ++ +-- +*RESOLVED*: All the queries are always present.
+ {CL_DEVICE_ILS_WITH_VERSION_KHR} returns an empty + set when Intermediate Languages are not supported. + {CL_DEVICE_OPENCL_C_NUMERIC_VERSION_KHR} always returns 1.0 on + an OpenCL 1.0 platform. +-- + +. Is reporting multiple Intermediate Languages with the same name and major/minor + versions but differing patch versions allowed? ++ +-- +*RESOLVED*: No. + This isn't aligned with the intended use for patch versions and + makes it harder for implementations to guarantee consistency + with the existing IL queries. +-- + +=== Version History + + * Revision 1.0.0, 2020-02-12 + ** Initial version. diff --git a/api/cl_khr_external_memory.asciidoc b/api/cl_khr_external_memory.asciidoc new file mode 100644 index 00000000..0685d72c --- /dev/null +++ b/api/cl_khr_external_memory.asciidoc @@ -0,0 +1,316 @@ +// Copyright 2021-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_external_memory.txt[] + +//@ *Revision*:: +//@ 0.9.3 +//@ *Extension and Version Dependencies*:: +//@ This extension requires OpenCL 3.0. + +=== Other Extension Metadata + +*Last Modified Date*:: + 2023-08-29 +*IP Status*:: + No known IP claims. +*Contributors*:: + - Ajit Hakke-Patil, NVIDIA + - Amit Rao, NVIDIA + - Balaji Calidas, QUALCOMM + - Ben Ashbaugh, INTEL + - Carsten Rohde, NVIDIA + - Christoph Kubisch, NVIDIA + - Debalina Bhattacharjee, NVIDIA + - Faith Ekstrand, INTEL + - James Jones, NVIDIA + - Jeremy Kemp, IMAGINATION + - Joshua Kelly, QUALCOMM + - Karthik Raghavan Ravi, NVIDIA + - Kedar Patil, NVIDIA + - Kevin Petit, ARM + - Nikhil Joshi, NVIDIA + - Sharan Ashwathnarayan, NVIDIA + - Vivek Kini, NVIDIA + +=== Description + +`cl_khr_external_memory` defines a generic mechanism to share buffer and +image objects between OpenCL and many other APIs, including: + + * Optional properties to import external memory exported by other APIs + into OpenCL for a set of devices.
+ * Routines to explicitly hand off memory ownership between OpenCL and + other APIs. + +Other related extensions define specific external memory types that may be +imported into OpenCL. + + +==== Background + +TODO + +==== Rationale + +TODO + +=== Interactions With Other Extensions + +TODO + +// The 'New ...' section can be auto-generated + +=== New Commands + + * {clEnqueueAcquireExternalMemObjectsKHR} + * {clEnqueueReleaseExternalMemObjectsKHR} + +=== New Structures + + * None + +=== New Types + + * {cl_external_memory_handle_type_khr_TYPE} + +=== New Enums + + * {cl_platform_info_TYPE} + ** {CL_PLATFORM_EXTERNAL_MEMORY_IMPORT_HANDLE_TYPES_KHR} + * {cl_device_info_TYPE} + ** {CL_DEVICE_EXTERNAL_MEMORY_IMPORT_HANDLE_TYPES_KHR} + ** {CL_DEVICE_EXTERNAL_MEMORY_IMPORT_ASSUME_LINEAR_IMAGES_HANDLE_TYPES_KHR} + * {cl_mem_properties_TYPE} + ** {CL_MEM_DEVICE_HANDLE_LIST_KHR} + ** {CL_MEM_DEVICE_HANDLE_LIST_END_KHR} + * Return values from {clGetEventInfo} when _param_name_ is + {CL_EVENT_COMMAND_TYPE}: + ** {CL_COMMAND_ACQUIRE_EXTERNAL_MEM_OBJECTS_KHR} + ** {CL_COMMAND_RELEASE_EXTERNAL_MEM_OBJECTS_KHR} + +[[cl_khr_external_memory-Sample-Code]] +=== Sample Code + +==== Example for Creating a CL Buffer From an Exported External Buffer in a Single Device Context + +This example also requires use of the `<<cl_khr_external_memory_opaque_fd>>` +extension. + +[source] +---- +// Get cl_devices of the platform. +clGetDeviceIDs(..., &devices, &deviceCount); + +// Create cl_context with just first device +clCreateContext(..., 1, devices, ...); + +// Obtain fd/win32 or similar handle for external memory to be imported +// from other API. +int fd = getFdForExternalMemory(); + +// Create extMemBuffer of type cl_mem from fd.
+cl_mem_properties_khr extMemProperties[] = +{ + (cl_mem_properties_khr)CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_FD_KHR, + (cl_mem_properties_khr)fd, + 0 +}; + +cl_mem extMemBuffer = clCreateBufferWithProperties(/*context*/ clContext, + /*properties*/ extMemProperties, + /*flags*/ 0, + /*size*/ size, + /*host_ptr*/ NULL, + /*errcode_ret*/ &errcode_ret); +---- + + +==== Example for Creating a CL Image From an Exported External Image for Single Device Usage in a Multi-Device Context + +This example also requires use of the `<<cl_khr_external_memory_opaque_fd>>` +extension. + +[source] +---- +// Get cl_devices of the platform. +clGetDeviceIDs(..., &devices, &deviceCount); + +// Create cl_context with first two devices +clCreateContext(..., 2, devices, ...); + +// Create img of type cl_mem usable only on devices[0] + +// Create img of type cl_mem. +// Obtain fd/win32 or similar handle for external memory to be imported +// from other API. +int fd = getFdForExternalMemory(); + +// Set cl_image_format based on external image info +cl_image_format clImgFormat = { }; +clImgFormat.image_channel_order = CL_RGBA; +clImgFormat.image_channel_data_type = CL_UNORM_INT8; + +// Set cl_image_desc based on external image info +size_t clImageFormatSize; +cl_image_desc image_desc = { }; +image_desc.image_type = CL_MEM_OBJECT_IMAGE2D_ARRAY; +image_desc.image_width = width; +image_desc.image_height = height; +image_desc.image_depth = depth; +image_desc.image_array_size = num_slices; +image_desc.image_row_pitch = width * 8 * 4; // May need alignment +image_desc.image_slice_pitch = image_desc.image_row_pitch * height; +image_desc.num_mip_levels = 1; +image_desc.num_samples = 0; +image_desc.buffer = NULL; + +cl_mem_properties_khr extMemProperties[] = { + (cl_mem_properties_khr)CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_FD_KHR, + (cl_mem_properties_khr)fd, + (cl_mem_properties_khr)CL_MEM_DEVICE_HANDLE_LIST_KHR, + (cl_mem_properties_khr)devices[0], + CL_MEM_DEVICE_HANDLE_LIST_END_KHR, + 0 +}; + +cl_mem img =
clCreateImageWithProperties(/*context*/ clContext, + /*properties*/ extMemProperties, + /*flags*/ 0, + /*image_format*/ &clImgFormat, + /*image_desc*/ &image_desc, + /*errcode_ret*/ &errcode_ret); + +// Use clGetImageInfo to get cl_image_format details. +size_t clImageFormatSize; +clGetImageInfo(img, + CL_IMAGE_FORMAT, + sizeof(cl_image_format), + &clImgFormat, + &clImageFormatSize); +---- + + +==== Example for Synchronization Using Wait and Signal + +[source] +---- +// Start the main rendering loop + +// Create extSem of type cl_semaphore_khr using clCreateSemaphoreWithPropertiesKHR + +// Create extMem of type cl_mem using clCreateBufferWithProperties or clCreateImageWithProperties + +while (true) { + // (not shown) Signal the semaphore from the other API + + // Wait for the semaphore in OpenCL, by calling clEnqueueWaitSemaphoresKHR on 'extSem' + clEnqueueWaitSemaphoresKHR(/*command_queue*/ command_queue, + /*num_sema_objects*/ 1, + /*sema_objects*/ &extSem, + /*sema_payload_list*/ NULL, + /*num_events_in_wait_list*/ 0, + /*event_wait_list*/ NULL, + /*event*/ NULL); + + // Launch kernel that accesses extMem + clEnqueueNDRangeKernel(command_queue, ...); + + // Signal the semaphore in OpenCL + clEnqueueSignalSemaphoresKHR(/*command_queue*/ command_queue, + /*num_sema_objects*/ 1, + /*sema_objects*/ &extSem, + /*sema_payload_list*/ NULL, + /*num_events_in_wait_list*/ 0, + /*event_wait_list*/ NULL, + /*event*/ NULL); + + // (not shown) Launch work in other API that waits on 'extSem' +} +---- + +==== Example With Memory Sharing Using Acquire/Release + +[source] +---- +// Create extSem of type cl_semaphore_khr using +// clCreateSemaphoreWithPropertiesKHR with CL_SEMAPHORE_HANDLE_*_KHR. + +// Create extMem1 and extMem2 of type cl_mem using clCreateBufferWithProperties +// or clCreateImageWithProperties + +while (true) { + // (not shown) Signal the semaphore from the other API.
Wait for the + // semaphore in OpenCL, by calling clEnqueueWaitSemaphoresKHR on extSem + clEnqueueWaitSemaphoresKHR(/*command_queue*/ cq1, + /*num_sema_objects*/ 1, + /*sema_objects*/ &extSem, + /*sema_payload_list*/ NULL, + /*num_events_in_wait_list*/ 0, + /*event_wait_list*/ NULL, + /*event*/ NULL); + + // Get explicit ownership of extMem1 + clEnqueueAcquireExternalMemObjectsKHR(/*command_queue*/ cq1, + /*num_mem_objects*/ 1, + /*mem_objects*/ &extMem1, + /*num_events_in_wait_list*/ 0, + /*event_wait_list*/ NULL, + /*event*/ NULL); + + // Launch kernel that accesses extMem1 on cq1 on cl_device1 + clEnqueueNDRangeKernel(cq1, ..., &event1); + + // Launch kernel that accesses both extMem1 and extMem2 on cq2 on cl_device2 + // Migration of extMem1 and extMem2 handles through regular CL memory + // migration. + clEnqueueNDRangeKernel(cq2, ..., &event1, &event2); + + // Give up ownership of extMem1 before you signal the semaphore. Handle + // memory migration here. + clEnqueueReleaseExternalMemObjectsKHR(/*command_queue*/ cq2, + /*num_mem_objects*/ 1, + /*mem_objects*/ &extMem1, + /*num_events_in_wait_list*/ 0, + /*event_wait_list*/ NULL, + /*event*/ NULL); + + // Signal the semaphore from OpenCL + clEnqueueSignalSemaphoresKHR(/*command_queue*/ cq2, + /*num_sema_objects*/ 1, + /*sema_objects*/ &extSem, + /*sema_payload_list*/ NULL, + /*num_events_in_wait_list*/ 0, + /*event_wait_list*/ NULL, + /*event*/ NULL); + + // (not shown) Launch work in other API that waits on 'extSem' + // Other API accesses ext1, but not ext2 on device-1 +} +---- + +=== Issues + +. How should the import of images that are created in external APIs with + non-linear tiling be robustly handled? ++ +-- +*UNRESOLVED* +-- + +include::provisional_notice.asciidoc[] + +=== Version History + + * Revision 0.9.0, 2021-09-10 + ** Initial version (provisional). + * Revision 0.9.1, 2023-05-04 + ** Clarified device handle list enum cannot be specified without an + external memory handle (provisional).
+ * Revision 0.9.2, 2023-08-01 + ** Changed device handle list enum to the memory-specific + {CL_MEM_DEVICE_HANDLE_LIST_KHR} (provisional). + * Revision 0.9.3, 2023-08-29 + ** Added query for + {CL_DEVICE_EXTERNAL_MEMORY_IMPORT_ASSUME_LINEAR_IMAGES_HANDLE_TYPES_KHR} + (provisional). diff --git a/api/cl_khr_external_memory_dma_buf.asciidoc b/api/cl_khr_external_memory_dma_buf.asciidoc new file mode 100644 index 00000000..19d54ffa --- /dev/null +++ b/api/cl_khr_external_memory_dma_buf.asciidoc @@ -0,0 +1,92 @@ +// Copyright 2021-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_external_memory_dma_buf.txt[] + +//@ *Revision*:: +//@ 0.9.3 +//@ *Extension and Version Dependencies*:: +//@ This extension requires OpenCL 3.0. +//@ This extension requires the `<>` extension. + +=== Other Extension Metadata + +*Last Modified Date*:: + 2023-08-29 +*IP Status*:: + No known IP claims. +*Contributors*:: + - Ajit Hakke-Patil, NVIDIA + - Amit Rao, NVIDIA + - Balaji Calidas, QUALCOMM + - Ben Ashbaugh, INTEL + - Carsten Rohde, NVIDIA + - Christoph Kubisch, NVIDIA + - Debalina Bhattacharjee, NVIDIA + - Faith Ekstrand, INTEL + - James Jones, NVIDIA + - Jeremy Kemp, IMAGINATION + - Joshua Kelly, QUALCOMM + - Karthik Raghavan Ravi, NVIDIA + - Kedar Patil, NVIDIA + - Kevin Petit, ARM + - Nikhil Joshi, NVIDIA + - Sharan Ashwathnarayan, NVIDIA + - Vivek Kini, NVIDIA + +=== Description + +`cl_khr_external_memory_dma_buf` extends +{cl_external_memory_handle_type_khr_TYPE} to support Linux `dma_buf` as an +external memory handle type that may be specified when creating a buffer or +image memory object. + +==== Background + +TODO + +==== Rationale + +TODO + +=== Interactions With Other Extensions + +TODO + +// The 'New ...' 
section can be auto-generated + +=== New Commands + + None + +=== New Structures + + * None + +=== New Types + + * None + +=== New Enums + + * {cl_external_memory_handle_type_khr_TYPE} + ** {CL_EXTERNAL_MEMORY_HANDLE_DMA_BUF_KHR} + +=== Issues + +include::provisional_notice.asciidoc[] + +=== Version History + + * Revision 0.9.0, 2021-09-10 + ** Initial version (provisional). + * Revision 0.9.1, 2023-05-04 + ** Clarified device handle list enum cannot be specified without an + external memory handle (provisional). + * Revision 0.9.2, 2023-08-01 + ** Changed device handle list enum to the memory-specific + {CL_MEM_DEVICE_HANDLE_LIST_KHR} (provisional). + * Revision 0.9.3, 2023-08-29 + ** Added query for + {CL_DEVICE_EXTERNAL_MEMORY_IMPORT_ASSUME_LINEAR_IMAGES_HANDLE_TYPES_KHR} + (provisional). diff --git a/api/cl_khr_external_memory_dx.asciidoc b/api/cl_khr_external_memory_dx.asciidoc new file mode 100644 index 00000000..c2fbf618 --- /dev/null +++ b/api/cl_khr_external_memory_dx.asciidoc @@ -0,0 +1,95 @@ +// Copyright 2021-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_external_memory_dx.txt[] + +//@ *Revision*:: +//@ 0.9.3 +//@ *Extension and Version Dependencies*:: +//@ This extension requires OpenCL 3.0. +//@ This extension requires the `<>` extension. + +=== Other Extension Metadata + +*Last Modified Date*:: + 2023-08-29 +*IP Status*:: + No known IP claims. 
+*Contributors*:: + - Ajit Hakke-Patil, NVIDIA + - Amit Rao, NVIDIA + - Balaji Calidas, QUALCOMM + - Ben Ashbaugh, INTEL + - Carsten Rohde, NVIDIA + - Christoph Kubisch, NVIDIA + - Debalina Bhattacharjee, NVIDIA + - Faith Ekstrand, INTEL + - James Jones, NVIDIA + - Jeremy Kemp, IMAGINATION + - Joshua Kelly, QUALCOMM + - Karthik Raghavan Ravi, NVIDIA + - Kedar Patil, NVIDIA + - Kevin Petit, ARM + - Nikhil Joshi, NVIDIA + - Sharan Ashwathnarayan, NVIDIA + - Vivek Kini, NVIDIA + +=== Description + +`cl_khr_external_memory_dx` extends +{cl_external_memory_handle_type_khr_TYPE} to support Windows handles +referring to Direct 3D resources as external memory handle types that may be +specified when creating a buffer or image memory object. + +==== Background + +TODO + +==== Rationale + +TODO + +=== Interactions With Other Extensions + +TODO + +// The 'New ...' section can be auto-generated + +=== New Commands + + None + +=== New Structures + + * None + +=== New Types + + * None + +=== New Enums + + * {cl_external_memory_handle_type_khr_TYPE} + ** {CL_EXTERNAL_MEMORY_HANDLE_D3D11_TEXTURE_KHR} + ** {CL_EXTERNAL_MEMORY_HANDLE_D3D11_TEXTURE_KMT_KHR} + ** {CL_EXTERNAL_MEMORY_HANDLE_D3D12_HEAP_KHR} + ** {CL_EXTERNAL_MEMORY_HANDLE_D3D12_RESOURCE_KHR} + +=== Issues + +include::provisional_notice.asciidoc[] + +=== Version History + + * Revision 0.9.0, 2021-09-10 + ** Initial version (provisional). + * Revision 0.9.1, 2023-05-04 + ** Clarified device handle list enum cannot be specified without an + external memory handle (provisional). + * Revision 0.9.2, 2023-08-01 + ** Changed device handle list enum to the memory-specific + {CL_MEM_DEVICE_HANDLE_LIST_KHR} (provisional). + * Revision 0.9.3, 2023-08-29 + ** Added query for + {CL_DEVICE_EXTERNAL_MEMORY_IMPORT_ASSUME_LINEAR_IMAGES_HANDLE_TYPES_KHR} + (provisional). 
diff --git a/api/cl_khr_external_memory_opaque_fd.asciidoc b/api/cl_khr_external_memory_opaque_fd.asciidoc new file mode 100644 index 00000000..990582b4 --- /dev/null +++ b/api/cl_khr_external_memory_opaque_fd.asciidoc @@ -0,0 +1,92 @@ +// Copyright 2021-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_external_memory_opaque_fd.txt[] + +//@ *Revision*:: +//@ 0.9.3 +//@ *Extension and Version Dependencies*:: +//@ This extension requires OpenCL 3.0. +//@ This extension requires the `<>` extension. + +=== Other Extension Metadata + +*Last Modified Date*:: + 2023-08-29 +*IP Status*:: + No known IP claims. +*Contributors*:: + - Ajit Hakke-Patil, NVIDIA + - Amit Rao, NVIDIA + - Balaji Calidas, QUALCOMM + - Ben Ashbaugh, INTEL + - Carsten Rohde, NVIDIA + - Christoph Kubisch, NVIDIA + - Debalina Bhattacharjee, NVIDIA + - Faith Ekstrand, INTEL + - James Jones, NVIDIA + - Jeremy Kemp, IMAGINATION + - Joshua Kelly, QUALCOMM + - Karthik Raghavan Ravi, NVIDIA + - Kedar Patil, NVIDIA + - Kevin Petit, ARM + - Nikhil Joshi, NVIDIA + - Sharan Ashwathnarayan, NVIDIA + - Vivek Kini, NVIDIA + +=== Description + +`cl_khr_external_memory_opaque_fd` extends +{cl_external_memory_handle_type_khr_TYPE} to support a POSIX file descriptor +handle as an external memory handle type that may be specified when creating +a buffer or image memory object. + +==== Background + +TODO + +==== Rationale + +TODO + +=== Interactions With Other Extensions + +TODO + +// The 'New ...' section can be auto-generated + +=== New Commands + + None + +=== New Structures + + * None + +=== New Types + + * None + +=== New Enums + + * {cl_external_memory_handle_type_khr_TYPE} + ** {CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_FD_KHR} + +=== Issues + +include::provisional_notice.asciidoc[] + +=== Version History + + * Revision 0.9.0, 2021-09-10 + ** Initial version (provisional). 
+ * Revision 0.9.1, 2023-05-04 + ** Clarified device handle list enum cannot be specified without an + external memory handle (provisional). + * Revision 0.9.2, 2023-08-01 + ** Changed device handle list enum to the memory-specific + {CL_MEM_DEVICE_HANDLE_LIST_KHR} (provisional). + * Revision 0.9.3, 2023-08-29 + ** Added query for + {CL_DEVICE_EXTERNAL_MEMORY_IMPORT_ASSUME_LINEAR_IMAGES_HANDLE_TYPES_KHR} + (provisional). diff --git a/api/cl_khr_external_memory_win32.asciidoc b/api/cl_khr_external_memory_win32.asciidoc new file mode 100644 index 00000000..fdbb7e75 --- /dev/null +++ b/api/cl_khr_external_memory_win32.asciidoc @@ -0,0 +1,93 @@ +// Copyright 2021-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_external_memory_win32.txt[] + +//@ *Revision*:: +//@ 0.9.3 +//@ *Extension and Version Dependencies*:: +//@ This extension requires OpenCL 3.0. +//@ This extension requires the `<>` extension. + +=== Other Extension Metadata + +*Last Modified Date*:: + 2023-08-29 +*IP Status*:: + No known IP claims. +*Contributors*:: + - Ajit Hakke-Patil, NVIDIA + - Amit Rao, NVIDIA + - Balaji Calidas, QUALCOMM + - Ben Ashbaugh, INTEL + - Carsten Rohde, NVIDIA + - Christoph Kubisch, NVIDIA + - Debalina Bhattacharjee, NVIDIA + - Faith Ekstrand, INTEL + - James Jones, NVIDIA + - Jeremy Kemp, IMAGINATION + - Joshua Kelly, QUALCOMM + - Karthik Raghavan Ravi, NVIDIA + - Kedar Patil, NVIDIA + - Kevin Petit, ARM + - Nikhil Joshi, NVIDIA + - Sharan Ashwathnarayan, NVIDIA + - Vivek Kini, NVIDIA + +=== Description + +`cl_khr_external_memory_win32` extends +{cl_external_memory_handle_type_khr_TYPE} to support Windows handles as +external memory handle types that may be specified when creating a buffer or +image memory object. + +==== Background + +TODO + +==== Rationale + +TODO + +=== Interactions With Other Extensions + +TODO + +// The 'New ...' 
section can be auto-generated + +=== New Commands + + None + +=== New Structures + + * None + +=== New Types + + * None + +=== New Enums + + * {cl_external_memory_handle_type_khr_TYPE} + ** {CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_WIN32_KHR} + ** {CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_WIN32_KMT_KHR} + +=== Issues + +include::provisional_notice.asciidoc[] + +=== Version History + + * Revision 0.9.0, 2021-09-10 + ** Initial version (provisional). + * Revision 0.9.1, 2023-05-04 + ** Clarified device handle list enum cannot be specified without an + external memory handle (provisional). + * Revision 0.9.2, 2023-08-01 + ** Changed device handle list enum to the memory-specific + {CL_MEM_DEVICE_HANDLE_LIST_KHR} (provisional). + * Revision 0.9.3, 2023-08-29 + ** Added query for + {CL_DEVICE_EXTERNAL_MEMORY_IMPORT_ASSUME_LINEAR_IMAGES_HANDLE_TYPES_KHR} + (provisional). diff --git a/api/cl_khr_external_semaphore.asciidoc b/api/cl_khr_external_semaphore.asciidoc new file mode 100644 index 00000000..211b42ff --- /dev/null +++ b/api/cl_khr_external_semaphore.asciidoc @@ -0,0 +1,289 @@ +// Copyright 2021-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_external_semaphore.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2021-09-10 +*Interactions and External Dependencies*:: + * This extension requires OpenCL 1.2. + * The `<<cl_khr_semaphore>>` extension is required as it defines semaphore + objects as well as for wait and signal operations on semaphores. + * For OpenCL to be able to import external semaphores from other APIs + using this extension, the other API is required to provide the following + mechanisms: + ** Ability to export semaphore handles + ** Ability to query semaphore handle in the form of one of the handle types + supported by OpenCL.
+ * The other APIs that want to use semaphores exported by OpenCL using this + extension are required to provide the following mechanism: + ** Ability to import semaphore handles using handle types exported by + OpenCL. +*IP Status*:: + No known IP claims. +*Contributors*:: + - Ajit Hakke-Patil, NVIDIA + - Amit Rao, NVIDIA + - Balaji Calidas, QUALCOMM + - Ben Ashbaugh, INTEL + - Carsten Rohde, NVIDIA + - Christoph Kubisch, NVIDIA + - Debalina Bhattacharjee, NVIDIA + - Faith Ekstrand, INTEL + - James Jones, NVIDIA + - Jeremy Kemp, IMAGINATION + - Joshua Kelly, QUALCOMM + - Karthik Raghavan Ravi, NVIDIA + - Kedar Patil, NVIDIA + - Kevin Petit, ARM + - Nikhil Joshi, NVIDIA + - Sharan Ashwathnarayan, NVIDIA + - Vivek Kini, NVIDIA + +=== Description + +`cl_khr_semaphore` introduced semaphores as a new type along with a set of +APIs for create, release, retain, wait and signal operations on it. +This extension defines APIs and mechanisms to share semaphores created in an +external API by importing into and exporting from OpenCL. + +This extension defines: + + * New attributes that can be passed as part of + {cl_semaphore_properties_khr_TYPE} for specifying properties of external + semaphores to be imported or exported. + * New attributes that can be passed as part of + {cl_semaphore_info_khr_TYPE} for specifying properties of external + semaphores to be exported. + * An extension to {clCreateSemaphoreWithPropertiesKHR} to accept external + semaphore properties allowing to import or export an external semaphore + into or from OpenCL. + * Semaphore handle types required for importing and exporting semaphores. + * Modifications to Wait and Signal API behavior when dealing with external + semaphores created from different handle types. + * API to query exportable semaphore handles using a specified handle type. + +The layered extensions `<<cl_khr_external_semaphore_dx_fence>>`, +`<<cl_khr_external_semaphore_opaque_fd>>`, +`<<cl_khr_external_semaphore_sync_fd>>`, and +`<<cl_khr_external_semaphore_win32>>` define specific external semaphores +that may be imported into or exported from OpenCL.
+ +=== New Types + + * {cl_external_semaphore_handle_type_khr_TYPE} + +=== New Enums + + * {cl_platform_info_TYPE} + ** {CL_PLATFORM_SEMAPHORE_IMPORT_HANDLE_TYPES_KHR} + ** {CL_PLATFORM_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR} + * {cl_device_info_TYPE} + ** {CL_DEVICE_SEMAPHORE_IMPORT_HANDLE_TYPES_KHR} + ** {CL_DEVICE_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR} + * {cl_semaphore_properties_khr_TYPE} and {cl_semaphore_info_khr_TYPE}: + ** {CL_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR} + ** {CL_SEMAPHORE_EXPORT_HANDLE_TYPES_LIST_END_KHR} + * {cl_semaphore_info_khr_TYPE} + ** {CL_SEMAPHORE_EXPORTABLE_KHR} + +=== Sample Code + +The following examples use the `<>` +extension to obtain an external semaphore. +Similar code can be written using the other layered extensions. + +==== Example for Importing a Semaphore Created by Another API in OpenCL in a Single-Device Context + +[source,c] +---- +// Get cl_devices of the platform. +clGetDeviceIDs(..., &devices, &deviceCount); + +// Create cl_context with just first device +clCreateContext(..., 1, devices, ...); + +// Obtain fd/win32 or similar handle for external semaphore to be imported +// from the other API. +int fd = getFdForExternalSemaphore(); + +// Create clSema of type cl_semaphore_khr usable on the only available device +// assuming the semaphore was imported from the same device. + +cl_semaphore_properties_khr sema_props[] = + {(cl_semaphore_properties_khr)CL_SEMAPHORE_TYPE_KHR, + (cl_semaphore_properties_khr)CL_SEMAPHORE_TYPE_BINARY_KHR, + (cl_semaphore_properties_khr)CL_SEMAPHORE_HANDLE_OPAQUE_FD_KHR, + (cl_semaphore_properties_khr)fd, + 0}; + +int errcode_ret = 0; +cl_semaphore_khr clSema = clCreateSemaphoreWithPropertiesKHR(context, + sema_props, + &errcode_ret); +---- + +==== Example for Importing a Semaphore Created by Another API in OpenCL in a Multi-device Context for Single Device Usage + +[source,c] +---- +// Get cl_devices of the platform. 
+clGetDeviceIDs(..., &devices, &deviceCount); + +// Create cl_context with first two devices +clCreateContext(..., 2, devices, ...); + +// Obtain fd/win32 or similar handle for external semaphore to be imported +// from the other API. +int fd = getFdForExternalSemaphore(); + +// Create clSema of type cl_semaphore_khr usable only on device 1 +// assuming the semaphore was imported from the same device. +cl_semaphore_properties_khr sema_props[] = { + (cl_semaphore_properties_khr)CL_SEMAPHORE_TYPE_KHR, + (cl_semaphore_properties_khr)CL_SEMAPHORE_TYPE_BINARY_KHR, + (cl_semaphore_properties_khr)CL_SEMAPHORE_HANDLE_OPAQUE_FD_KHR, + (cl_semaphore_properties_khr)fd, + (cl_semaphore_properties_khr)CL_SEMAPHORE_DEVICE_HANDLE_LIST_KHR, + (cl_semaphore_properties_khr)devices[1], + CL_SEMAPHORE_DEVICE_HANDLE_LIST_END_KHR, + 0 +}; + +int errcode_ret = 0; +cl_semaphore_khr clSema = clCreateSemaphoreWithPropertiesKHR(context, + sema_props, + &errcode_ret); + +---- + +==== Example for Synchronization Using a Semaphore Created by Another API and Imported in OpenCL + +[source,c] +---- +// Create clSema using one of the above examples of external semaphore creation. 
+ +int errcode_ret = 0; +cl_semaphore_khr clSema = clCreateSemaphoreWithPropertiesKHR(context, + sema_props, + &errcode_ret); + +// Start the main loop + +while (true) { + // (not shown) Signal the semaphore from the other API + + // Wait for the semaphore in OpenCL + clEnqueueWaitSemaphoresKHR(/*command_queue*/ command_queue, + /*num_sema_objects*/ 1, + /*sema_objects*/ &clSema, + /*num_events_in_wait_list*/ 0, + /*event_wait_list*/ NULL, + /*event*/ NULL); + + // Launch kernel + clEnqueueNDRangeKernel(command_queue, ...); + + // Signal the semaphore in OpenCL + clEnqueueSignalSemaphoresKHR(/*command_queue*/ command_queue, + /*num_sema_objects*/ 1, + /*sema_objects*/ &clSema, + /*num_events_in_wait_list*/ 0, + /*event_wait_list*/ NULL, + /*event*/ NULL); + + // (not shown) Launch work in the other API that waits on 'clSema' + +} +---- + +==== Example for Synchronization Using a Semaphore Exported by OpenCL + +[source,c] +---- +// Get cl_devices of the platform. +clGetDeviceIDs(..., &devices, &deviceCount); + +// Create cl_context with first two devices +clCreateContext(..., 2, devices, ...); + +// Create clSema of type cl_semaphore_khr usable only on device 1 +cl_semaphore_properties_khr sema_props[] = { + (cl_semaphore_properties_khr)CL_SEMAPHORE_TYPE_KHR, + (cl_semaphore_properties_khr)CL_SEMAPHORE_TYPE_BINARY_KHR, + (cl_semaphore_properties_khr)CL_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR, + (cl_semaphore_properties_khr)CL_SEMAPHORE_HANDLE_OPAQUE_FD_KHR, + CL_SEMAPHORE_EXPORT_HANDLE_TYPES_LIST_END_KHR, + (cl_semaphore_properties_khr)CL_SEMAPHORE_DEVICE_HANDLE_LIST_KHR, + (cl_semaphore_properties_khr)devices[1], + CL_SEMAPHORE_DEVICE_HANDLE_LIST_END_KHR, + 0 +}; + +int errcode_ret = 0; +cl_semaphore_khr clSema = clCreateSemaphoreWithPropertiesKHR(context, + sema_props, + &errcode_ret); + +// Application queries handle-type and the exportable handle associated with the semaphore. 
+clGetSemaphoreInfoKHR(clSema, + CL_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR, + sizeof(cl_external_semaphore_handle_type_khr), + &handle_type, + &handle_type_size); + +// The other API or process can use the exported semaphore handle +// to import +int fd = -1; +if (handle_type == CL_SEMAPHORE_HANDLE_OPAQUE_FD_KHR) { + clGetSemaphoreHandleForTypeKHR(clSema, + device, + CL_SEMAPHORE_HANDLE_OPAQUE_FD_KHR, + sizeof(int), + &fd, + NULL); +} + +// Start the main rendering loop + +while (true) { + // (not shown) Signal the semaphore from the other API + + // Wait for the semaphore in OpenCL + clEnqueueWaitSemaphoresKHR(/*command_queue*/ command_queue, + /*num_sema_objects*/ 1, + /*sema_objects*/ &clSema, + /*num_events_in_wait_list*/ 0, + /*event_wait_list*/ NULL, + /*event*/ NULL); + + // Launch kernel + clEnqueueNDRangeKernel(command_queue, ...); + + // Signal the semaphore in OpenCL + clEnqueueSignalSemaphoresKHR(/*command_queue*/ command_queue, + /*num_sema_objects*/ 1, + /*sema_objects*/ &clSema, + /*num_events_in_wait_list*/ 0, + /*event_wait_list*/ NULL, + /*event*/ NULL); + + // (not shown) Launch work in the other API that waits on 'clSema' +} +---- + +=== Issues + +include::provisional_notice.asciidoc[] + +=== Version History + + * Revision 0.9.0, 2021-09-10 + ** Initial version (provisional). + * Revision 0.9.1, 2023-11-16 + ** Added {CL_SEMAPHORE_EXPORTABLE_KHR_anchor}. + * Revision 0.9.2, 2023-11-21 + ** Added re-import function call to `<>` + diff --git a/api/cl_khr_external_semaphore_dx_fence.asciidoc b/api/cl_khr_external_semaphore_dx_fence.asciidoc new file mode 100644 index 00000000..6f9c2ee7 --- /dev/null +++ b/api/cl_khr_external_semaphore_dx_fence.asciidoc @@ -0,0 +1,49 @@ +// Copyright 2021-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_external_semaphore_dx_fence.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2021-09-10 +*IP Status*:: + No known IP claims. 
+*Contributors*:: + - Ajit Hakke-Patil, NVIDIA + - Amit Rao, NVIDIA + - Balaji Calidas, QUALCOMM + - Ben Ashbaugh, INTEL + - Carsten Rohde, NVIDIA + - Christoph Kubisch, NVIDIA + - Debalina Bhattacharjee, NVIDIA + - Faith Ekstrand, INTEL + - James Jones, NVIDIA + - Jeremy Kemp, IMAGINATION + - Joshua Kelly, QUALCOMM + - Karthik Raghavan Ravi, NVIDIA + - Kedar Patil, NVIDIA + - Kevin Petit, ARM + - Nikhil Joshi, NVIDIA + - Sharan Ashwathnarayan, NVIDIA + - Vivek Kini, NVIDIA + +=== Description + +`cl_khr_external_semaphore_dx_fence` supports importing and exporting a +D3D12 fence as an external semaphore using the APIs introduced by +`<>`. + +=== New Enums + + * {cl_external_semaphore_handle_type_khr_TYPE} + ** {CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR} + +=== Issues + +include::provisional_notice.asciidoc[] + +=== Version History + + * Revision 0.9.0, 2021-09-10 + ** Initial version (provisional). diff --git a/api/cl_khr_external_semaphore_opaque_fd.asciidoc b/api/cl_khr_external_semaphore_opaque_fd.asciidoc new file mode 100644 index 00000000..d1119242 --- /dev/null +++ b/api/cl_khr_external_semaphore_opaque_fd.asciidoc @@ -0,0 +1,49 @@ +// Copyright 2021-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_external_semaphore_opaque_fd.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2021-09-10 +*IP Status*:: + No known IP claims. 
+*Contributors*:: + - Ajit Hakke-Patil, NVIDIA + - Amit Rao, NVIDIA + - Balaji Calidas, QUALCOMM + - Ben Ashbaugh, INTEL + - Carsten Rohde, NVIDIA + - Christoph Kubisch, NVIDIA + - Debalina Bhattacharjee, NVIDIA + - Faith Ekstrand, INTEL + - James Jones, NVIDIA + - Jeremy Kemp, IMAGINATION + - Joshua Kelly, QUALCOMM + - Karthik Raghavan Ravi, NVIDIA + - Kedar Patil, NVIDIA + - Kevin Petit, ARM + - Nikhil Joshi, NVIDIA + - Sharan Ashwathnarayan, NVIDIA + - Vivek Kini, NVIDIA + +=== Description + +`cl_khr_external_semaphore_opaque_fd` supports importing and exporting a +restricted POSIX file descriptor as an external semaphore using the APIs +introduced by `<>`. + +=== New Enums + + * {cl_external_semaphore_handle_type_khr_TYPE} + ** {CL_SEMAPHORE_HANDLE_OPAQUE_FD_KHR} + +=== Issues + +include::provisional_notice.asciidoc[] + +=== Version History + + * Revision 0.9.0, 2021-09-10 + ** Initial version (provisional). diff --git a/api/cl_khr_external_semaphore_sync_fd.asciidoc b/api/cl_khr_external_semaphore_sync_fd.asciidoc new file mode 100644 index 00000000..a8175fe2 --- /dev/null +++ b/api/cl_khr_external_semaphore_sync_fd.asciidoc @@ -0,0 +1,62 @@ +// Copyright 2021-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_external_semaphore_sync_fd.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2021-09-10 +*IP Status*:: + No known IP claims. 
+*Contributors*:: + - Ajit Hakke-Patil, NVIDIA + - Amit Rao, NVIDIA + - Balaji Calidas, QUALCOMM + - Ben Ashbaugh, INTEL + - Carsten Rohde, NVIDIA + - Christoph Kubisch, NVIDIA + - Debalina Bhattacharjee, NVIDIA + - Faith Ekstrand, INTEL + - James Jones, NVIDIA + - Jeremy Kemp, IMAGINATION + - Joshua Kelly, QUALCOMM + - Karthik Raghavan Ravi, NVIDIA + - Kedar Patil, NVIDIA + - Kevin Petit, ARM + - Nikhil Joshi, NVIDIA + - Sharan Ashwathnarayan, NVIDIA + - Vivek Kini, NVIDIA + +=== Description + +`cl_khr_external_semaphore_sync_fd` supports importing and exporting a POSIX +file descriptor handle to a Linux Sync File or Android Fence object as an +external semaphore using the APIs introduced by +`<>`. + +=== New Commands + + * {clGetSemaphoreHandleForTypeKHR} + +=== New Types + + * {cl_semaphore_reimport_properties_khr_TYPE} + +=== New Enums + + * {cl_external_semaphore_handle_type_khr_TYPE} + ** {CL_SEMAPHORE_HANDLE_SYNC_FD_KHR} + +=== Issues + +include::provisional_notice.asciidoc[] + +=== Version History + + * Revision 0.9.0, 2021-09-10 + ** Initial version (provisional). + * Revision 0.9.1, 2023-11-16 + ** Added {CL_SEMAPHORE_EXPORTABLE_KHR_anchor}. + * Revision 0.9.2, 2023-11-21 + ** Added re-import function call to `<>` diff --git a/api/cl_khr_external_semaphore_win32.asciidoc b/api/cl_khr_external_semaphore_win32.asciidoc new file mode 100644 index 00000000..224302f2 --- /dev/null +++ b/api/cl_khr_external_semaphore_win32.asciidoc @@ -0,0 +1,50 @@ +// Copyright 2021-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_external_semaphore_win32.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2021-09-10 +*IP Status*:: + No known IP claims. 
+*Contributors*:: + - Ajit Hakke-Patil, NVIDIA + - Amit Rao, NVIDIA + - Balaji Calidas, QUALCOMM + - Ben Ashbaugh, INTEL + - Carsten Rohde, NVIDIA + - Christoph Kubisch, NVIDIA + - Debalina Bhattacharjee, NVIDIA + - Faith Ekstrand, INTEL + - James Jones, NVIDIA + - Jeremy Kemp, IMAGINATION + - Joshua Kelly, QUALCOMM + - Karthik Raghavan Ravi, NVIDIA + - Kedar Patil, NVIDIA + - Kevin Petit, ARM + - Nikhil Joshi, NVIDIA + - Sharan Ashwathnarayan, NVIDIA + - Vivek Kini, NVIDIA + +=== Description + +`cl_khr_external_semaphore_win32` supports importing and exporting an NT +handle or global share handle as an external semaphore using the APIs +introduced by `<>`. + +=== New Enums + + * {cl_external_semaphore_handle_type_khr_TYPE} + ** {CL_SEMAPHORE_HANDLE_OPAQUE_WIN32_KHR} + ** {CL_SEMAPHORE_HANDLE_OPAQUE_WIN32_KMT_KHR} + +=== Issues + +include::provisional_notice.asciidoc[] + +=== Version History + + * Revision 0.9.0, 2021-09-10 + ** Initial version (provisional). diff --git a/api/cl_khr_fp16.asciidoc b/api/cl_khr_fp16.asciidoc new file mode 100644 index 00000000..7732cc29 --- /dev/null +++ b/api/cl_khr_fp16.asciidoc @@ -0,0 +1,34 @@ +// Copyright 2017-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +//@ TODO There are API elements (defines) to this, as well as OpenCL C +//@ TODO Why does this even exist? All API elements appear to be in OpenCL 1.0 + +include::{generated}/meta/{refprefix}cl_khr_fp16.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2020-04-21 +*IP Status*:: + No known IP claims. +*Contributors*:: + TBD + +=== Description + +`cl_khr_fp16` adds support to OpenCL C for half scalar and vector types as +built-in types that can be used for arithmetic operations, conversions, etc. + +See the link:{OpenCLCSpecURL}#cl_khr_fp16[Half-Precision Floating-Point] +section of the OpenCL C specification for more information. 
+ +=== New Tokens + + * {cl_device_info_TYPE} + ** {CL_DEVICE_HALF_FP_CONFIG} + +=== Version History + + * Revision 1.0.0, 2020-04-21 + ** First assigned version. diff --git a/api/cl_khr_fp64.asciidoc b/api/cl_khr_fp64.asciidoc new file mode 100644 index 00000000..ad1e8f76 --- /dev/null +++ b/api/cl_khr_fp64.asciidoc @@ -0,0 +1,35 @@ +// Copyright 2017-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +//@ TODO There are API elements (defines) to this, like DBL_RADIX, as well as OpenCL C +//@ TODO Most API elements appear to be in OpenCL 1.0 / OpenCL 1.2 + +include::{generated}/meta/{refprefix}cl_khr_fp64.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2020-04-21 +*IP Status*:: + No known IP claims. +*Contributors*:: + TBD + +=== Description + +`cl_khr_fp64` adds support to OpenCL C for double-precision scalar and +vector types as built-in types that can be used for arithmetic operations, +conversions, etc. + +See the link:{OpenCLCSpecURL}#cl_khr_fp64[Double-Precision Floating-Point] +section of the OpenCL C specification for more information. + +=== New Tokens + + * {cl_device_info_TYPE} + ** {CL_DEVICE_DOUBLE_FP_CONFIG} + +=== Version History + + * Revision 1.0.0, 2020-04-21 + ** First assigned version. diff --git a/api/cl_khr_gl_depth_images.asciidoc b/api/cl_khr_gl_depth_images.asciidoc new file mode 100644 index 00000000..470af985 --- /dev/null +++ b/api/cl_khr_gl_depth_images.asciidoc @@ -0,0 +1,34 @@ +// Copyright 2017-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_gl_depth_images.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2020-04-21 +*IP Status*:: + No known IP claims. +*Contributors*:: + TBD + +=== Description + +`cl_khr_gl_depth_images` extends OpenCL / OpenGL sharing defined by the +`<>` extension to allow an OpenCL image to be created +from an OpenGL depth or depth-stencil texture. 
+ +Depth images with an image channel order of {CL_DEPTH_STENCIL} can only be +created using the {clCreateFromGLTexture} API. + +=== New Enums + + * {cl_channel_order_TYPE} + ** {CL_DEPTH_STENCIL} + * {cl_channel_type_TYPE} + ** {CL_UNORM_INT24} + +=== Version History + + * Revision 1.0.0, 2020-04-21 + ** First assigned version. diff --git a/api/cl_khr_gl_event.asciidoc b/api/cl_khr_gl_event.asciidoc new file mode 100644 index 00000000..b7d10c5c --- /dev/null +++ b/api/cl_khr_gl_event.asciidoc @@ -0,0 +1,109 @@ +// Copyright 2017-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_gl_event.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2020-04-21 +*IP Status*:: + No known IP claims. +*Contributors*:: + TBD + +=== Description + +`cl_khr_gl_event` allows creating OpenCL event objects linked to OpenGL +fence sync objects, potentially improving efficiency of sharing images and +buffers between the two APIs. +The companion `GL_ARB_cl_event` extension provides the complementary +functionality of creating an OpenGL sync object from an OpenCL event object. + +In addition, this extension modifies the behavior of +{clEnqueueAcquireGLObjects} and {clEnqueueReleaseGLObjects} to +<> with an OpenGL context bound in the same thread +as the OpenCL context. + +=== New Commands + + * {clCreateEventFromGLsyncKHR} + +=== New Tokens + + * {cl_command_type_TYPE} + ** {CL_COMMAND_GL_FENCE_SYNC_OBJECT_KHR} + +=== Issues + + . How are references between CL events and GL syncs handled? ++ +-- +*PROPOSED*: The linked CL event places a single reference on the GL sync +object. +That reference is removed when the CL event is deleted. +A more expensive alternative would be to reflect changes in the CL event +reference count through to the GL sync. +-- + + . How are linkages to synchronization primitives in other APIs handled? ++ +-- +*UNRESOLVED*. 
+We will at least want to have a way to link events to EGL sync objects. +There is probably no analogous DX concept. +There would be an entry point for each type of synchronization primitive to +be linked to, such as {clCreateEventFromEGLSyncKHR}. + +An alternative is a generic clCreateEventFromExternalEvent taking an +attribute list. +The attribute list would include information defining the type of the +external primitive and additional information (GL sync object handle, EGL +display and sync object handle, etc.) specific to that type. +This allows a single entry point to be reused. + +These will probably be separate extensions following the API proposed here. +-- + + . Should the {CL_EVENT_COMMAND_TYPE} correspond to the type of command + (fence) or the type of the linked sync object? ++ +-- +*PROPOSED*: To the type of the linked sync object. +-- + + . Should we support both explicit and implicit synchronization? ++ +-- +*PROPOSED*: Yes. +Implicit synchronization is suitable when GL and CL are executing in the +same application thread. +Explicit synchronization is suitable when they are executing in different +threads but the expense of glFinish is too high. +-- + + . Should this be a platform or device extension? ++ +-- +*PROPOSED*: Platform extension. +This may result in considerable under-the-hood work to implement the +sync->event semantics using only the public GL API, however, when multiple +drivers and devices with different GL support levels coexist in the same +runtime. +-- + + . Where can events generated from GL syncs be usable? ++ +-- +*PROPOSED*: Only with clEnqueueAcquireGLObjects, and attempting to use such +an event elsewhere will generate an error. +There is no apparent use case for using such events elsewhere, and possibly +some cost to supporting it, balanced by the cost of checking the source of +events in all other commands accepting them as parameters. +-- + +=== Version History + + * Revision 1.0.0, 2020-04-21 + ** First assigned version. 
diff --git a/api/cl_khr_gl_msaa_sharing.asciidoc b/api/cl_khr_gl_msaa_sharing.asciidoc new file mode 100644 index 00000000..eb0fed1e --- /dev/null +++ b/api/cl_khr_gl_msaa_sharing.asciidoc @@ -0,0 +1,38 @@ +// Copyright 2017-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_gl_msaa_sharing.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2020-04-21 +*IP Status*:: + No known IP claims. +*Contributors*:: + TBD + +=== Description + +`cl_khr_gl_msaa_sharing` extends the `<>` extension to +allow a shared OpenCL/OpenGL image object to be created from an OpenGL +multi-sampled ("`MSAA`") color or depth texture. + +This extension adds multi-sample support to {clCreateFromGLTexture} and +{clGetGLTextureInfo}, and allows <>. + +This extension requires `<>`. + +See the link:{OpenCLCSpecURL}#cl_khr_gl_msaa_sharing[cl_khr_gl_msaa_sharing] +section of the OpenCL C specification for more information. + +=== New Enums + + * {cl_gl_texture_info_TYPE} + ** {CL_GL_NUM_SAMPLES} + +=== Version History + + * Revision 1.0.0, 2020-04-21 + ** First assigned version. diff --git a/api/cl_khr_gl_sharing.asciidoc b/api/cl_khr_gl_sharing.asciidoc new file mode 100644 index 00000000..b6c55df6 --- /dev/null +++ b/api/cl_khr_gl_sharing.asciidoc @@ -0,0 +1,242 @@ +// Copyright 2017-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_gl_sharing.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2020-04-21 +*IP Status*:: + No known IP claims. +*Contributors*:: + TBD + +=== Description + +The `cl_khr_gl_sharing` extension allows use of OpenGL buffer, texture, and +renderbuffer objects as OpenCL memory objects, referred to as "`Shared +OpenCL/OpenGL Memory Objects`". + +An OpenCL context may be associated with an OpenGL context or share group +object, using additional attributes described for {clCreateContext}. 
+ +An OpenCL image object may be created from an OpenGL texture or renderbuffer +object as described for {clCreateFromGLTexture} and +{clCreateFromGLRenderbuffer}, respectively. + +An OpenCL buffer object may be created from an OpenGL buffer object using +{clCreateFromGLBuffer}. + +Any supported OpenGL object defined within the associated OpenGL context or +share group object may be shared, with the exception of the default OpenGL +objects (i.e. objects named zero), which may not be shared. + +Additional information on the use of shared OpenCL/OpenGL memory objects is +found in the <>, +<> and +<> sections. + +An OpenGL implementation supporting buffer objects and sharing of texture +and buffer object images with OpenCL is required by this extension. + +=== New Commands + + * {clGetGLContextInfoKHR} + * {clCreateFromGLBuffer} + * {clCreateFromGLTexture} + * {clCreateFromGLRenderbuffer} + * {clGetGLObjectInfo} + * {clGetGLTextureInfo} + * {clEnqueueAcquireGLObjects} + * {clEnqueueReleaseGLObjects} + +=== New Types + + * {cl_gl_context_info_TYPE} + * {cl_gl_object_type_TYPE} + * {cl_gl_texture_info_TYPE} + * {cl_gl_platform_info} + +=== New Tokens + + * New Error Codes + ** {CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR} + * {cl_gl_context_info_TYPE} + ** {CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR} + ** {CL_DEVICES_FOR_GL_CONTEXT_KHR} + * {cl_context_properties_TYPE} + ** {CL_GL_CONTEXT_KHR} + ** {CL_EGL_DISPLAY_KHR} + ** {CL_GLX_DISPLAY_KHR} + ** {CL_WGL_HDC_KHR} + ** {CL_CGL_SHAREGROUP_KHR} + * {cl_gl_object_type_TYPE} + ** {CL_GL_OBJECT_BUFFER} + ** {CL_GL_OBJECT_TEXTURE2D} + ** {CL_GL_OBJECT_TEXTURE3D} + ** {CL_GL_OBJECT_RENDERBUFFER} + ** {CL_GL_OBJECT_TEXTURE2D_ARRAY} + ** {CL_GL_OBJECT_TEXTURE1D} + ** {CL_GL_OBJECT_TEXTURE1D_ARRAY} + ** {CL_GL_OBJECT_TEXTURE_BUFFER} + * {cl_gl_texture_info_TYPE} + ** {CL_GL_TEXTURE_TARGET} + ** {CL_GL_MIPMAP_LEVEL} + + +=== Issues + + . How should the OpenGL context be identified when creating an associated + OpenCL context?
++ +-- +*RESOLVED*: by using a (display,context handle) attribute pair to identify +an arbitrary OpenGL or OpenGL ES context with respect to one of the +window-system binding layers EGL, GLX, or WGL, or a share group handle to +identify a CGL share group. +If a context is specified, it need not be current to the thread calling +clCreateContext*. + +A previously suggested approach would use a single boolean attribute +CL_USE_GL_CONTEXT_KHR to allow creating a context associated with the +currently bound OpenGL context. +This may still be implemented as a separate extension, and might allow more +efficient acquire/release behavior in the special case where they are being +executed in the same thread as the bound GL context used to create the CL +context. +-- + + . What should the format of an attribute list be? ++ +-- +After considerable discussion, we think we can live with a list of + pairs terminated by zero. +The list is passed as 'cl_context_properties *_properties'_, where +cl_context_properties is typedefed to be 'intptr_t' in cl.h. + +This effectively allows encoding all scalar integer, pointer, and handle +values in the host API into the argument list and is analogous to the +structure and type of EGL attribute lists. +`NULL` attribute lists are also allowed. +Again as for EGL, any attributes not explicitly passed in the list will take +on a defined default value that does something reasonable. + +Experience with EGL, GLX, and WGL has shown attribute lists to be a +sufficiently flexible and general mechanism to serve the needs of management +calls such as context creation. +It is not completely general (encoding floating-point and non-scalar +attribute values is not straightforward), and other approaches were +suggested such as opaque attribute lists with getter/setter methods, or +arrays of variadic structures. +-- + + .
What's the behavior of an associated OpenGL or OpenCL context when using + resources defined by the other associated context, and that context is + destroyed? ++ +-- +*RESOLVED*: OpenCL objects place a reference on the data store underlying +the corresponding GL object when they're created. +The GL name corresponding to that data store may be deleted, but the data +store itself remains so long as any CL object has a reference to it. +However, destroying all GL contexts in the share group corresponding to a CL +context results in implementation-dependent behavior when using a +corresponding CL object, up to and including program termination. +-- + + . How about sharing with D3D? ++ +-- +Sharing between D3D and OpenCL should use the same attribute list mechanism, +though obviously with different parameters, and be exposed as a similar +parallel OpenCL extension. +There may be an interaction between that extension and this one since it's +not yet clear if it will be possible to create a CL context simultaneously +sharing GL and D3D objects. +-- + + . Under what conditions will context creation fail due to sharing? ++ +-- +*RESOLVED*: Several cross-platform failure conditions are described (GL +context or CGL share group doesn't exist, GL context doesn't support types +of GL objects, GL context implementation doesn't allow sharing), but +additional failures may result due to implementation-dependent reasons and +should be added to this extension as such failures are discovered. +Sharing between OpenCL and OpenGL requires integration at the driver +internals level. +-- + + . What command-queues can *clEnqueueAcquire/ReleaseGLObjects* be placed + on? ++ +-- +*RESOLVED*: All command-queues. +This restriction is enforced at context creation time. +If any device passed to context creation cannot support shared OpenCL/OpenGL +memory objects, context creation will fail with a {CL_INVALID_OPERATION} +error. +-- + + . 
How can applications determine which command-queue to place an + Acquire/Release on? ++ +-- +*RESOLVED*: The {clGetGLContextInfoKHR} returns either the CL device +currently corresponding to a specified GL context (typically the display +it's running on), or a list of all the CL devices the specified context +might run on (potentially useful in multiheaded / "`virtual screen`" +environments). +This command is not placed together with commands to create shared +OpenCL/OpenGL memory objects because it relies on the same property-list +method of specifying a GL context introduced by this extension. + +If no devices are returned, it means that the GL context exists on an older +GPU not capable of running OpenCL, but still capable of sharing objects +between GL running on that GPU and CL running elsewhere. +-- + + . What is the meaning of the {CL_DEVICES_FOR_GL_CONTEXT_KHR} query? ++ +-- +*RESOLVED*: The list of all CL devices that may ever be associated with a +specific GL context. +On platforms such as MacOS X, the "`virtual screen`" concept allows multiple +GPUs to back a single virtual display. +Similar functionality might be implemented on other windowing systems, such +as a transparent heterogeneous multiheaded X server. +Therefore the exact meaning of this query is interpreted relative to the +binding layer API in use. +-- + + . What happened to the "`extension`"s `+cl_khr_gl_sharing__context+` and + `+cl_khr_gl_sharing__memobjs+` that were previously published? ++ +-- +*RESOLVED*: These were not actual extensions, but the result of splitting +the `cl_khr_gl_sharing` extension language into two separate sections for +publication. +All extension language has now been integrated into the unified +Specification and this distinction is not useful. +-- + + . Where are the `clCreateFromGLTexture2D` and `clCreateFromGLTexture3D` + functions described?
++ +-- +*PROPOSED*: These functions are present in cl.xml, listed as OpenCL 1.0 APIs +that were deprecated in OpenCL 1.2, but the current extension language does +not describe them. +Since OpenCL 1.2 itself is so old, it is not worth the effort to look back +and determine the exact details of these APIs. +-- + + +=== Version History + + * Revision 1.0.0, 2020-04-21 + ** First assigned version. diff --git a/api/cl_khr_global_int32_base_atomics.asciidoc b/api/cl_khr_global_int32_base_atomics.asciidoc new file mode 100644 index 00000000..36b33167 --- /dev/null +++ b/api/cl_khr_global_int32_base_atomics.asciidoc @@ -0,0 +1,29 @@ +// Copyright 2017-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_global_int32_base_atomics.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2020-04-21 +*IP Status*:: + No known IP claims. +*Contributors*:: + TBD + +=== Description + +`cl_khr_global_int32_base_atomics` allows OpenCL C atomic operations to be +performed on 32-bit signed and unsigned integers in global memory. + +This extension became a core feature in OpenCL 1.1, with the built-in atomic +function names changed to use the **atomic_** prefix instead of **atom_**. + +See the link:{OpenCLCSpecURL}#cl_khr_global_int32_base_atomics[Global 32-Bit +Base Atomics] section of the OpenCL C specification for more information. + +=== Version History + + * Revision 1.0.0, 2020-04-21 + ** First assigned version. diff --git a/api/cl_khr_global_int32_extended_atomics.asciidoc b/api/cl_khr_global_int32_extended_atomics.asciidoc new file mode 100644 index 00000000..e4fd7421 --- /dev/null +++ b/api/cl_khr_global_int32_extended_atomics.asciidoc @@ -0,0 +1,31 @@ +// Copyright 2017-2024 The Khronos Group Inc.
+// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_global_int32_extended_atomics.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2020-04-21 +*IP Status*:: + No known IP claims. +*Contributors*:: + TBD + +=== Description + +`cl_khr_global_int32_extended_atomics` allows OpenCL C extended atomic +operations to be performed on 32-bit signed and unsigned integers in global +memory. + +This extension became a core feature in OpenCL 1.1, with the built-in atomic +function names changed to use the **atomic_** prefix instead of **atom_**. + +See the link:{OpenCLCSpecURL}#cl_khr_global_int32_extended_atomics[Global +32-Bit Extended Atomics] section of the OpenCL C specification for more +information. + +=== Version History + + * Revision 1.0.0, 2020-04-21 + ** First assigned version. diff --git a/ext/cl_khr_icd.asciidoc b/api/cl_khr_icd.asciidoc similarity index 58% rename from ext/cl_khr_icd.asciidoc rename to api/cl_khr_icd.asciidoc index 2298e6cb..a28baa50 100644 --- a/ext/cl_khr_icd.asciidoc +++ b/api/cl_khr_icd.asciidoc @@ -1,43 +1,51 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ +// Copyright 2017-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 -[[cl_khr_icd-opencl]] -== Installable Client Drivers +//@ TODO This should probably be in an appendix? It is a "platform +//@ TODO extension" but so are others -[[cl_khr_icd-overview]] -=== Overview +include::{generated}/meta/{refprefix}cl_khr_icd.txt[] -This section describes a platform extension which defines a simple mechanism +=== Other Extension Metadata + +*Last Modified Date*:: + 2020-04-21 +*IP Status*:: + No known IP claims. 
+*Contributors*:: + TBD + +=== Description + +`cl_khr_icd` describes a platform extension which defines a simple mechanism through which the Khronos OpenCL installable client driver loader (ICD Loader) may expose multiple separate vendor installable client drivers (Vendor ICDs) for OpenCL. An application written against the ICD Loader will be able to access all -{cl_platform_id_TYPE}s exposed by all vendor implementations with the ICD Loader -acting as a demultiplexor. +{cl_platform_id_TYPE}s exposed by all vendor implementations with the ICD +Loader acting as a demultiplexor. This is a platform extension, so if this extension is supported by an -implementation, the string *cl_khr_icd* will be present in the +implementation, the string `"cl_khr_icd"` will be present in the {CL_PLATFORM_EXTENSIONS} string. -=== General Information +=== Source Code -==== Version History +The official source for the ICD Loader is available on github, at: -[cols="1,1,3",options="header",] -|==== -| *Date* | *Version* | *Description* -| 2020-04-21 | 1.0.0 | First assigned version. -|==== +https://github.com/KhronosGroup/OpenCL-ICD-Loader -[[cl_khr_icd-inferring-vendors-from-function-call-arguments]] -=== Inferring Vendors from Function Call Arguments +The complete `_cl_icd_dispatch` structure is defined in the header +`cl_icd.h`, which is available as a part of the OpenCL headers. + + +=== Inferring Vendors From Function Call Arguments At every OpenCL function call, the ICD Loader infers the vendor ICD function to call from the arguments to the function. An object is said to be ICD compatible if it is of the following structure: -[source,opencl] +[source,c] ---- struct _cl_ { @@ -54,13 +62,14 @@ is used to direct calls to a particular vendor implementation. All objects created from ICD compatible objects must be ICD compatible. The definition for `_cl_icd_dispatch` is provided along with the OpenCL -headers. 
Existing members can never be removed from that structure but new -members can be appended. +headers. +Existing members can never be removed from that structure but new members +can be appended. Functions which do not have an argument from which the vendor implementation may be inferred have been deprecated and may be ignored. -[[cl_khr_icd-icd-data]] + === ICD Data A Vendor ICD is defined by two pieces of data: @@ -73,14 +82,14 @@ A Vendor ICD is defined by two pieces of data: default suffix for extensions implemented only by that vendor. The vendor suffix string is optional. -[[cl_khr_icd-icd-loader-vendor-enumeration-on-windows]] + === ICD Loader Vendor Enumeration on Windows -To enumerate Vendor ICDs on Windows, the ICD Loader will first -scan for REG_SZ string values in the "Display Adapter" and -"Software Components" HKR registry keys. The exact registry -keys to scan should be obtained via PnP Configuration Manager -APIs, but will look like: +To enumerate Vendor ICDs on Windows, the ICD Loader will first scan for +REG_SZ string values in the "Display Adapter" and "Software Components" HKR +registry keys. +The exact registry keys to scan should be obtained via PnP Configuration +Manager APIs, but will look like: For 64-bit ICDs: @@ -120,17 +129,16 @@ Then the ICD Loader will open the Vendor ICD library: c:\vendor a\vndra_ocl.dll ---- -The ICD Loader will also scan for REG_DWORD values in the registry -key: +The ICD Loader will also scan for REG_DWORD values in the registry key: [literal] ---- HKLM\SOFTWARE\Khronos\OpenCL\Vendors ---- -For each registry value in this key which has data set to 0, the -ICD Loader will open the Vendor ICD library specified by the name -of the registry value. +For each registry value in this key which has data set to 0, the ICD Loader +will open the Vendor ICD library specified by the name of the registry +value. 
For example, if the registry contains the value: @@ -147,7 +155,7 @@ Then the ICD Loader will open the Vendor ICD library: c:\vendor a\vndra_ocl.dll ---- -[[cl_khr_icd-icd-loader-vendor-enumeration-on-linux]] + === ICD Loader Vendor Enumeration on Linux To enumerate vendor ICDs on Linux, the ICD Loader scans the files in the @@ -175,7 +183,7 @@ libVendorAOpenCL.so then the ICD Loader will load the library `libVendorAOpenCL.so`. -[[cl_khr_icd-icd-loader-vendor-enumeration-on-android]] + === ICD Loader Vendor Enumeration on Android To enumerate vendor ICDs on Android, the ICD Loader scans the files in the @@ -203,7 +211,7 @@ libVendorAOpenCL.so then the ICD Loader will load the library `libVendorAOpenCL.so`. -[[cl_khr_icd-adding-a-vendor-library]] + === Adding a Vendor Library Upon successfully loading a Vendor ICD's library, the ICD Loader queries the @@ -217,115 +225,36 @@ ignore the library. Next the ICD Loader queries available ICD-enabled platforms in the library using {clIcdGetPlatformIDsKHR}. For each of these platforms, the ICD Loader queries the platform's extension -string to verify that *cl_khr_icd* is supported, then queries the platform's -Vendor ICD extension suffix using {clGetPlatformInfo} with the value -{CL_PLATFORM_ICD_SUFFIX_KHR}. +string to verify that `<>` is supported, then queries the +platform's Vendor ICD extension suffix using {clGetPlatformInfo} with the +value {CL_PLATFORM_ICD_SUFFIX_KHR}. If any of these steps fail, the ICD Loader will ignore the Vendor ICD and continue on to the next. 
-[[cl_khr_icd-new-procedures-and-functions]] -=== New Procedures and Functions +=== New Commands -[source,opencl] ----- -cl_int clIcdGetPlatformIDsKHR(cl_uint num_entries, - cl_platform_id *platforms, - cl_uint *num_platforms); ----- + * {clIcdGetPlatformIDsKHR} -[[cl_khr_icd-new-tokens]] === New Tokens Accepted as _param_name_ to the function {clGetPlatformInfo}: ----- -CL_PLATFORM_ICD_SUFFIX_KHR ----- + * {CL_PLATFORM_ICD_SUFFIX_KHR} Returned by {clGetPlatformIDs} when no platforms are found: ----- -CL_PLATFORM_NOT_FOUND_KHR ----- - -[[cl_khr_icd-additions-to-chapter-4]] -=== Additions to Chapter 4 of the OpenCL 2.2 Specification - -In _section 4.1_, replace the description of the return values of -{clGetPlatformIDs} with: + * {CL_PLATFORM_NOT_FOUND_KHR} -"{clGetPlatformIDs} returns {CL_SUCCESS} if the function is executed -successfully and there are a non zero number of platforms available. -It returns {CL_PLATFORM_NOT_FOUND_KHR} if zero platforms are available. -It returns {CL_INVALID_VALUE} if _num_entries_ is equal to zero and -_platforms_ is not `NULL` or if both _num_platforms_ and _platforms_ are -`NULL`." - -In _section 4.1_, add the following after the description of -{clGetPlatformIDs}: - -"The list of platforms accessible through the Khronos ICD Loader can be -obtained using the following function: - -include::{generated}/api/protos/clIcdGetPlatformIDsKHR.txt[] - -_num_entries_ is the number of {cl_platform_id_TYPE} entries that can be added to -_platforms_. -If _platforms_ is not `NULL`, then _num_entries_ must be greater than zero. - -_platforms_ returns a list of OpenCL platforms available for access through -the Khronos ICD Loader. -The {cl_platform_id_TYPE} values returned in _platforms_ are ICD compatible and can -be used to identify a specific OpenCL platform. -If the _platforms_ argument is `NULL`, then this argument is ignored. 
-The number of OpenCL platforms returned is the minimum of the value -specified by _num_entries_ or the number of OpenCL platforms available. - -_num_platforms_ returns the number of OpenCL platforms available. -If _num_platforms_ is `NULL`, then this argument is ignored. - -{clIcdGetPlatformIDsKHR} returns {CL_SUCCESS} if the function is executed -successfully and there are a non zero number of platforms available. -It returns {CL_PLATFORM_NOT_FOUND_KHR} if zero platforms are available. -It returns {CL_INVALID_VALUE} if _num_entries_ is equal to zero and -_platforms_ is not `NULL` or if both _num_platforms_ and _platforms_ are -`NULL`." - -Add the following to _table 4.1_: - -[cols="2,1,2",options="header"] -|==== -| Platform Info -| Return Type -| Description - -| {CL_PLATFORM_ICD_SUFFIX_KHR} -| {char_TYPE}[] -| The function name suffix used to identify extension functions to be - directed to this platform by the ICD Loader. - -|==== - -[[cl_khr_icd-source-code]] -=== Source Code - -The official source for the ICD Loader is available on github, at: - -https://github.com/KhronosGroup/OpenCL-ICD-Loader - -The complete `_cl_icd_dispatch` structure is defined in the header -*cl_icd.h*, which is available as a part of the OpenCL headers. - -[[cl_khr_icd-issues]] === Issues . Some OpenCL functions do not take an object argument from which their - vendor library may be identified (e.g, {clUnloadCompiler}), how will they - be handled? + vendor library may be identified (e.g, {clUnloadCompiler}), how will + they be handled? + -- -RESOLVED: Such functions will be a noop for all calls through the ICD Loader. +*RESOLVED*: Such functions will be a noop for all calls through the ICD +Loader. -- . How are OpenCL extension to be handled? @@ -336,14 +265,14 @@ RESOLVED: Such functions will be a noop for all calls through the ICD Loader. //are implemented by any vendor. //The suffix mechanism provides access for vendor extensions which are not yet //added to the ICD Loader. 
-RESOLVED: Extension APIs must be queried using +*RESOLVED*: Extension APIs must be queried using {clGetExtensionFunctionAddressForPlatform}. -- . How will the ICD Loader handle a `NULL` {cl_platform_id_TYPE}? + -- -RESOLVED: The ICD will by default choose the first enumerated platform as +*RESOLVED*: The ICD will by default choose the first enumerated platform as the `NULL` platform. // TODO: This seems out-of-date and incorrect. //The user can override this default by setting an environment variable @@ -355,7 +284,7 @@ the `NULL` platform. . There exists no mechanism to unload the ICD Loader, should there be one? + -- -RESOLVED: As there is no standard mechanism for unloading a vendor +*RESOLVED*: As there is no standard mechanism for unloading a vendor implementation, do not add one for the ICD Loader. -- @@ -363,9 +292,15 @@ implementation, do not add one for the ICD Loader. functions? + -- -RESOLVED: The ICD Loader will check for `NULL` objects passed to the OpenCL -functions without trying to dereference the `NULL` objects for obtaining the -ICD dispatch table. -On detecting a `NULL` object it will return one of the an invalid object error -values (e.g. {CL_INVALID_DEVICE} corresponding to the object in question. +*RESOLVED*: The ICD Loader will check for `NULL` objects passed to the +OpenCL functions without trying to dereference the `NULL` objects for +obtaining the ICD dispatch table. +On detecting a `NULL` object it will return one of the invalid object +error values (e.g. {CL_INVALID_DEVICE}) corresponding to the object in +question. -- + +=== Version History + + * Revision 1.0.0, 2020-04-21 + ** First assigned version. diff --git a/api/cl_khr_il_program.asciidoc b/api/cl_khr_il_program.asciidoc new file mode 100644 index 00000000..fc4a3d7e --- /dev/null +++ b/api/cl_khr_il_program.asciidoc @@ -0,0 +1,41 @@ +// Copyright 2017-2024 The Khronos Group Inc. 
+// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_il_program.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2020-04-21 +*IP Status*:: + No known IP claims. +*Contributors*:: + TBD + +=== Description + +`cl_khr_il_program` adds the ability to create programs with intermediate +language (IL), usually SPIR-V. +Further information about the format and contents of SPIR-V may be found in +the SPIR-V Specification. +Information about how SPIR-V modules behave in the OpenCL environment may be +found in the OpenCL SPIR-V Environment Specification. + +The functionality described by this extension is a core feature in OpenCL +2.1. + +=== New Commands + + * {clCreateProgramWithILKHR} + +=== New Tokens + + * {cl_device_info_TYPE} + ** {CL_DEVICE_IL_VERSION_KHR} + * {cl_program_info_TYPE} + ** {CL_PROGRAM_IL_KHR} + +=== Version History + + * Revision 1.0.0, 2020-04-21 + ** First assigned version. diff --git a/api/cl_khr_image2d_from_buffer.asciidoc b/api/cl_khr_image2d_from_buffer.asciidoc new file mode 100644 index 00000000..17432c1e --- /dev/null +++ b/api/cl_khr_image2d_from_buffer.asciidoc @@ -0,0 +1,33 @@ +// Copyright 2017-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_image2d_from_buffer.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2020-04-21 +*IP Status*:: + No known IP claims. +*Contributors*:: + TBD + +=== Description + +`cl_khr_image2d_from_buffer` allows a 2D image to be created from an +existing OpenCL buffer memory object. + +This extension became a core feature in OpenCL 2.0. + +Refer to the discussion of 2D images created from buffers in the +<> section for additional details. + +=== New Tokens + + * {CL_DEVICE_IMAGE_PITCH_ALIGNMENT_KHR} + * {CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT_KHR} + +=== Version History + + * Revision 1.0.0, 2020-04-21 + ** First assigned version. 
diff --git a/api/cl_khr_initialize_memory.asciidoc b/api/cl_khr_initialize_memory.asciidoc new file mode 100644 index 00000000..8eddeedf --- /dev/null +++ b/api/cl_khr_initialize_memory.asciidoc @@ -0,0 +1,46 @@ +// Copyright 2017-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_initialize_memory.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2020-04-21 +*IP Status*:: + No known IP claims. +*Contributors*:: + TBD + +=== Description + +`cl_khr_initialize_memory` adds OpenCL C support for initializing local and +private memory before a kernel begins execution. +This is accomplished by specifying a flag at context creation time affecting +all such memory. + +Memory is allocated in various forms in OpenCL both explicitly (global +memory) or implicitly (local, private memory). +This allocation so far does not provide a straightforward mechanism to +initialize the memory on allocation. +In other words what is lacking is the equivalent of `calloc` for the +currently supported `malloc` like capability. +This functionality is useful for a variety of reasons including ease of +debugging, application controlled limiting of visibility to previous +contents of memory and in some cases, optimization. + +See the link:{OpenCLCSpecURL}#cl_khr_initialize_memory[Initializing Memory] +section of the OpenCL C specification for more information. + +=== New Enums + + * {cl_context_properties_TYPE} + ** {CL_CONTEXT_MEMORY_INITIALIZE_KHR} + * {cl_context_memory_initialize_khr_TYPE} + ** {CL_CONTEXT_MEMORY_INITIALIZE_LOCAL_KHR} + ** {CL_CONTEXT_MEMORY_INITIALIZE_PRIVATE_KHR} + +=== Version History + + * Revision 1.0.0, 2020-04-21 + ** First assigned version. 
diff --git a/api/cl_khr_int64_base_atomics.asciidoc b/api/cl_khr_int64_base_atomics.asciidoc new file mode 100644 index 00000000..8723cab2 --- /dev/null +++ b/api/cl_khr_int64_base_atomics.asciidoc @@ -0,0 +1,27 @@ +// Copyright 2017-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_int64_base_atomics.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2020-04-21 +*IP Status*:: + No known IP claims. +*Contributors*:: + TBD + +=== Description + +`cl_khr_int64_base_atomics` adds built-in OpenCL functions supporting atomic +operations to be performed on 64-bit signed and unsigned integers in global +and local memory. + +See the link:{OpenCLCSpecURL}#cl_khr_int64_base_atomics[64-Bit Base Atomics] +section of the OpenCL C specification for more information. + +=== Version History + + * Revision 1.0.0, 2020-04-21 + ** First assigned version. diff --git a/api/cl_khr_int64_extended_atomics.asciidoc b/api/cl_khr_int64_extended_atomics.asciidoc new file mode 100644 index 00000000..f4fab107 --- /dev/null +++ b/api/cl_khr_int64_extended_atomics.asciidoc @@ -0,0 +1,27 @@ +// Copyright 2017-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_int64_extended_atomics.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2020-04-21 +*IP Status*:: + No known IP claims. +*Contributors*:: + TBD + +=== Description + +`cl_khr_int64_extended_atomics` adds built-in OpenCL functions supporting +extended atomic operations to be performed on 64-bit signed and unsigned +integers in global and local memory. + +See the link:{OpenCLCSpecURL}#cl_khr_int64_extended_atomics[64-Bit Extended +Atomics] section of the OpenCL C specification for more information. + +=== Version History + + * Revision 1.0.0, 2020-04-21 + ** First assigned version. 
diff --git a/api/cl_khr_integer_dot_product.asciidoc b/api/cl_khr_integer_dot_product.asciidoc new file mode 100644 index 00000000..ef47c2a6 --- /dev/null +++ b/api/cl_khr_integer_dot_product.asciidoc @@ -0,0 +1,64 @@ +// Copyright 2020-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_integer_dot_product.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2021-06-23 +*IP Status*:: + No known IP claims. +*Contributors*:: + - Kévin Petit, Arm Ltd. + - Jeremy Kemp, Imagination Technologies + - Ben Ashbaugh, Intel + - Ruihao Zhang, Qualcomm + - Stuart Brady, Arm Ltd + - Balaji Calidas, Qualcomm + - Ayal Zaks, Intel + +=== Description + +`cl_khr_integer_dot_product` adds support for SPIR-V instructions and OpenCL +C built-in functions to compute the dot product of vectors of integers. + +OpenCL C compilers supporting this extension will define the extension macro +`cl_khr_integer_dot_product`, and may define corresponding feature macros +{opencl_c_integer_dot_product_input_4x8bit} and +{opencl_c_integer_dot_product_input_4x8bit_packed} depending on the reported +capabilities. + +See the link:{OpenCLCSpecURL}#cl_khr_integer_dot_product[Integer Dot +Product] section of the OpenCL C specification for more information. + +// The 'New ...' 
section can be auto-generated + +=== New Structures + + * {cl_device_integer_dot_product_acceleration_properties_khr_TYPE} + +=== New Types + + * {cl_device_integer_dot_product_capabilities_khr_TYPE} + +=== New Enums + + * {cl_device_integer_dot_product_capabilities_khr_TYPE} + ** {CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_4x8BIT_KHR} + ** {CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_4x8BIT_PACKED_KHR} + * {cl_device_info_TYPE} + ** {CL_DEVICE_INTEGER_DOT_PRODUCT_CAPABILITIES_KHR} + ** {CL_DEVICE_INTEGER_DOT_PRODUCT_ACCELERATION_PROPERTIES_8BIT_KHR} + ** {CL_DEVICE_INTEGER_DOT_PRODUCT_ACCELERATION_PROPERTIES_4x8BIT_PACKED_KHR} + +=== New SPIR-V Capabilities + + * TBD + +=== Version History + + * Revision 1.0.0, 2021-06-17 + ** Initial version + * Revision 2.0.0, 2021-06-23 + ** 8-bit support is mandatory, added 8-bit acceleration properties. diff --git a/api/cl_khr_local_int32_base_atomics.asciidoc b/api/cl_khr_local_int32_base_atomics.asciidoc new file mode 100644 index 00000000..4fba21aa --- /dev/null +++ b/api/cl_khr_local_int32_base_atomics.asciidoc @@ -0,0 +1,29 @@ +// Copyright 2017-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_local_int32_base_atomics.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2020-04-21 +*IP Status*:: + No known IP claims. +*Contributors*:: + TBD + +=== Description + +`cl_khr_local_int32_base_atomics` allows OpenCL C atomic operations to be +performed on 32-bit signed and unsigned integers in local memory. + +This extension became a core feature in OpenCL 1.1, with the built-in atomic +function names changed to use the **atomic_** prefix instead of **atom_**. + +See the link:{OpenCLCSpecURL}#cl_khr_local_int32_base_atomics[Local 32-Bit +Base Atomics] section of the OpenCL C specification for more information. + +=== Version History + + * Revision 1.0.0, 2020-04-21 + ** First assigned version. 
diff --git a/api/cl_khr_local_int32_extended_atomics.asciidoc b/api/cl_khr_local_int32_extended_atomics.asciidoc new file mode 100644 index 00000000..05b5d0ca --- /dev/null +++ b/api/cl_khr_local_int32_extended_atomics.asciidoc @@ -0,0 +1,31 @@ +// Copyright 2017-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_local_int32_extended_atomics.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2020-04-21 +*IP Status*:: + No known IP claims. +*Contributors*:: + TBD + +=== Description + +`cl_khr_local_int32_extended_atomics` allows OpenCL C extended atomic +operations to be performed on 32-bit signed and unsigned integers in local +memory. + +This extension became a core feature in OpenCL 1.1, with the built-in atomic +function names changed to use the **atomic_** prefix instead of **atom_**. + +See the link:{OpenCLCSpecURL}#cl_khr_local_int32_extended_atomics[Local +32-Bit Extended Atomics] section of the OpenCL C specification for more +information. + +=== Version History + + * Revision 1.0.0, 2020-04-21 + ** First assigned version. diff --git a/api/cl_khr_mipmap_image.asciidoc b/api/cl_khr_mipmap_image.asciidoc new file mode 100644 index 00000000..ab2a7fe0 --- /dev/null +++ b/api/cl_khr_mipmap_image.asciidoc @@ -0,0 +1,39 @@ +// Copyright 2017-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_mipmap_image.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2020-04-21 +*IP Status*:: + No known IP claims. +*Contributors*:: + TBD + +=== Description + +The `cl_khr_mipmap_image` extension adds the ability to create and access +mipmapped images: + + * {clCreateImage} is extended to create mipmapped images. + * {clCreateFromGLTexture} is extended to create a mipmapped image from a + mipmapped GL texture. 
+ * {clEnqueueReadImage}, {clEnqueueWriteImage}, {clEnqueueCopyImage}, + {clEnqueueFillImage}, {clEnqueueCopyImageToBuffer}, + {clEnqueueCopyBufferToImage}, and {clEnqueueMapImage} are + extended to operate on regions of mipmapped images. + ** The <> section describes how mipmap levels are encoded in + existing parameters to these commands. + * OpenCL C built-in functions are added to read from and query a mipmapped + image. + +See the link:{OpenCLCSpecURL}#cl_khr_mipmap_image[Mipmapped Image Reads and +Queries] section of the OpenCL C specification for more information. + +=== Version History + + * Revision 1.0.0, 2020-04-21 + ** First assigned version. diff --git a/api/cl_khr_mipmap_image_writes.asciidoc b/api/cl_khr_mipmap_image_writes.asciidoc new file mode 100644 index 00000000..84278ce6 --- /dev/null +++ b/api/cl_khr_mipmap_image_writes.asciidoc @@ -0,0 +1,29 @@ +// Copyright 2017-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_mipmap_image_writes.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2020-04-21 +*IP Status*:: + No known IP claims. +*Contributors*:: + TBD + +=== Description + +The `cl_khr_mipmap_image_writes` extension adds OpenCL C built-in functions +to write to a mipmapped image. + +If `cl_khr_mipmap_image_writes` is supported by the OpenCL device, the +`<<cl_khr_mipmap_image>>` extension must also be supported. + +See the link:{OpenCLCSpecURL}#cl_khr_mipmap_image_writes[Mipmapped Image +Writes] section of the OpenCL C specification for more information. + +=== Version History + + * Revision 1.0.0, 2020-04-21 + ** First assigned version. diff --git a/api/cl_khr_pci_bus_info.asciidoc b/api/cl_khr_pci_bus_info.asciidoc new file mode 100644 index 00000000..0f5a87ed --- /dev/null +++ b/api/cl_khr_pci_bus_info.asciidoc @@ -0,0 +1,43 @@ +// Copyright 2018-2024 The Khronos Group Inc. 
+// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_pci_bus_info.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2021-04-19 +*IP Status*:: + No known IP claims. +*Contributors*:: + TBD + +=== Description + +The `cl_khr_pci_bus_info` extension adds a new query to obtain PCI bus +information about an OpenCL device. + +Not all OpenCL devices have PCI bus information, either due to the device +not being connected to the system through a PCI interface or due to platform +specific restrictions and policies. +Thus this extension is only expected to be supported by OpenCL devices which +can provide the information. + +As a consequence, applications should always check for the presence of the +extension string for each individual OpenCL device for which they intend to +issue the new query and should not make any assumptions about the +availability of the extension on any given platform. + +=== New Types + + * {cl_device_pci_bus_info_khr_TYPE} + +=== New Enums + + * {cl_device_info_TYPE} + ** {CL_DEVICE_PCI_BUS_INFO_KHR} + +=== Version History + + * Revision 1.0.0, 2021-04-19 + ** Initial version. diff --git a/api/cl_khr_priority_hints.asciidoc b/api/cl_khr_priority_hints.asciidoc new file mode 100644 index 00000000..d36c2083 --- /dev/null +++ b/api/cl_khr_priority_hints.asciidoc @@ -0,0 +1,46 @@ +// Copyright 2017-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_priority_hints.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2020-04-21 +*IP Status*:: + No known IP claims. +*Contributors*:: + TBD + +=== Description + +The `cl_khr_priority_hints` extension adds priority hints for OpenCL, but +does not specify the scheduling behavior or minimum guarantees. +It is expected that the user guides associated with each implementation +which supports this extension will describe the scheduling behavior +guarantees. 
+ +Note that the priority hint is orthogonal to functionality defined in the +`<<cl_khr_throttle_hints>>` extension. +For example, a task may have high priority ({CL_QUEUE_PRIORITY_HIGH_KHR}) +but should at the same time be executed at an optimized throttle setting +({CL_QUEUE_THROTTLE_LOW_KHR}). + +=== New Types + + * {cl_queue_priority_khr_TYPE} + +=== New Enums + + * {cl_queue_properties_TYPE} + ** {CL_QUEUE_PRIORITY_KHR} + * {cl_queue_priority_khr_TYPE} + ** {CL_QUEUE_PRIORITY_HIGH_KHR} + ** {CL_QUEUE_PRIORITY_MED_KHR} + ** {CL_QUEUE_PRIORITY_LOW_KHR} + + +=== Version History + + * Revision 1.0.0, 2020-04-21 + ** First assigned version. diff --git a/api/cl_khr_select_fprounding_mode.asciidoc b/api/cl_khr_select_fprounding_mode.asciidoc new file mode 100644 index 00000000..c9781415 --- /dev/null +++ b/api/cl_khr_select_fprounding_mode.asciidoc @@ -0,0 +1,31 @@ +// Copyright 2017-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_select_fprounding_mode.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2020-04-21 +*IP Status*:: + No known IP claims. +*Contributors*:: + TBD + +=== Description + +`cl_khr_select_fprounding_mode` allows an application to specify the +rounding mode for an instruction or group of instructions in the OpenCL C +program source. + +NOTE: This extension was deprecated in OpenCL 1.1, and its use is not +recommended. + +See the link:{OpenCLCSpecURL}#cl_khr_select_fprounding_mode[Select +Floating-Point Rounding Mode] section of the OpenCL C specification for more +information. + +=== Version History + + * Revision 1.0.0, 2020-04-21 + ** First assigned version. diff --git a/api/cl_khr_semaphore.asciidoc b/api/cl_khr_semaphore.asciidoc new file mode 100644 index 00000000..43c2b1ae --- /dev/null +++ b/api/cl_khr_semaphore.asciidoc @@ -0,0 +1,262 @@ +// Copyright 2021-2024 The Khronos Group Inc. 
+// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_semaphore.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2023-08-01 +*IP Status*:: + No known IP claims. +*Contributors*:: + - Ajit Hakke-Patil, NVIDIA + - Amit Rao, NVIDIA + - Balaji Calidas, QUALCOMM + - Ben Ashbaugh, INTEL + - Carsten Rohde, NVIDIA + - Christoph Kubisch, NVIDIA + - Debalina Bhattacharjee, NVIDIA + - Faith Ekstrand, INTEL + - Gorazd Sumkovski, ARM + - James Jones, NVIDIA + - Jeremy Kemp, IMAGINATION + - Joshua Kelly, QUALCOMM + - Karthik Raghavan Ravi, NVIDIA + - Kedar Patil, NVIDIA + - Kevin Petit, ARM + - Nikhil Joshi, NVIDIA + - Sharan Ashwathnarayan, NVIDIA + - Vivek Kini, NVIDIA + +=== Description + +OpenCL provides {cl_event_TYPE} as a primary mechanism of synchronization +between host and device as well as across devices. +While events can be waited on or can be passed as dependencies across +work-submissions, they suffer from the following limitations: + + * They are immutable. + * They are not reusable. + +`cl_khr_semaphore` introduces a new type of synchronization object to +represent _semaphores_ that can be reused, waited on, and signaled multiple +times by OpenCL work-submissions. + +In particular, this extension defines: + + * A new type called {cl_semaphore_khr_TYPE} to represent the semaphore + objects. + * A new type called {cl_semaphore_properties_khr_TYPE} to specify metadata + associated with semaphores. + * Functions to create, retain, and release semaphores. + * Functions to wait on and signal semaphore objects. + * Functions to query the properties of semaphore objects. + +// The 'New ...' 
section can be auto-generated + +=== New Commands + + * {clCreateSemaphoreWithPropertiesKHR} + * {clEnqueueWaitSemaphoresKHR} + * {clEnqueueSignalSemaphoresKHR} + * {clGetSemaphoreInfoKHR} + * {clReleaseSemaphoreKHR} + * {clRetainSemaphoreKHR} + +=== New Types + + * {cl_semaphore_khr_TYPE} + * {cl_semaphore_properties_khr_TYPE} + * {cl_semaphore_info_khr_TYPE} + * {cl_semaphore_type_khr_TYPE} + * {cl_semaphore_payload_khr_TYPE} + +=== New Enums + + * {cl_platform_info_TYPE} + ** {CL_PLATFORM_SEMAPHORE_TYPES_KHR} + * {cl_device_info_TYPE} + ** {CL_DEVICE_SEMAPHORE_TYPES_KHR} + * {cl_semaphore_type_khr_TYPE} + ** {CL_SEMAPHORE_TYPE_BINARY_KHR} 1 + * {cl_semaphore_info_khr_TYPE} + ** {CL_SEMAPHORE_CONTEXT_KHR} + ** {CL_SEMAPHORE_REFERENCE_COUNT_KHR} + ** {CL_SEMAPHORE_PROPERTIES_KHR} + ** {CL_SEMAPHORE_PAYLOAD_KHR} + * {cl_semaphore_info_khr_TYPE} or {cl_semaphore_properties_khr_TYPE} + ** {CL_SEMAPHORE_TYPE_KHR} + ** {CL_SEMAPHORE_DEVICE_HANDLE_LIST_KHR} + ** {CL_SEMAPHORE_DEVICE_HANDLE_LIST_END_KHR} +// TODO these are not described anywhere in the extension spec document + * New return values from {clGetEventInfo} + ** {CL_COMMAND_SEMAPHORE_WAIT_KHR} + ** {CL_COMMAND_SEMAPHORE_SIGNAL_KHR} + * New error codes + ** {CL_INVALID_SEMAPHORE_KHR} + + +[[cl_khr_semaphore-Sample-Code]] +=== Sample Code + +==== Example for Semaphore Creation in a Single Device Context + +[source] +---- +// Get cl_devices of the platform. 
+clGetDeviceIDs(..., &devices, &deviceCount); + +// Create cl_context with just first device +context = clCreateContext(..., 1, devices, ...); + +// Create clSema of type cl_semaphore_khr usable on single device in the context + +cl_semaphore_properties_khr sema_props[] = + {(cl_semaphore_properties_khr)CL_SEMAPHORE_TYPE_KHR, + (cl_semaphore_properties_khr)CL_SEMAPHORE_TYPE_BINARY_KHR, + 0}; + +int errcode_ret = 0; + +cl_semaphore_khr clSema = clCreateSemaphoreWithPropertiesKHR(context, + sema_props, + &errcode_ret); +---- + +==== Example for Semaphore Creation for a Single Device in a Multi-Device Context + +[source] +---- +// Get cl_devices of the platform. +clGetDeviceIDs(..., &devices, &deviceCount); + +// Create cl_context with first two devices +clCreateContext(..., 2, devices, ...); + +// Create clSema of type cl_semaphore_khr usable only on device 0 +cl_semaphore_properties_khr sema_props[] = { + (cl_semaphore_properties_khr)CL_SEMAPHORE_TYPE_KHR, + (cl_semaphore_properties_khr)CL_SEMAPHORE_TYPE_BINARY_KHR, + (cl_semaphore_properties_khr)CL_SEMAPHORE_DEVICE_HANDLE_LIST_KHR, + (cl_semaphore_properties_khr)devices[0], + CL_SEMAPHORE_DEVICE_HANDLE_LIST_END_KHR, + 0 +}; + +int errcode_ret = 0; + +cl_semaphore_khr clSema = clCreateSemaphoreWithPropertiesKHR(context, + sema_props, + &errcode_ret); +---- + +==== Example for Synchronization Using Wait and Signal + +[source] +---- +// clSema is created using clCreateSemaphoreWithPropertiesKHR +// using one of the examples for semaphore creation. 
+ +cl_semaphore_khr clSema = clCreateSemaphoreWithPropertiesKHR(context, + sema_props, + &errcode_ret); + +// Start the main loop + +while (true) { + // (not shown) Signal the semaphore from other work + + // Wait for the semaphore in OpenCL + // by calling clEnqueueWaitSemaphoresKHR on 'clSema' + clEnqueueWaitSemaphoresKHR(/*command_queue*/ command_queue, + /*num_sema_objects*/ 1, + /*sema_objects*/ &clSema, + /*sema_payload_list*/ NULL, + /*num_events_in_wait_list*/ 0, + /*event_wait_list*/ NULL, + /*event*/ NULL); + + // Launch kernel that accesses extMem + clEnqueueNDRangeKernel(command_queue, ...); + + // Signal the semaphore in OpenCL + clEnqueueSignalSemaphoresKHR(/*command_queue*/ command_queue, + /*num_sema_objects*/ 1, + /*sema_objects*/ &clSema, + /*sema_payload_list*/ NULL, + /*num_events_in_wait_list*/ 0, + /*event_wait_list*/ NULL, + /*event*/ NULL); + + // (not shown) Launch other work that waits on 'clSema' +} +---- + +==== Example for {clGetSemaphoreInfoKHR} + +[source] +---- +// clSema is created using clCreateSemaphoreWithPropertiesKHR +// using one of the examples for semaphore creation. 
+ +cl_semaphore_khr clSema = clCreateSemaphoreWithPropertiesKHR(context, + sema_props, + &errcode_ret); + +// Start the main rendering loop + +while (true) { + // (not shown) Signal the semaphore from other work + + // Wait for the semaphore in OpenCL, by calling clEnqueueWaitSemaphoresKHR on 'clSema' + clEnqueueWaitSemaphoresKHR(/*command_queue*/ command_queue, + /*num_sema_objects*/ 1, + /*sema_objects*/ &clSema, + /*sema_payload_list*/ NULL, + /*num_events_in_wait_list*/ 0, + /*event_wait_list*/ NULL, + /*event*/ NULL); + + // Launch kernel in OpenCL + clEnqueueNDRangeKernel(command_queue, ...); + + // Signal the semaphore in OpenCL + clEnqueueSignalSemaphoresKHR(/*command_queue*/ command_queue, + /*num_sema_objects*/ 1, + /*sema_objects*/ &clSema, + /*sema_payload_list*/ NULL, + /*num_events_in_wait_list*/ 0, + /*event_wait_list*/ NULL, + /*event*/ NULL); + + // Query type of clSema + clGetSemaphoreInfoKHR(/*sema_object*/ clSema, + /*param_name*/ CL_SEMAPHORE_TYPE_KHR, + /*param_value_size*/ sizeof(cl_semaphore_type_khr), + /*param_value*/ &clSemaType, + /*param_value_ret_size*/ &clSemaTypeSize); + + if (clSemaType == CL_SEMAPHORE_TYPE_BINARY_KHR) { + // Do something + } + else { + // Do something else + } + // (not shown) Launch other work that waits on 'clSema' +} +---- + +=== Issues + +include::provisional_notice.asciidoc[] + +=== Version History + + * Revision 0.9.0, 2021-09-10 + ** Initial version (provisional). + * Revision 0.9.1, 2023-08-01 + ** Changed device handle list enum to the semaphore-specific + {CL_SEMAPHORE_DEVICE_HANDLE_LIST_KHR} (provisional). + + diff --git a/api/cl_khr_spir.asciidoc b/api/cl_khr_spir.asciidoc new file mode 100644 index 00000000..33ec23e4 --- /dev/null +++ b/api/cl_khr_spir.asciidoc @@ -0,0 +1,39 @@ +// Copyright 2017-2024 The Khronos Group Inc. 
+// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_spir.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2020-04-21 +*IP Status*:: + No known IP claims. +*Contributors*:: + TBD + +=== Description + +`cl_khr_spir` adds the ability to create an OpenCL program object from a +Standard Portable Intermediate Representation (SPIR) instance. +A SPIR instance is a vendor-neutral non-source representation for OpenCL C +programs. + +See the <> for +information on compiling SPIR binaries. + +`cl_khr_spir` has been superseded by the SPIR-V intermediate representation, +which is supported by the `<<cl_khr_il_program>>` extension, and is a core +feature in OpenCL 2.1. + +=== New Tokens + + * {cl_device_info_TYPE} + ** {CL_DEVICE_SPIR_VERSIONS} + * {cl_program_binary_type_TYPE} + ** {CL_PROGRAM_BINARY_TYPE_INTERMEDIATE} + +=== Version History + + * Revision 1.0.0, 2020-04-21 + ** First assigned version. diff --git a/api/cl_khr_srgb_image_writes.asciidoc b/api/cl_khr_srgb_image_writes.asciidoc new file mode 100644 index 00000000..e12ccb36 --- /dev/null +++ b/api/cl_khr_srgb_image_writes.asciidoc @@ -0,0 +1,33 @@ +// Copyright 2017-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_srgb_image_writes.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2020-04-21 +*IP Status*:: + No known IP claims. +*Contributors*:: + TBD + +=== Description + +`cl_khr_srgb_image_writes` enables OpenCL C kernels to write to sRGB images +using the *write_imagef* built-in function. +The sRGB image formats that may be written to will be returned by +{clGetSupportedImageFormats}. + +When the image is an sRGB image, the *write_imagef* built-in function will +perform the linear to sRGB conversion. +Only the R, G, and B components are converted from linear to sRGB; the A +component is written as-is. 
+ +See the link:{OpenCLCSpecURL}#cl_khr_srgb_image_writes[sRGB Image Write +Functions] section of the OpenCL C specification for more information. + +=== Version History + + * Revision 1.0.0, 2020-04-21 + ** First assigned version. diff --git a/api/cl_khr_subgroup_ballot.asciidoc b/api/cl_khr_subgroup_ballot.asciidoc new file mode 100644 index 00000000..0978f2d6 --- /dev/null +++ b/api/cl_khr_subgroup_ballot.asciidoc @@ -0,0 +1,52 @@ +// Copyright 2020-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_subgroup_ballot.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2020-12-15 +*IP Status*:: + No known IP claims. +*Contributors*:: + TBD + +=== Description + +`cl_khr_subgroup_ballot` adds built-in OpenCL C functions with the ability +to collect and operate on ballots from work items in a sub-group. + +See the link:{OpenCLCSpecURL}#cl_khr_subgroup_ballot[Sub-Group Ballots] +section of the OpenCL C specification for more information. + +=== Summary of New OpenCL C Functions + +[source,opencl_c] +---- +// These functions are available to devices supporting +// cl_khr_subgroup_ballot: + +gentype sub_group_non_uniform_broadcast( gentype value, uint index ) +gentype sub_group_broadcast_first( gentype value ) + +uint4 sub_group_ballot( int predicate ) +int sub_group_inverse_ballot( uint4 value ) +int sub_group_ballot_bit_extract( uint4 value, uint index ) +uint sub_group_ballot_bit_count( uint4 value ) +uint sub_group_ballot_inclusive_scan( uint4 value ) +uint sub_group_ballot_exclusive_scan( uint4 value ) +uint sub_group_ballot_find_lsb( uint4 value ) +uint sub_group_ballot_find_msb( uint4 value ) + +uint4 get_sub_group_eq_mask() +uint4 get_sub_group_ge_mask() +uint4 get_sub_group_gt_mask() +uint4 get_sub_group_le_mask() +uint4 get_sub_group_lt_mask() +---- + +=== Version History + + * Revision 1.0.0, 2020-12-15 + ** First assigned version. 
diff --git a/api/cl_khr_subgroup_clustered_reduce.asciidoc b/api/cl_khr_subgroup_clustered_reduce.asciidoc new file mode 100644 index 00000000..a2d60ca8 --- /dev/null +++ b/api/cl_khr_subgroup_clustered_reduce.asciidoc @@ -0,0 +1,46 @@ +// Copyright 2020-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_subgroup_clustered_reduce.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2020-12-15 +*IP Status*:: + No known IP claims. +*Contributors*:: + TBD + +=== Description + +`cl_khr_subgroup_clustered_reduce` adds built-in OpenCL functions for +clustered reductions that operate on a subset of work items in the +sub-group. + +See the link:{OpenCLCSpecURL}#cl_khr_subgroup_clustered_reduce[Clustered +Reductions] section of the OpenCL C specification for more information. + +=== Summary of New OpenCL C Functions + +[source,opencl_c] +---- +// These functions are available to devices supporting +// cl_khr_subgroup_clustered_reduce: + +gentype sub_group_clustered_reduce_add( gentype value, uint clustersize ) +gentype sub_group_clustered_reduce_mul( gentype value, uint clustersize ) +gentype sub_group_clustered_reduce_min( gentype value, uint clustersize ) +gentype sub_group_clustered_reduce_max( gentype value, uint clustersize ) +gentype sub_group_clustered_reduce_and( gentype value, uint clustersize ) +gentype sub_group_clustered_reduce_or( gentype value, uint clustersize ) +gentype sub_group_clustered_reduce_xor( gentype value, uint clustersize ) +int sub_group_clustered_reduce_logical_and( int predicate, uint clustersize ) +int sub_group_clustered_reduce_logical_or( int predicate, uint clustersize ) +int sub_group_clustered_reduce_logical_xor( int predicate, uint clustersize ) +---- + +=== Version History + + * Revision 1.0.0, 2020-12-15 + ** First assigned version. 
diff --git a/api/cl_khr_subgroup_extended_types.asciidoc b/api/cl_khr_subgroup_extended_types.asciidoc new file mode 100644 index 00000000..b5cfc793 --- /dev/null +++ b/api/cl_khr_subgroup_extended_types.asciidoc @@ -0,0 +1,50 @@ +// Copyright 2020-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_subgroup_extended_types.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2020-12-15 +*IP Status*:: + No known IP claims. +*Contributors*:: + TBD + +=== Description + +`cl_khr_subgroup_extended_types` adds additional supported OpenCL C data +types to the existing sub-group broadcast, scan, and reduction functions. + +See the link:{OpenCLCSpecURL}#cl_khr_subgroup_extended_types[Sub-Group +Extended Types] section of the OpenCL C specification for more information. + +=== Summary of New OpenCL C Functions + +[source,opencl_c] +---- +// These functions are available to devices supporting +// cl_khr_subgroup_extended_types: + +// Note: Existing functions supporting additional data types. + +gentype sub_group_broadcast( gentype value, uint index ) + +gentype sub_group_reduce_add( gentype value ) +gentype sub_group_reduce_min( gentype value ) +gentype sub_group_reduce_max( gentype value ) + +gentype sub_group_scan_inclusive_add( gentype value ) +gentype sub_group_scan_inclusive_min( gentype value ) +gentype sub_group_scan_inclusive_max( gentype value ) + +gentype sub_group_scan_exclusive_add( gentype value ) +gentype sub_group_scan_exclusive_min( gentype value ) +gentype sub_group_scan_exclusive_max( gentype value ) +---- + +=== Version History + + * Revision 1.0.0, 2020-12-15 + ** First assigned version. diff --git a/api/cl_khr_subgroup_named_barrier.asciidoc b/api/cl_khr_subgroup_named_barrier.asciidoc new file mode 100644 index 00000000..07df0c86 --- /dev/null +++ b/api/cl_khr_subgroup_named_barrier.asciidoc @@ -0,0 +1,33 @@ +// Copyright 2017-2024 The Khronos Group Inc. 
+// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_subgroup_named_barrier.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2020-04-21 +*IP Status*:: + No known IP claims. +*Contributors*:: + TBD + +=== Description + +`cl_khr_subgroup_named_barrier` adds barrier operations that cover subsets +of an OpenCL work-group. +Only the OpenCL API changes are described in this section. +Please refer to the SPIR-V specification for information about using +sub-group named barriers in the SPIR-V intermediate representation, and to +the OpenCL {cpp} specification for descriptions of the sub-group named +barrier built-in functions in the OpenCL {cpp} kernel language. + +=== New Enums + + * {cl_device_info_TYPE} + ** {CL_DEVICE_MAX_NAMED_BARRIER_COUNT_KHR} + +=== Version History + + * Revision 1.0.0, 2020-04-21 + ** First assigned version. diff --git a/api/cl_khr_subgroup_non_uniform_arithmetic.asciidoc b/api/cl_khr_subgroup_non_uniform_arithmetic.asciidoc new file mode 100644 index 00000000..3389abe0 --- /dev/null +++ b/api/cl_khr_subgroup_non_uniform_arithmetic.asciidoc @@ -0,0 +1,71 @@ +// Copyright 2020-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_subgroup_non_uniform_arithmetic.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2020-12-15 +*IP Status*:: + No known IP claims. +*Contributors*:: + TBD + +=== Description + +`cl_khr_subgroup_non_uniform_arithmetic` adds built-in OpenCL C functions +providing the ability to use some sub-group functions within non-uniform +flow control, including additional scan and reduction operators. + +See the +link:{OpenCLCSpecURL}#cl_khr_subgroup_non_uniform_arithmetic[Built-in +Non-Uniform Arithmetic Functions for Sub-Groups] section of the OpenCL C +specification for more information. 
+ +[[extended-sub-groups-summariy]] +=== Summary of New OpenCL C Functions + +[source,opencl_c] +---- +// These functions are available to devices supporting +// cl_khr_subgroup_non_uniform_arithmetic: + +gentype sub_group_non_uniform_reduce_add( gentype value ) +gentype sub_group_non_uniform_reduce_mul( gentype value ) +gentype sub_group_non_uniform_reduce_min( gentype value ) +gentype sub_group_non_uniform_reduce_max( gentype value ) +gentype sub_group_non_uniform_reduce_and( gentype value ) +gentype sub_group_non_uniform_reduce_or( gentype value ) +gentype sub_group_non_uniform_reduce_xor( gentype value ) +int sub_group_non_uniform_reduce_logical_and( int predicate ) +int sub_group_non_uniform_reduce_logical_or( int predicate ) +int sub_group_non_uniform_reduce_logical_xor( int predicate ) + +gentype sub_group_non_uniform_scan_inclusive_add( gentype value ) +gentype sub_group_non_uniform_scan_inclusive_mul( gentype value ) +gentype sub_group_non_uniform_scan_inclusive_min( gentype value ) +gentype sub_group_non_uniform_scan_inclusive_max( gentype value ) +gentype sub_group_non_uniform_scan_inclusive_and( gentype value ) +gentype sub_group_non_uniform_scan_inclusive_or( gentype value ) +gentype sub_group_non_uniform_scan_inclusive_xor( gentype value ) +int sub_group_non_uniform_scan_inclusive_logical_and( int predicate ) +int sub_group_non_uniform_scan_inclusive_logical_or( int predicate ) +int sub_group_non_uniform_scan_inclusive_logical_xor( int predicate ) + +gentype sub_group_non_uniform_scan_exclusive_add( gentype value ) +gentype sub_group_non_uniform_scan_exclusive_mul( gentype value ) +gentype sub_group_non_uniform_scan_exclusive_min( gentype value ) +gentype sub_group_non_uniform_scan_exclusive_max( gentype value ) +gentype sub_group_non_uniform_scan_exclusive_and( gentype value ) +gentype sub_group_non_uniform_scan_exclusive_or( gentype value ) +gentype sub_group_non_uniform_scan_exclusive_xor( gentype value ) +int 
sub_group_non_uniform_scan_exclusive_logical_and( int predicate ) +int sub_group_non_uniform_scan_exclusive_logical_or( int predicate ) +int sub_group_non_uniform_scan_exclusive_logical_xor( int predicate ) +---- + +=== Version History + + * Revision 1.0.0, 2020-12-15 + ** First assigned version. diff --git a/api/cl_khr_subgroup_non_uniform_vote.asciidoc b/api/cl_khr_subgroup_non_uniform_vote.asciidoc new file mode 100644 index 00000000..f884c58b --- /dev/null +++ b/api/cl_khr_subgroup_non_uniform_vote.asciidoc @@ -0,0 +1,45 @@ +// Copyright 2020-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_subgroup_non_uniform_vote.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2020-12-15 +*IP Status*:: + No known IP claims. +*Contributors*:: + TBD + +=== Description + +[[extended-sub-groups]] +=== Description + +`cl_khr_subgroup_non_uniform_vote` adds built-in OpenCL C functions with the +ability to elect a single work item from a sub-group to perform a task and +to hold votes among work items in a sub-group. + +See the link:{OpenCLCSpecURL}#cl_khr_subgroup_non_uniform_vote[Built-in +Non-Uniform Vote and Election Functions for Sub-Groups] section of the +OpenCL C specification for more information. + +=== Summary of New OpenCL C Functions + +[source,opencl_c] +---- +// These functions are available to devices supporting +// cl_khr_subgroup_non_uniform_vote: + +int sub_group_elect() + +int sub_group_non_uniform_all( int predicate ) +int sub_group_non_uniform_any( int predicate ) +int sub_group_non_uniform_all_equal( gentype value ) +---- + +=== Version History + + * Revision 1.0.0, 2020-12-15 + ** First assigned version. diff --git a/api/cl_khr_subgroup_rotate.asciidoc b/api/cl_khr_subgroup_rotate.asciidoc new file mode 100644 index 00000000..3f9d04df --- /dev/null +++ b/api/cl_khr_subgroup_rotate.asciidoc @@ -0,0 +1,32 @@ +// Copyright 2022-2024 The Khronos Group Inc. 
+// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_subgroup_rotate.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2022-04-22 +*IP Status*:: + No known IP claims. +*Contributors*:: + - Kévin Petit, Arm Ltd. + - Ben Ashbaugh, Intel + - Ruihao Zhang, Qualcomm + - Sven van Haastregt, Arm Ltd. + - Anastasia Stulova, Arm Ltd. + - Stuart Brady, Arm Ltd. + +=== Description + +`cl_khr_subgroup_rotate` adds built-in OpenCL C functions with support for a +new sub-group data exchange operation that makes it possible to rotate +values through the work items in a sub-group. + +See the link:{OpenCLCSpecURL}#cl_khr_subgroup_rotate[Sub-Group Rotation] +section of the OpenCL C specification for more information. + +=== Version History + + * Revision 1.0.0, 2022-04-22 + ** Initial version. diff --git a/api/cl_khr_subgroup_shuffle.asciidoc b/api/cl_khr_subgroup_shuffle.asciidoc new file mode 100644 index 00000000..f90bd70b --- /dev/null +++ b/api/cl_khr_subgroup_shuffle.asciidoc @@ -0,0 +1,37 @@ +// Copyright 2020-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_subgroup_shuffle.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2020-12-15 +*IP Status*:: + No known IP claims. +*Contributors*:: + TBD + +=== Description + +`cl_khr_subgroup_shuffle` adds built-in OpenCL C functions providing +additional ways to exchange data among work items in a sub-group. + +See the link:{OpenCLCSpecURL}#cl_khr_subgroup_shuffle[General Purpose +Shuffles] section of the OpenCL C specification for more information. + +=== Summary of New OpenCL C Functions + +[source,opencl_c] +---- +// These functions are available to devices supporting +// cl_khr_subgroup_shuffle: + +gentype sub_group_shuffle( gentype value, uint index ) +gentype sub_group_shuffle_xor( gentype value, uint mask ) +---- + +=== Version History + + * Revision 1.0.0, 2020-12-15 + ** First assigned version. 
diff --git a/api/cl_khr_subgroup_shuffle_relative.asciidoc b/api/cl_khr_subgroup_shuffle_relative.asciidoc new file mode 100644 index 00000000..1419828e --- /dev/null +++ b/api/cl_khr_subgroup_shuffle_relative.asciidoc @@ -0,0 +1,38 @@ +// Copyright 2020-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_subgroup_shuffle_relative.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2020-12-15 +*IP Status*:: + No known IP claims. +*Contributors*:: + TBD + +=== Description + +`cl_khr_subgroup_shuffle_relative` adds built-in OpenCL C functions +providing specialized ways to exchange data among work items in a sub-group +that may perform better on some implementations. + +See the link:{OpenCLCSpecURL}#cl_khr_subgroup_shuffle_relative[Relative +Shuffles] section of the OpenCL C specification for more information. + +=== Summary of New OpenCL C Functions + +[source,opencl_c] +---- +// These functions are available to devices supporting +// cl_khr_subgroup_shuffle_relative: + +gentype sub_group_shuffle_up( gentype value, uint delta ) +gentype sub_group_shuffle_down( gentype value, uint delta ) +---- + +=== Version History + + * Revision 1.0.0, 2020-12-15 + ** First assigned version. diff --git a/api/cl_khr_subgroups.asciidoc b/api/cl_khr_subgroups.asciidoc new file mode 100644 index 00000000..4acff29a --- /dev/null +++ b/api/cl_khr_subgroups.asciidoc @@ -0,0 +1,53 @@ +// Copyright 2017-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_subgroups.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2020-04-21 +*IP Status*:: + No known IP claims. +*Contributors*:: + TBD + +=== Description + +`cl_khr_subgroups` adds support for implementation-controlled groups of work +items, known as sub-groups. +Sub-groups behave similarly to work-groups and have their own sets of +built-ins and synchronization primitives. 
+Sub-groups within a work-group are independent, may make forward progress +with respect to each other, and may map to optimized hardware structures +where that makes sense. + +Sub-groups were promoted to a core feature in OpenCL 2.1. +However, note that: + + * The sub-group OpenCL C built-in functions described by this extension + must still be accessed as an OpenCL C extension in OpenCL 2.1. + * Sub-group independent forward progress is an optional device property in + OpenCL 2.1, see {CL_DEVICE_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS}. + +See the link:{OpenCLCSpecURL}#cl_khr_subgroups[Sub-Groups] section of the +OpenCL C specification for more information. + +=== New Types + + * {cl_kernel_sub_group_info} + +=== New Commands + + * {clGetKernelSubGroupInfoKHR} + +=== New Enums + + * {cl_kernel_sub_group_info_TYPE} + ** {CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR} + ** {CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_KHR} + +=== Version History + + * Revision 1.0.0, 2020-04-21 + ** First assigned version. diff --git a/api/cl_khr_suggested_local_work_size.asciidoc b/api/cl_khr_suggested_local_work_size.asciidoc new file mode 100644 index 00000000..55897962 --- /dev/null +++ b/api/cl_khr_suggested_local_work_size.asciidoc @@ -0,0 +1,37 @@ +// Copyright 2018-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_suggested_local_work_size.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2021-04-22 +*IP Status*:: + No known IP claims. +*Contributors*:: + TBD + +=== Description + +`cl_khr_suggested_local_work_size` adds the ability to query a suggested +local work-group size for a kernel running on a device for a specified +global work size and global work offset. +The suggested local work-group size will match the work-group size that +would be chosen if the kernel were enqueued with the specified global work +size and global work offset and a `NULL` local work size. 
+ +By using the suggested local work-group size query an application has +greater insight into the local work-group size chosen by the OpenCL +implementation, and the OpenCL implementation need not re-compute the local +work-group size if the same kernel is enqueued multiple times with the same +parameters. + +=== New Commands + + * {clGetKernelSuggestedLocalWorkSizeKHR} + +=== Version History + + * Revision 1.0.0, 2021-04-22 + ** Initial version. diff --git a/api/cl_khr_terminate_context.asciidoc b/api/cl_khr_terminate_context.asciidoc new file mode 100644 index 00000000..fac86afd --- /dev/null +++ b/api/cl_khr_terminate_context.asciidoc @@ -0,0 +1,60 @@ +// Copyright 2017-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_terminate_context.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2020-04-21 +*IP Status*:: + No known IP claims. +*Contributors*:: + TBD + +=== Description + +The `cl_khr_terminate_context` extension provides a new query to check +whether a device can terminate an OpenCL context, and adds an API to +terminate a context. + +Today, OpenCL provides an API to release a context. +This operation is done only after all queues, memory object, programs and +kernels are released, which in turn might wait for all ongoing operations to +complete. +However, there are cases in which a fast release is required, or release +operation cannot be done, as commands are stuck in mid execution. +An example of the first case can be program termination due to exception, or +quick shutdown due to low power. +Examples of the second case are when a kernel is running too long, or gets +stuck, or it may result from user action which makes the results of the +computation unnecessary. 
+ +In many cases, the driver or the device is capable of speeding up the +closure of ongoing operations when the results are no longer required in a +much more expedient manner than waiting for all previously enqueued +operations to finish. + +=== New Types + + * {cl_device_terminate_capability_khr_TYPE} + +=== New Commands + + * {clTerminateContextKHR} + +=== New Enums + + * {cl_device_info_TYPE} + ** {CL_DEVICE_TERMINATE_CAPABILITY_KHR} + * {cl_context_properties_TYPE} + ** {CL_CONTEXT_TERMINATE_KHR} + * {cl_device_terminate_capability_khr_TYPE} + ** {CL_DEVICE_TERMINATE_CAPABILITY_CONTEXT_KHR} + * New Error codes + ** {CL_CONTEXT_TERMINATED_KHR} + +=== Version History + + * Revision 1.0.0, 2020-04-21 + ** First assigned version. diff --git a/api/cl_khr_throttle_hints.asciidoc b/api/cl_khr_throttle_hints.asciidoc new file mode 100644 index 00000000..af11a150 --- /dev/null +++ b/api/cl_khr_throttle_hints.asciidoc @@ -0,0 +1,45 @@ +// Copyright 2017-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_throttle_hints.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2020-04-21 +*IP Status*:: + No known IP claims. +*Contributors*:: + TBD + +=== Description + +The `cl_khr_throttle_hints` extension adds throttle hints for OpenCL, but +does not specify the throttling behavior or minimum guarantees. +It is expected that the user guide associated with each implementation which +supports this extension will describe the throttling behavior guarantees. + +Note that the throttle hint is orthogonal to functionality defined in the +`<<cl_khr_priority_hints>>` extension. +For example, a task may have high priority ({CL_QUEUE_PRIORITY_HIGH_KHR}) +but should at the same time be executed at an optimized throttle setting +({CL_QUEUE_THROTTLE_LOW_KHR}).
+ +=== New Types + + * {cl_queue_throttle_khr_TYPE} + +=== New Enums + + * {cl_queue_properties_TYPE} + ** {CL_QUEUE_THROTTLE_KHR} + * {cl_queue_throttle_khr_TYPE} + ** {CL_QUEUE_THROTTLE_HIGH_KHR} + ** {CL_QUEUE_THROTTLE_MED_KHR} + ** {CL_QUEUE_THROTTLE_LOW_KHR} + + +=== Version History + + * Revision 1.0.0, 2020-04-21 + ** First assigned version. diff --git a/api/cl_khr_work_group_uniform_arithmetic.asciidoc b/api/cl_khr_work_group_uniform_arithmetic.asciidoc new file mode 100644 index 00000000..5dfb1281 --- /dev/null +++ b/api/cl_khr_work_group_uniform_arithmetic.asciidoc @@ -0,0 +1,49 @@ +// Copyright 2022-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_khr_work_group_uniform_arithmetic.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2022-04-29 +*IP Status*:: + No known IP claims. +*Contributors*:: + - Kevin Petit, Arm Ltd. + - Ben Ashbaugh, Intel + +=== Description + +`cl_khr_work_group_uniform_arithmetic` adds additional built-in work-group +collective functions to OpenCL C. +Specifically, this extension adds support for work-group scans and +reductions for the following operators: + + * Logical operations (`and`, `or`, and `xor`). + * Bitwise operations (`and`, `or`, and `xor`). + * Integer multiplication (`mul`). + * Floating-point multiplication (`mul`). + +See the +link:{OpenCLCSpecURL}#cl_khr_work_group_uniform_arithmetic[Work-group +Collective Uniform Arithmetic Functions] section of the OpenCL C +specification for more information. + +=== Issues + +. For these built-in functions, do we only want to support the types supported by the existing work-group collective functions, or do we want to support the types supported by the sub-group collective functions? ++ +-- +`RESOLVED`: The extension will require the same types as the existing +work-group collective functions. + +The differences are the 8-bit and 16-bit types: `char`, `uchar`, `short`, and +`ushort`.
+Note that `half` is already supported, if half-precision is supported. +-- + +=== Version History + + * Revision 1.0.0, 2022-04-29 + ** Initial version. diff --git a/api/dictionary.asciidoc b/api/dictionary.asciidoc index 2f85dfdd..a75c7a6d 100644 --- a/api/dictionary.asciidoc +++ b/api/dictionary.asciidoc @@ -1,5 +1,4 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ +// Copyright 2017-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 include::{generated}/api/api-dictionary.asciidoc[] diff --git a/api/embedded_profile.asciidoc b/api/embedded_profile.asciidoc index cc53176c..8f841a1e 100644 --- a/api/embedded_profile.asciidoc +++ b/api/embedded_profile.asciidoc @@ -1,6 +1,5 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ +// Copyright 2017-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 [[opencl-embedded-profile]] = OpenCL Embedded Profile diff --git a/api/footnotes.asciidoc b/api/footnotes.asciidoc index 66da48e8..e21cb871 100644 --- a/api/footnotes.asciidoc +++ b/api/footnotes.asciidoc @@ -1,6 +1,5 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ +// Copyright 2017-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 // Please keep footnotes in alphabetical order! @@ -11,10 +10,6 @@ Note that this flag does not provide meaning for atomic memory operations, but only for atomic fence operations in certain circumstances, refer to the Memory Scope section of the OpenCL C specification. 
\ ] -:fn-compatible-image-channel-orders: pass:n[ \ -This allows creation of a sRGB view of the image from a linear RGB view or vice-versa, i.e. the pixels stored in the image can be accessed as linear RGB or sRGB values. \ -] - :fn-create-context-all-or-subset: pass:n[ \ {clCreateContextfromType} may may create a context for all or a subset of the actual physical devices present in the platform that match _device_type_. \ ] @@ -125,7 +120,7 @@ Rather than attempt to share {cl_kernel_TYPE} objects among multiple host thread ] :fn-readimageh: pass:n[ \ -And *read_imageh*, if the *cl_khr_fp16* extension is supported. \ +And *read_imageh*, if the `<<cl_khr_fp16>>` extension is supported. \ ] :fn-reference-count-usage: pass:n[ \ diff --git a/api/glossary.asciidoc b/api/glossary.asciidoc index dbe50cd0..4794c760 100644 --- a/api/glossary.asciidoc +++ b/api/glossary.asciidoc @@ -1,6 +1,5 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ +// Copyright 2017-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 // [glossary] // MK:Don't enable [glossary] - prevents chapter numbering. = Glossary diff --git a/api/introduction.asciidoc b/api/introduction.asciidoc index 74fea0ce..84ba8e1f 100644 --- a/api/introduction.asciidoc +++ b/api/introduction.asciidoc @@ -1,6 +1,5 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ +// Copyright 2017-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 = Introduction diff --git a/api/opencl_architecture.asciidoc b/api/opencl_architecture.asciidoc index a3b42ee2..3ad82c90 100644 --- a/api/opencl_architecture.asciidoc +++ b/api/opencl_architecture.asciidoc @@ -1,6 +1,5 @@ -// Copyright 2017-2024 The Khronos Group.
This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ +// Copyright 2017-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 = The OpenCL Architecture @@ -300,7 +299,7 @@ within a work-group. The details of this mapping are described in the following section. -=== Mapping work-items onto an ND-range +=== Mapping Work-items Onto an ND-range The index space supported by OpenCL is called an ND-range. An ND-range is an N-dimensional index space, where N is one, two or three. @@ -411,7 +410,7 @@ In this situation all sub-group scope functions are equivalent to their work-group level equivalents. -=== Execution of kernel-instances +=== Execution of Kernel-instances The work carried out by an OpenCL program occurs through the execution of kernel-instances on compute devices. @@ -515,7 +514,7 @@ work-group synchronization functions. [[device-side-enqueue]] -=== Device-side enqueue +=== Device-Side Enqueue NOTE: Device-side enqueue is <> version 2.0. @@ -990,6 +989,79 @@ explicitly manage association of memory objects with devices in order to improve performance. +ifdef::cl_khr_d3d10_sharing,cl_khr_d3d11_sharing[] +[[lifetime-of-shared-opencl-direct3d-memory-objects]] +=== Lifetime of Shared Direct3D Memory Objects + +This section refers to similar Direct3D 10 and Direct3D 11 objects and +concepts such as _resources_, _reference counts_, and _devices_. + +Sharing is accomplished by creating an OpenCL context via the context create +parameters {CL_CONTEXT_D3D10_DEVICE_KHR} (for Direct3D 10, if the +`<<cl_khr_d3d10_sharing>>` extension is supported) or +{CL_CONTEXT_D3D11_DEVICE_KHR} (for Direct3D 11, if the +`<<cl_khr_d3d11_sharing>>` extension is supported). + +An OpenCL memory object created from a Direct3D resource remains valid as +long as the corresponding Direct3D resource has not been deleted.
+If the Direct3D resource is deleted through the Direct3D API, subsequent use +of the OpenCL memory object will result in undefined behavior, including but +not limited to possible OpenCL errors, data corruption, and program +termination. + +The successful creation of a {cl_context_TYPE} against a Direct3D device +will increment the internal Direct3D reference count on the specified +device. +The internal Direct3D reference count on that Direct3D device will be +decremented when the OpenCL reference count on the returned OpenCL context +drops to zero. + +The OpenCL context and corresponding command-queues are dependent on the +existence of the Direct3D device from which the OpenCL context was created. +If the Direct3D device is deleted through the Direct3D API, subsequent use +of the OpenCL context will result in undefined behavior, including but not +limited to possible OpenCL errors, data corruption, and program termination. +endif::cl_khr_d3d10_sharing,cl_khr_d3d11_sharing[] + + +ifdef::cl_khr_egl_image[] +==== Lifetime of Shared EGLImage Objects + +An OpenCL memory object created from an EGL `EGLImage` object remains valid +according to the lifetime behavior as described in the `EGL_KHR_image_base` +extension. + +Any `EGLImage` siblings exist in any client API context + +For OpenCL this means that while the application retains a reference on the +{cl_mem_TYPE} (the EGL sibling), the image remains valid. +endif::cl_khr_egl_image[] + + +ifdef::cl_khr_gl_sharing[] +[[lifetime-of-shared-opencl-opengl-memory-objects]] +=== Lifetime of Shared OpenCL/OpenGL Memory Objects + +An OpenCL memory object created from an OpenGL object (hereinafter referred +to as a "`shared OpenCL/OpenGL object`") remains valid as long as the +corresponding OpenGL object has not been deleted. +If the OpenGL object is deleted through the OpenGL API (e.g. 
+`glDeleteBuffers`, `glDeleteTextures`, or `glDeleteRenderbuffers`), +subsequent use of the OpenCL buffer or image object will result in undefined +behavior, including but not limited to possible OpenCL errors and data +corruption, but may not result in program termination. + +The OpenCL context and corresponding command-queues are dependent on the +existence of the OpenGL share group object, or the share group associated +with the OpenGL context from which the OpenCL context is created. +If the OpenGL share group object or all OpenGL contexts in the share group +are destroyed, any use of the OpenCL context or command-queue(s) will result +in undefined behavior, which may include program termination. +Applications should destroy the OpenCL command-queue(s) and OpenCL context +before destroying the corresponding OpenGL share group or contexts. +endif::cl_khr_gl_sharing[] + + [[shared-virtual-memory]] === Shared Virtual Memory @@ -1232,7 +1304,7 @@ If these guidelines are followed in your OpenCL programs, you can skip the detailed rules behind the relaxed memory models and go directly to <>. -=== Overview of atomic and fence operations +=== Overview of Atomic and Fence Operations OpenCL 2.x has a number of _synchronization operations_ that are used to define memory order constraints in a program. @@ -1741,7 +1813,7 @@ conditions required for *X* to local-synchronize-with *Y* are met, or both sets of conditions are met. -==== Work-group Functions +==== Work-Group Functions The OpenCL kernel execution model includes collective operations across the work-items within a single work-group. @@ -1794,10 +1866,10 @@ must execute the same work-group function call site, or dynamic work-group function instance. -==== Sub-group Functions +==== Sub-Group Functions NOTE: Sub-group functions are <> version 2.1. -Also see extension *cl_khr_subgroups*. +Also see `<<cl_khr_subgroups>>`. The OpenCL kernel execution model includes collective operations across the work-items within a single sub-group.
@@ -1849,7 +1921,7 @@ must execute the same sub-group function call site, or dynamic sub-group function instance. -==== Host-side and Device-side Commands +==== Host-Side and Device-Side Commands This section describes how the OpenCL API functions associated with command-queues contribute to happens-before relations. @@ -2111,6 +2183,8 @@ It is therefore strongly recommended that applications <> supported by the OpenCL device they are running on in order to remain robust to future changes. + +[[versioning]] === Versioning The OpenCL specification is regularly updated with bug fixes and clarifications. @@ -2120,35 +2194,46 @@ and to provide a way to identify each set of changes, the OpenCL API, C language intermediate languages and extensions maintain a version number. Built-in kernels are also versioned. -==== Versions + +[[version-numbers]] +==== Version Numbers A version number comprises three logical fields: -* The _major_ version indicates a significant change. Backwards compatibility may - break across major versions. -* The _minor_ version indicates the addition of new functionality with backwards - compatibility for any existing profiles. -* The _patch_ version indicates bug fixes, clarifications and general improvements. + * The _major_ version indicates a significant change. Backwards + compatibility may break across major versions. + * The _minor_ version indicates the addition of new functionality with + backwards compatibility for any existing profiles. + * The _patch_ version indicates bug fixes, clarifications and general + improvements. -Version numbers are represented using the {cl_version_TYPE} type that is an alias for -a 32-bit integer. The fields are packed as follows: +Version numbers are represented using the {cl_version_TYPE} type that is an +alias for a 32-bit integer. +The fields are packed as follows: -* The _major_ version is a 10-bit integer packed into bits 31-22. -* The _minor_ version is a 10-bit integer packed into bits 21-12. 
-* The _patch_ version is a 12-bit integer packed into bits 11-0. + * The _major_ version is a 10-bit integer packed into bits 31-22. + * The _minor_ version is a 10-bit integer packed into bits 21-12. + * The _patch_ version is a 12-bit integer packed into bits 11-0. This enables versions to be ordered using standard C/C++ operators. A number of convenience macros are provided by the OpenCL Headers to make working with version numbers easier. -`CL_VERSION_MAJOR` extracts the _major_ version from a packed {cl_version_TYPE}. + -`CL_VERSION_MINOR` extracts the _minor_ version from a packed {cl_version_TYPE}. + -`CL_VERSION_PATCH` extracts the _patch_ version from a packed {cl_version_TYPE}. + -`CL_MAKE_VERSION` returns a packed {cl_version_TYPE} from a _major_, _minor_ and -_patch_ version. - -These are defined as follows: + * `CL_VERSION_MAJOR` extracts the _major_ version from a packed + {cl_version_TYPE}. + * `CL_VERSION_MINOR` extracts the _minor_ version from a packed + {cl_version_TYPE}. + * `CL_VERSION_PATCH` extracts the _patch_ version from a packed + {cl_version_TYPE}. + * `CL_MAKE_VERSION` returns a packed {cl_version_TYPE} from a + _major_, _minor_ and _patch_ version. + * {CL_VERSION_MAJOR_BITS_anchor}, {CL_VERSION_MINOR_BITS_anchor}, and + {CL_VERSION_PATCH_BITS_anchor} are the number of bits in the + corresponding field. + * `CL_VERSION_MAJOR_MASK`, `CL_VERSION_MINOR_MASK`, and + `CL_VERSION_PATCH_MASK` are bitmasks used to extract the + corresponding packed fields from the version number. [source,opencl] ---- @@ -2178,14 +2263,18 @@ typedef cl_uint cl_version; ((patch) & CL_VERSION_PATCH_MASK)) ---- -==== Version name pairing +[[version-name-pairing]] +==== Version-Name Pairing -It is sometimes necessary to associate a version to an entity it applies to -(e.g. extension or built-in kernel).
This is done using a dedicated -{cl_name_version_TYPE} structure, defined as follows: +[open,refpage='cl_name_version',desc='Structure describing a version number and corresponding entity name',type='structs'] +-- +The {cl_name_version_TYPE} structure describes a version number and a +corresponding entity (e.g. extension or built-in kernel) name: include::{generated}/api/structs/cl_name_version.txt[] -The `name` field is an array of `CL_NAME_VERSION_MAX_NAME_SIZE` bytes used as -storage for a NUL-terminated string whose maximum length is therefore -`CL_NAME_VERSION_MAX_NAME_SIZE - 1`. + * _version_ is a <>. + * _name_ is an array of {CL_NAME_VERSION_MAX_NAME_SIZE_anchor} `char` + containing a null-terminated string whose maximum length is therefore + {CL_NAME_VERSION_MAX_NAME_SIZE} - 1. +-- diff --git a/api/opencl_assoc_spec.asciidoc b/api/opencl_assoc_spec.asciidoc index aa19b1ad..08627472 100644 --- a/api/opencl_assoc_spec.asciidoc +++ b/api/opencl_assoc_spec.asciidoc @@ -1,6 +1,5 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ +// Copyright 2017-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 = Associated OpenCL specification diff --git a/api/opencl_platform_layer.asciidoc b/api/opencl_platform_layer.asciidoc index 3aeba737..b2707919 100644 --- a/api/opencl_platform_layer.asciidoc +++ b/api/opencl_platform_layer.asciidoc @@ -1,6 +1,5 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ +// Copyright 2017-2024 The Khronos Group Inc.
+// SPDX-License-Identifier: CC-BY-4.0 [[opencl-platform-layer]] = The OpenCL Platform Layer @@ -22,10 +21,10 @@ include::{generated}/api/version-notes/clGetPlatformIDs.asciidoc[] * _num_entries_ is the number of {cl_platform_id_TYPE} entries that can be added to _platforms_. - If _platforms_ is not `NULL`, the _num_entries_ must be greater than zero. + If _platforms_ is not `NULL`, _num_entries_ must be greater than zero. * _platforms_ returns a list of OpenCL platforms found. - The {cl_platform_id_TYPE} values returned in _platforms_ can be used to identify a - specific OpenCL platform. + The {cl_platform_id_TYPE} values returned in _platforms_ can be used to + identify a specific OpenCL platform. If _platforms_ is `NULL`, this argument is ignored. The number of OpenCL platforms returned is the minimum of the value specified by _num_entries_ or the number of OpenCL platforms available. @@ -35,15 +34,58 @@ include::{generated}/api/version-notes/clGetPlatformIDs.asciidoc[] // refError {clGetPlatformIDs} returns {CL_SUCCESS} if the function is executed -successfully. +ifndef::cl_khr_icd[successfully.] +ifdef::cl_khr_icd[] +and, if the `<>` extension is supported, there are a non-zero +number of platforms available. +endif::cl_khr_icd[] Otherwise, it returns one of the following errors: +ifdef::cl_khr_icd[] + * {CL_PLATFORM_NOT_FOUND_KHR} if the `<>` extension is + supported and zero platforms are available. +endif::cl_khr_icd[] * {CL_INVALID_VALUE} if _num_entries_ is equal to zero and _platforms_ is not `NULL` or if both _num_platforms_ and _platforms_ are `NULL`. * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources required by the OpenCL implementation on the host. 
-- +ifdef::cl_khr_icd[] +[open,refpage='clIcdGetPlatformIDsKHR',desc='Query platforms accessible through the Khronos ICD Loader',type='protos'] +-- +To obtain the list of platforms accessible through the Khronos ICD Loader, +call the function: + +include::{generated}/api/protos/clIcdGetPlatformIDsKHR.txt[] +include::{generated}/api/version-notes/clIcdGetPlatformIDsKHR.asciidoc[] + + * _num_entries_ is the number of {cl_platform_id_TYPE} entries that can be + added to _platforms_. + If _platforms_ is not `NULL`, then _num_entries_ must be greater than + zero. + * _platforms_ returns a list of OpenCL platforms available for access + through the Khronos ICD Loader. + The {cl_platform_id_TYPE} values returned in _platforms_ are ICD + compatible and can be used to identify a specific OpenCL platform. + If the _platforms_ argument is `NULL`, then this argument is ignored. + The number of OpenCL platforms returned is the minimum of the value + specified by _num_entries_ or the number of OpenCL platforms available. + * _num_platforms_ returns the number of OpenCL platforms available. + If _num_platforms_ is `NULL`, then this argument is ignored. + +// refError + +{clIcdGetPlatformIDsKHR} returns {CL_SUCCESS} if the function is executed +successfully and there are a non-zero number of platforms available. +Otherwise, it returns one of the following errors: + + * {CL_PLATFORM_NOT_FOUND_KHR} if zero platforms are available. + * {CL_INVALID_VALUE} if _num_entries_ is equal to zero and _platforms_ is + not `NULL` or if both _num_platforms_ and _platforms_ are `NULL`.
+-- +endif::cl_khr_icd[] + [open,refpage='clGetPlatformInfo',desc='Query information about an OpenCL platform',type='protos'] -- Specific information about an OpenCL platform can be obtained with @@ -114,7 +156,17 @@ include::{generated}/api/version-notes/CL_PLATFORM_VERSION.asciidoc[] | {CL_PLATFORM_NUMERIC_VERSION_anchor} include::{generated}/api/version-notes/CL_PLATFORM_NUMERIC_VERSION.asciidoc[] + +ifdef::cl_khr_extended_versioning[] +or + +{CL_PLATFORM_NUMERIC_VERSION_KHR_anchor} + +include::{generated}/api/version-notes/CL_PLATFORM_NUMERIC_VERSION_KHR.asciidoc[] +endif::cl_khr_extended_versioning[] | {cl_version_TYPE} + +ifdef::cl_khr_extended_versioning[or {cl_version_khr_TYPE}] | Returns the detailed (major, minor, patch) version supported by the platform. The major and minor version numbers returned must match those returned via {CL_PLATFORM_VERSION}. @@ -141,11 +193,22 @@ include::{generated}/api/version-notes/CL_PLATFORM_EXTENSIONS.asciidoc[] | {CL_PLATFORM_EXTENSIONS_WITH_VERSION_anchor} include::{generated}/api/version-notes/CL_PLATFORM_EXTENSIONS_WITH_VERSION.asciidoc[] + +ifdef::cl_khr_extended_versioning[] +or + +{CL_PLATFORM_EXTENSIONS_WITH_VERSION_KHR_anchor} + +include::{generated}/api/version-notes/CL_PLATFORM_EXTENSIONS_WITH_VERSION_KHR.asciidoc[] +endif::cl_khr_extended_versioning[] | {cl_name_version_TYPE}[] + +ifdef::cl_khr_extended_versioning[or {cl_name_version_khr_TYPE}[]] | Returns an array of description (name and version) structures that lists all the extensions supported by the platform. The same extension name must not be reported more than once. The list of extensions reported must match the list reported via {CL_PLATFORM_EXTENSIONS}. + | {CL_PLATFORM_HOST_TIMER_RESOLUTION_anchor} include::{generated}/api/version-notes/CL_PLATFORM_HOST_TIMER_RESOLUTION.asciidoc[] @@ -157,6 +220,87 @@ include::{generated}/api/version-notes/CL_PLATFORM_HOST_TIMER_RESOLUTION.asciido platforms supporting OpenCL 2.1 or 2.2. 
This value must be 0 for devices that do not support device and host timer synchronization. + +ifdef::cl_khr_command_buffer_multi_device[] +| {CL_PLATFORM_COMMAND_BUFFER_CAPABILITIES_KHR_anchor} + +include::{generated}/api/version-notes/CL_PLATFORM_COMMAND_BUFFER_CAPABILITIES_KHR.asciidoc[] + | {cl_platform_command_buffer_capabilities_khr_TYPE} + | Describes platform command-buffer capabilities, encoded as bits in a + bitfield. + Supported capabilities are: + + {CL_COMMAND_BUFFER_PLATFORM_UNIVERSAL_SYNC_KHR_anchor} - Platform + supports the ability to synchronize all commands in a command-buffer + using sync-points, irrespective of the queue the individual commands + are recorded to. + +include::{generated}/api/version-notes/CL_COMMAND_BUFFER_PLATFORM_UNIVERSAL_SYNC_KHR.asciidoc[] + + {CL_COMMAND_BUFFER_PLATFORM_REMAP_QUEUES_KHR_anchor} - Platform + supports the ability to create a deep copy of an existing + command-buffer with the commands explicitly remapped to different, + potentially <>, queues. + +include::{generated}/api/version-notes/CL_COMMAND_BUFFER_PLATFORM_REMAP_QUEUES_KHR.asciidoc[] + + {CL_COMMAND_BUFFER_PLATFORM_AUTOMATIC_REMAP_KHR_anchor} - Platform + supports the ability to create a remapped command-buffer where the + mapping of commands to queues is done by the OpenCL runtime in a way + it determines as optimal. + If {CL_COMMAND_BUFFER_PLATFORM_AUTOMATIC_REMAP_KHR} is reported, + {CL_COMMAND_BUFFER_PLATFORM_REMAP_QUEUES_KHR} must also be reported. + +include::{generated}/api/version-notes/CL_COMMAND_BUFFER_PLATFORM_AUTOMATIC_REMAP_KHR.asciidoc[] +endif::cl_khr_command_buffer_multi_device[] + +ifdef::cl_khr_external_memory[] +| {CL_PLATFORM_EXTERNAL_MEMORY_IMPORT_HANDLE_TYPES_KHR_anchor} + +include::{generated}/api/version-notes/CL_PLATFORM_EXTERNAL_MEMORY_IMPORT_HANDLE_TYPES_KHR.asciidoc[] + | {cl_external_memory_handle_type_khr_TYPE}[] + | Returns the list of importable external memory handle types + supported by all devices in _platform_. 
+endif::cl_khr_external_memory[] + +ifdef::cl_khr_semaphore[] +| {CL_PLATFORM_SEMAPHORE_TYPES_KHR_anchor} + +include::{generated}/api/version-notes/CL_PLATFORM_SEMAPHORE_TYPES_KHR.asciidoc[] + | {cl_semaphore_type_khr_TYPE}[] + | Returns the list of the semaphore types supported by all devices in + _platform_. +endif::cl_khr_semaphore[] + +ifdef::cl_khr_external_semaphore[] +| {CL_PLATFORM_SEMAPHORE_IMPORT_HANDLE_TYPES_KHR_anchor} + +include::{generated}/api/version-notes/CL_PLATFORM_SEMAPHORE_IMPORT_HANDLE_TYPES_KHR.asciidoc[] + | {cl_external_semaphore_handle_type_khr_TYPE}[] + | Returns the list of importable external semaphore handle types + supported by all devices in _platform_. + + The size of this query may be 0 if no importable external semaphore + handle types are supported by all devices in _platform_. +| {CL_PLATFORM_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR_anchor} + +include::{generated}/api/version-notes/CL_PLATFORM_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR.asciidoc[] + | {cl_external_semaphore_handle_type_khr_TYPE}[] + | Returns the list of exportable external semaphore handle types + supported by all devices in _platform_. + + The size of this query may be 0 if no exportable external semaphore + handle types are supported by all devices in _platform_. +endif::cl_khr_external_semaphore[] + +ifdef::cl_khr_icd[] +| {CL_PLATFORM_ICD_SUFFIX_KHR_anchor} + +include::{generated}/api/version-notes/CL_PLATFORM_ICD_SUFFIX_KHR.asciidoc[] + | {char_TYPE}[] + | The function name suffix used to identify extension functions to be + directed to this platform by the ICD Loader. +endif::cl_khr_icd[] |==== // refError @@ -417,7 +561,7 @@ include::{generated}/api/version-notes/CL_DEVICE_MAX_WORK_GROUP_SIZE.asciidoc[] If double precision is not supported, {CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE} must return 0. - If the *cl_khr_fp16* extension is not supported, + If the `<>` extension is not supported, {CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF} must return 0.
| {CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR_anchor} + {CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT_anchor} + @@ -438,7 +582,7 @@ include::{generated}/api/version-notes/CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR.asciid If double precision is not supported, {CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE} must return 0. - If the *cl_khr_fp16* extension is not supported, + If the `<>` extension is not supported, {CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF} must return 0. | {CL_DEVICE_MAX_CLOCK_FREQUENCY_anchor} @@ -506,29 +650,51 @@ include::{generated}/api/version-notes/CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS.ascii | {CL_DEVICE_IL_VERSION_anchor} include::{generated}/api/version-notes/CL_DEVICE_IL_VERSION.asciidoc[] -Also see extension *cl_khr_il_program*. + +ifdef::cl_khr_il_program[] +or + +{CL_DEVICE_IL_VERSION_KHR_anchor} + +include::{generated}/api/version-notes/CL_DEVICE_IL_VERSION_KHR.asciidoc[] +endif::cl_khr_il_program[] | {char_TYPE}[] | The intermediate languages that can be supported by {clCreateProgramWithIL} for this device. Returns a space-separated list of IL version strings of the form - _.. + + _. For an OpenCL 2.1 or 2.2 device, SPIR-V is a required IL prefix. If the device does not support intermediate language programs, the value must be `""` (an empty string). +ifdef::cl_khr_il_program[] + A device that supports the `<>` extension must + support the `"SPIR-V"` IL prefix. +endif::cl_khr_il_program[] | {CL_DEVICE_ILS_WITH_VERSION_anchor} include::{generated}/api/version-notes/CL_DEVICE_ILS_WITH_VERSION.asciidoc[] -Also see extension *cl_khr_il_program*. + +ifdef::cl_khr_extended_versioning[] +or + +{CL_DEVICE_ILS_WITH_VERSION_KHR_anchor} + +include::{generated}/api/version-notes/CL_DEVICE_ILS_WITH_VERSION_KHR.asciidoc[] +endif::cl_khr_extended_versioning[] | {cl_name_version_TYPE}[] + +ifdef::cl_khr_extended_versioning[or {cl_name_version_khr_TYPE}[]] | Returns an array of descriptions (name and version) for all supported - intermediate languages. 
Intermediate languages with the same name may be - reported more than once but each name and major/minor version - combination may only be reported once. The list of intermediate - languages reported must match the list reported via - {CL_DEVICE_IL_VERSION}. + intermediate languages. + Intermediate languages with the same name may be reported more than + once but each name and major/minor version combination may only be + reported once. + The list of intermediate languages reported must match the list + reported via {CL_DEVICE_IL_VERSION}. For an OpenCL 2.1 or 2.2 device, at least one version of SPIR-V must be reported. @@ -601,6 +767,11 @@ include::{generated}/api/version-notes/CL_DEVICE_MAX_SAMPLERS.asciidoc[] | {CL_DEVICE_IMAGE_PITCH_ALIGNMENT_anchor} include::{generated}/api/version-notes/CL_DEVICE_IMAGE_PITCH_ALIGNMENT.asciidoc[] + +ifdef::cl_khr_image2d_from_buffer[] +The equivalent {CL_DEVICE_IMAGE_PITCH_ALIGNMENT_KHR_anchor} may be used if +the `<>` extension is supported. +endif::cl_khr_image2d_from_buffer[] | {cl_uint_TYPE} | The row pitch alignment size in pixels for 2D images created from a buffer. @@ -613,6 +784,11 @@ include::{generated}/api/version-notes/CL_DEVICE_IMAGE_PITCH_ALIGNMENT.asciidoc[ | {CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT_anchor} include::{generated}/api/version-notes/CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT.asciidoc[] + +ifdef::cl_khr_image2d_from_buffer[] +The equivalent {CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT_KHR_anchor} may be used +if the `<>` extension is supported.
+endif::cl_khr_image2d_from_buffer[] | {cl_uint_TYPE} | This query specifies the minimum alignment in pixels of the host_ptr specified to {clCreateBuffer} or {clCreateBufferWithProperties} when a 2D image @@ -711,7 +887,8 @@ include::{generated}/api/version-notes/CL_DEVICE_SINGLE_FP_CONFIG.asciidoc[] | {CL_DEVICE_DOUBLE_FP_CONFIG_anchor} footnote:native-rounding-modes[] include::{generated}/api/version-notes/CL_DEVICE_DOUBLE_FP_CONFIG.asciidoc[] -Also see extension *cl_khr_fp64*. + +Also see `<>`. | {cl_device_fp_config_TYPE} | Describes double precision floating-point capability of the OpenCL device. @@ -987,10 +1164,21 @@ include::{generated}/api/version-notes/CL_DEVICE_BUILT_IN_KERNELS.asciidoc[] | {CL_DEVICE_BUILT_IN_KERNELS_WITH_VERSION_anchor} include::{generated}/api/version-notes/CL_DEVICE_BUILT_IN_KERNELS_WITH_VERSION.asciidoc[] + +ifdef::cl_khr_extended_versioning[] +or + +{CL_DEVICE_BUILT_IN_KERNELS_WITH_VERSION_KHR_anchor} + +include::{generated}/api/version-notes/CL_DEVICE_BUILT_IN_KERNELS_WITH_VERSION_KHR.asciidoc[] +endif::cl_khr_extended_versioning[] | {cl_name_version_TYPE}[] + +ifdef::cl_khr_extended_versioning[or {cl_name_version_khr_TYPE}[]] | Returns an array of descriptions for the built-in kernels supported by - the device. Each built-in kernel may only be reported once. The list of - reported kernels must match the list returned via + the device. + Each built-in kernel may only be reported once. + The list of reported kernels must match the list returned via {CL_DEVICE_BUILT_IN_KERNELS}. 
| {CL_DEVICE_PLATFORM_anchor} @@ -1044,16 +1232,27 @@ include::{generated}/api/version-notes/CL_DEVICE_VERSION.asciidoc[] | {CL_DEVICE_NUMERIC_VERSION_anchor} include::{generated}/api/version-notes/CL_DEVICE_NUMERIC_VERSION.asciidoc[] + +ifdef::cl_khr_extended_versioning[] +or + +{CL_DEVICE_NUMERIC_VERSION_KHR_anchor} + +include::{generated}/api/version-notes/CL_DEVICE_NUMERIC_VERSION_KHR.asciidoc[] +endif::cl_khr_extended_versioning[] | {cl_version_TYPE} + +ifdef::cl_khr_extended_versioning[or {cl_version_khr_TYPE}] | Returns the detailed (major, minor, patch) version supported by the - device. The major and minor version numbers returned must match - those returned via {CL_DEVICE_VERSION}. + device. + The major and minor version numbers returned must match those + returned via {CL_DEVICE_VERSION}. | {CL_DEVICE_OPENCL_C_VERSION_anchor} include::{generated}/api/version-notes/CL_DEVICE_OPENCL_C_VERSION.asciidoc[] | {char_TYPE}[] - | Returns the highest fully backwards compatible OpenCL C version + | Returns the highest fully backwards compatible OpenCL C version supported by the compiler for the device. For devices supporting compilation from OpenCL C source, this will return a version string with the following format: @@ -1121,6 +1320,20 @@ include::{generated}/api/version-notes/CL_DEVICE_OPENCL_C_ALL_VERSIONS.asciidoc[ For devices that do not support compilation from OpenCL C source, this query may return an empty array. +ifdef::cl_khr_extended_versioning[] +| {CL_DEVICE_OPENCL_C_NUMERIC_VERSION_KHR_anchor} + +include::{generated}/api/version-notes/CL_DEVICE_OPENCL_C_NUMERIC_VERSION_KHR.asciidoc[] + | {cl_version_khr_TYPE} + | Returns detailed (major, minor, patch) numeric version information. + The major and minor version numbers returned must match those returned + via {CL_DEVICE_OPENCL_C_VERSION}. + + This query was not promoted to core in OpenCL version 3.0, but the + core query {CL_DEVICE_OPENCL_C_ALL_VERSIONS} can be used to obtain + equivalent information. 
+endif::cl_khr_extended_versioning[] + | {CL_DEVICE_OPENCL_C_FEATURES_anchor} include::{generated}/api/version-notes/CL_DEVICE_OPENCL_C_FEATURES.asciidoc[] @@ -1147,26 +1360,26 @@ include::{generated}/api/version-notes/CL_DEVICE_EXTENSIONS.asciidoc[] The following Khronos extension names must be returned by all devices that support OpenCL 1.1: - *cl_khr_byte_addressable_store* + - *cl_khr_global_int32_base_atomics* + - *cl_khr_global_int32_extended_atomics* + - *cl_khr_local_int32_base_atomics* + - *cl_khr_local_int32_extended_atomics* + `<>` + + `<>` + + `<>` + + `<>` + + `<>` Additionally, the following Khronos extension names must be returned by all devices that support OpenCL 2.0, OpenCL 2.1, or OpenCL 2.2. For devices that support OpenCL 3.0, these extension names must be returned when and only when the optional feature is supported: - *cl_khr_3d_image_writes* + - *cl_khr_depth_images* + - *cl_khr_image2d_from_buffer* + `<>` + + `<>` + + `<>` Please refer to the OpenCL Extension Specification or vendor provided documentation for a detailed description of these extensions. @@ -1174,7 +1387,17 @@ include::{generated}/api/version-notes/CL_DEVICE_EXTENSIONS.asciidoc[] | {CL_DEVICE_EXTENSIONS_WITH_VERSION_anchor} include::{generated}/api/version-notes/CL_DEVICE_EXTENSIONS_WITH_VERSION.asciidoc[] + +ifdef::cl_khr_extended_versioning[] +or + +{CL_DEVICE_EXTENSIONS_WITH_VERSION_KHR_anchor} + +include::{generated}/api/version-notes/CL_DEVICE_EXTENSIONS_WITH_VERSION_KHR.asciidoc[] +endif::cl_khr_extended_versioning[] | {cl_name_version_TYPE}[] + +ifdef::cl_khr_extended_versioning[or {cl_name_version_khr_TYPE}[]] | Returns an array of description (name and version) structures. The same extension name must not be reported more than once. The list of extensions reported must match the list reported via @@ -1354,7 +1577,7 @@ include::{generated}/api/version-notes/CL_DEVICE_SUB_GROUP_INDEPENDENT_FORWARD_P sub-groups, {CL_FALSE} otherwise. 
This query must return {CL_TRUE} for devices that support the - *cl_khr_subgroups* extension, and must return {CL_FALSE} for + `<>` extension, and must return {CL_FALSE} for devices that do not support sub-groups. | {CL_DEVICE_ATOMIC_MEMORY_CAPABILITIES_anchor} @@ -1465,8 +1688,418 @@ include::{generated}/api/version-notes/CL_DEVICE_LATEST_CONFORMANCE_VERSION_PASS | Returns the latest version of the conformance test suite that this device has fully passed in accordance with the official conformance process. +ifdef::cl_khr_command_buffer[] +| {CL_DEVICE_COMMAND_BUFFER_CAPABILITIES_KHR_anchor} + +include::{generated}/api/version-notes/CL_DEVICE_COMMAND_BUFFER_CAPABILITIES_KHR.asciidoc[] + + | {cl_device_command_buffer_capabilities_khr_TYPE} + | Describes device command-buffer capabilities, encoded as bits in a bitfield. + Supported capabilities are: + + {CL_COMMAND_BUFFER_CAPABILITY_KERNEL_PRINTF_KHR_anchor} Device + supports the ability to record commands that execute kernels which + contain printf calls. + +include::{generated}/api/version-notes/CL_COMMAND_BUFFER_CAPABILITY_KERNEL_PRINTF_KHR.asciidoc[] + + {CL_COMMAND_BUFFER_CAPABILITY_DEVICE_SIDE_ENQUEUE_KHR_anchor} Device + supports the ability to record commands that execute kernels which + contain device-side kernel-enqueue calls. + +include::{generated}/api/version-notes/CL_COMMAND_BUFFER_CAPABILITY_DEVICE_SIDE_ENQUEUE_KHR.asciidoc[] + + {CL_COMMAND_BUFFER_CAPABILITY_SIMULTANEOUS_USE_KHR_anchor} Device + supports the command-buffers having a <> that exceeds 1. + +include::{generated}/api/version-notes/CL_COMMAND_BUFFER_CAPABILITY_SIMULTANEOUS_USE_KHR.asciidoc[] + + {CL_COMMAND_BUFFER_CAPABILITY_OUT_OF_ORDER_KHR_anchor} Device + supports the ability to record command-buffers to out-of-order + command-queues. 
+ +include::{generated}/api/version-notes/CL_COMMAND_BUFFER_CAPABILITY_OUT_OF_ORDER_KHR.asciidoc[] + +ifdef::cl_khr_command_buffer_multi_device[] + {CL_COMMAND_BUFFER_CAPABILITY_MULTIPLE_QUEUE_KHR_anchor} Device + supports the ability to record commands to more than one + command-queue associated with _device_ in a single command-buffer. + +include::{generated}/api/version-notes/CL_COMMAND_BUFFER_CAPABILITY_MULTIPLE_QUEUE_KHR.asciidoc[] +endif::cl_khr_command_buffer_multi_device[] + +| {CL_DEVICE_COMMAND_BUFFER_REQUIRED_QUEUE_PROPERTIES_KHR_anchor} + +include::{generated}/api/version-notes/CL_DEVICE_COMMAND_BUFFER_REQUIRED_QUEUE_PROPERTIES_KHR.asciidoc[] + + | {cl_command_queue_properties_TYPE} + | Bitmask of the minimum properties with which a command-queue must be created + to allow a command-buffer to be executed on it. + It is valid for a command-queue to be created with extra properties + in addition to this base requirement and still be compatible with + command-buffer execution. +endif::cl_khr_command_buffer[] + +ifdef::cl_khr_command_buffer_multi_device[] +| {CL_DEVICE_COMMAND_BUFFER_NUM_SYNC_DEVICES_KHR_anchor} + +include::{generated}/api/version-notes/CL_DEVICE_COMMAND_BUFFER_NUM_SYNC_DEVICES_KHR.asciidoc[] + | {cl_uint_TYPE} + | Return the number of root devices listed in + {CL_DEVICE_COMMAND_BUFFER_SYNC_DEVICES_KHR} that _device_ can use + device-side synchronization with. + +| {CL_DEVICE_COMMAND_BUFFER_SYNC_DEVICES_KHR_anchor} + +include::{generated}/api/version-notes/CL_DEVICE_COMMAND_BUFFER_SYNC_DEVICES_KHR.asciidoc[] + | {cl_device_id_TYPE}[] + | Return the list of root devices _device_ can use device-side + synchronization with. + A device should list itself only if it has native support for + synchronizing commands. + Sub-devices are not listed to avoid non-deterministic results as + sub-devices are created. + Instead if a root device is listed, then any of its partitioned + sub-devices can also be natively synchronized with. 
+endif::cl_khr_command_buffer_multi_device[] + +ifdef::cl_khr_command_buffer_mutable_dispatch[] +| {CL_DEVICE_MUTABLE_DISPATCH_CAPABILITIES_KHR_anchor} + +include::{generated}/api/version-notes/CL_DEVICE_MUTABLE_DISPATCH_CAPABILITIES_KHR.asciidoc[] + | {cl_mutable_dispatch_fields_khr_TYPE} + | Describes device mutable-dispatch capabilities, encoded as bits in a + bitfield. + Supported capabilities are: + + {CL_MUTABLE_DISPATCH_GLOBAL_OFFSET_KHR_anchor} - Device supports the + ability to modify the _global_work_offset_ of kernel execution after + command recording. + +include::{generated}/api/version-notes/CL_MUTABLE_DISPATCH_GLOBAL_OFFSET_KHR.asciidoc[] + + {CL_MUTABLE_DISPATCH_GLOBAL_SIZE_KHR_anchor} - Device supports the + ability to modify the _global_work_size_ of kernel execution after + command recording. + +include::{generated}/api/version-notes/CL_MUTABLE_DISPATCH_GLOBAL_SIZE_KHR.asciidoc[] + + {CL_MUTABLE_DISPATCH_LOCAL_SIZE_KHR_anchor} - Device supports the + ability to modify the _local_work_size_ of kernel execution after + command recording. + +include::{generated}/api/version-notes/CL_MUTABLE_DISPATCH_LOCAL_SIZE_KHR.asciidoc[] + + {CL_MUTABLE_DISPATCH_ARGUMENTS_KHR_anchor} - Device supports the + ability to modify arguments set on a kernel after command recording. + +include::{generated}/api/version-notes/CL_MUTABLE_DISPATCH_ARGUMENTS_KHR.asciidoc[] + + {CL_MUTABLE_DISPATCH_EXEC_INFO_KHR_anchor} - Device supports the + ability to modify execution information set on a kernel after command + recording. + +include::{generated}/api/version-notes/CL_MUTABLE_DISPATCH_EXEC_INFO_KHR.asciidoc[] +endif::cl_khr_command_buffer_mutable_dispatch[] + +ifdef::cl_khr_device_uuid[] +| {CL_DEVICE_UUID_KHR_anchor} + +include::{generated}/api/version-notes/CL_DEVICE_UUID_KHR.asciidoc[] + + | {cl_uchar_TYPE}[{CL_UUID_SIZE_KHR}] + | Returns a universally unique identifier (UUID) for the device. 
+ + Device UUIDs must be immutable for a given device across processes, + driver APIs, driver versions, and system reboots. +| {CL_DRIVER_UUID_KHR_anchor} + +include::{generated}/api/version-notes/CL_DRIVER_UUID_KHR.asciidoc[] + + | {cl_uchar_TYPE}[{CL_UUID_SIZE_KHR}] + | Returns a universally unique identifier (UUID) for the software driver + for the device. +| {CL_DEVICE_LUID_VALID_KHR_anchor} + +include::{generated}/api/version-notes/CL_DEVICE_LUID_VALID_KHR.asciidoc[] + + | {cl_bool_TYPE} + | Returns {CL_TRUE} if the device has a valid LUID and {CL_FALSE} + otherwise. +| {CL_DEVICE_LUID_KHR_anchor} + +include::{generated}/api/version-notes/CL_DEVICE_LUID_KHR.asciidoc[] + + | {cl_uchar_TYPE}[{CL_LUID_SIZE_KHR}] + | Returns a locally unique identifier (LUID) for the device. + + It is not an error to query {CL_DEVICE_LUID_KHR} when + {CL_DEVICE_LUID_VALID_KHR} returns {CL_FALSE}, but in this case the + returned LUID value is undefined. + + When {CL_DEVICE_LUID_VALID_KHR} returns {CL_TRUE}, and the OpenCL + device is running on the Windows operating system, the returned LUID + value can be cast to an `LUID` object and must be equal to the locally + unique identifier of an `IDXGIAdapter1` object that corresponds to the + OpenCL device. +| {CL_DEVICE_NODE_MASK_KHR_anchor} + +include::{generated}/api/version-notes/CL_DEVICE_NODE_MASK_KHR.asciidoc[] + + | {cl_uint_TYPE} + | Returns a node mask for the device. + + It is not an error to query {CL_DEVICE_NODE_MASK_KHR} when + {CL_DEVICE_LUID_VALID_KHR} returns {CL_FALSE}, but in this case the + returned node mask is undefined. + + When {CL_DEVICE_LUID_VALID_KHR} returns {CL_TRUE}, the returned node + mask must contain exactly one bit. + If the OpenCL device is running on an operating system that supports + the Direct3D 12 API and the OpenCL device corresponds to an individual + device in a linked device adapter, the returned node mask identifies + the Direct3D 12 node corresponding to the OpenCL device. 
+ Otherwise, the returned node mask must be `1`. +endif::cl_khr_device_uuid[] + +ifdef::cl_khr_external_memory[] +| {CL_DEVICE_EXTERNAL_MEMORY_IMPORT_HANDLE_TYPES_KHR_anchor} + +include::{generated}/api/version-notes/CL_DEVICE_EXTERNAL_MEMORY_IMPORT_HANDLE_TYPES_KHR.asciidoc[] + + | {cl_external_memory_handle_type_khr_TYPE}[] + | Returns the list of importable external memory handle types + supported by _device_. + + Must return a non-empty list of external memory handle types for at + least one of the devices in the platform. + +| {CL_DEVICE_EXTERNAL_MEMORY_IMPORT_ASSUME_LINEAR_IMAGES_HANDLE_TYPES_KHR_anchor} + +include::{generated}/api/version-notes/CL_DEVICE_EXTERNAL_MEMORY_IMPORT_ASSUME_LINEAR_IMAGES_HANDLE_TYPES_KHR.asciidoc[] + + | {cl_external_memory_handle_type_khr_TYPE}[] + | Returns the list of importable external memory handle types + supported by _device_, that are assumed to apply linear layout to + imported images when no other tiling information is provided. + + This list contains a subset of + {CL_DEVICE_EXTERNAL_MEMORY_IMPORT_HANDLE_TYPES_KHR}. + The returned list may be empty. + + External memory handle types not in + {CL_DEVICE_EXTERNAL_MEMORY_IMPORT_ASSUME_LINEAR_IMAGES_HANDLE_TYPES_KHR} + may have any memory layout. + The layout interpretation of images imported with these handle types + is implementation defined. +endif::cl_khr_external_memory[] + +ifdef::cl_khr_fp16[] +| {CL_DEVICE_HALF_FP_CONFIG_anchor} + +include::{generated}/api/version-notes/CL_DEVICE_HALF_FP_CONFIG.asciidoc[] + | {cl_device_fp_config_TYPE} + | Describes half-precision floating-point capability of the OpenCL + device. 
+ This is a bit-field that describes one or more of the following + values: + + {CL_FP_DENORM} - denorms are supported + + {CL_FP_INF_NAN} - INF and NaNs are supported + + {CL_FP_ROUND_TO_NEAREST} - round to nearest even rounding mode + supported + + {CL_FP_ROUND_TO_ZERO} - round to zero rounding mode supported + + {CL_FP_ROUND_TO_INF} - round to positive and negative infinity + rounding modes supported + + {CL_FP_FMA} - IEEE754-2008 fused multiply-add is supported + + {CL_FP_SOFT_FLOAT} - Basic floating-point operations (such as + addition, subtraction, multiplication) are implemented in software + + If half-precision is supported by the device, then the minimum + half-precision floating-point capability for OpenCL 2.0 or newer + devices is: + + {CL_FP_ROUND_TO_ZERO} + + or + + {CL_FP_ROUND_TO_NEAREST} \| + + {CL_FP_INF_NAN}. +endif::cl_khr_fp16[] + +ifdef::cl_khr_integer_dot_product[] +| {CL_DEVICE_INTEGER_DOT_PRODUCT_CAPABILITIES_KHR_anchor} + +include::{generated}/api/version-notes/CL_DEVICE_INTEGER_DOT_PRODUCT_CAPABILITIES_KHR.asciidoc[] + | {cl_device_integer_dot_product_capabilities_khr_TYPE} + | Returns the integer dot product capabilities supported by the + device. + + {CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_4x8BIT_PACKED_KHR} is always + set, indicating that all implementations that support + `<<cl_khr_integer_dot_product>>` must support dot product built-in + functions and, when SPIR-V is supported, SPIR-V instructions that + take four-component vectors of 8-bit integers packed into 32-bit + integers as input. + + {CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_4x8BIT_KHR} is set when dot + product built-in functions and, when SPIR-V is supported, SPIR-V + instructions that take four-component vectors of 8-bit elements as + input are supported. + NOTE: {CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_4x8BIT_KHR} + must be set in version 2.x of the extension. 
+ +| {CL_DEVICE_INTEGER_DOT_PRODUCT_ACCELERATION_PROPERTIES_8BIT_KHR_anchor} + +include::{generated}/api/version-notes/CL_DEVICE_INTEGER_DOT_PRODUCT_ACCELERATION_PROPERTIES_8BIT_KHR.asciidoc[] + + | {cl_device_integer_dot_product_acceleration_properties_khr_TYPE} + | Returns a structure describing the exact 8-bit dot product + combinations that are <<integer-dot-product-acceleration-properties,accelerated>> on the device. + Each member is {CL_TRUE} if the combination it corresponds to is + accelerated, {CL_FALSE} otherwise. + NOTE: + {CL_DEVICE_INTEGER_DOT_PRODUCT_ACCELERATION_PROPERTIES_8BIT_KHR} is + missing before version 2.0 of the extension. + +| {CL_DEVICE_INTEGER_DOT_PRODUCT_ACCELERATION_PROPERTIES_4x8BIT_PACKED_KHR_anchor} + +include::{generated}/api/version-notes/CL_DEVICE_INTEGER_DOT_PRODUCT_ACCELERATION_PROPERTIES_4x8BIT_PACKED_KHR.asciidoc[] + + | {cl_device_integer_dot_product_acceleration_properties_khr_TYPE} + | Returns a structure describing the exact 4x8-bit packed dot product + combinations that are <<integer-dot-product-acceleration-properties,accelerated>> on the device. + Each member is {CL_TRUE} if the combination it corresponds to is + accelerated, {CL_FALSE} otherwise. + NOTE: + {CL_DEVICE_INTEGER_DOT_PRODUCT_ACCELERATION_PROPERTIES_4x8BIT_PACKED_KHR} + is missing before version 2.0 of the extension. +endif::cl_khr_integer_dot_product[] + +ifdef::cl_khr_pci_bus_info[] +| {CL_DEVICE_PCI_BUS_INFO_KHR_anchor} + +include::{generated}/api/version-notes/CL_DEVICE_PCI_BUS_INFO_KHR.asciidoc[] + | {cl_device_pci_bus_info_khr_TYPE} + | Returns PCI bus information for the device. + + The PCI bus information is returned as a single structure that + includes the PCI bus domain, the PCI bus identifier, the PCI device + identifier, and the PCI device function identifier. +endif::cl_khr_pci_bus_info[] + +ifdef::cl_khr_semaphore[] +| {CL_DEVICE_SEMAPHORE_TYPES_KHR_anchor} + +include::{generated}/api/version-notes/CL_DEVICE_SEMAPHORE_TYPES_KHR.asciidoc[] + | {cl_semaphore_type_khr_TYPE}[] + | Returns the list of the semaphore types supported by _device_. 
+ + Must return a non-empty list for at least one of the devices in the + platform, meeting the minimum requirements described for + {cl_semaphore_type_khr_TYPE}. +endif::cl_khr_semaphore[] + +ifdef::cl_khr_external_semaphore[] +| {CL_DEVICE_SEMAPHORE_IMPORT_HANDLE_TYPES_KHR_anchor} + +include::{generated}/api/version-notes/CL_DEVICE_SEMAPHORE_IMPORT_HANDLE_TYPES_KHR.asciidoc[] + | {cl_external_semaphore_handle_type_khr_TYPE}[] + | Returns the list of importable external semaphore handle types + supported by _device_. + + The size of this query may be 0, indicating that the device does not + support importing semaphores. +| {CL_DEVICE_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR_anchor} + +include::{generated}/api/version-notes/CL_DEVICE_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR.asciidoc[] + | {cl_external_semaphore_handle_type_khr_TYPE}[] + | Returns the list of exportable external semaphore handle types + supported by _device_. + + The size of this query may be 0, indicating that the device does not + support exporting semaphores. +endif::cl_khr_external_semaphore[] + +ifdef::cl_khr_spir[] +| {CL_DEVICE_SPIR_VERSIONS_anchor} + +include::{generated}/api/version-notes/CL_DEVICE_SPIR_VERSIONS.asciidoc[] + | {char_TYPE}[] + | A space-separated list of SPIR versions supported by the device. + + For example, returning `"1.2"` in this query implies that SPIR version + 1.2 is supported by the implementation. +endif::cl_khr_spir[] + +ifdef::cl_khr_subgroup_named_barrier[] +| {CL_DEVICE_MAX_NAMED_BARRIER_COUNT_KHR_anchor} + +include::{generated}/api/version-notes/CL_DEVICE_MAX_NAMED_BARRIER_COUNT_KHR.asciidoc[] + | {cl_uint_TYPE} + | Maximum number of named barriers in a work-group for any given + kernel-instance running on the device. + The minimum value is 8. 
+endif::cl_khr_subgroup_named_barrier[] + +ifdef::cl_khr_terminate_context[] +| {CL_DEVICE_TERMINATE_CAPABILITY_KHR_anchor} + +include::{generated}/api/version-notes/CL_DEVICE_TERMINATE_CAPABILITY_KHR.asciidoc[] + | {cl_device_terminate_capability_khr_TYPE} + | Describes the termination capability of the OpenCL device. + This is a bit-field, where the following values are currently + supported: + + {CL_DEVICE_TERMINATE_CAPABILITY_CONTEXT_KHR_anchor} - Indicates that + context termination is supported. +endif::cl_khr_terminate_context[] |==== +ifdef::cl_khr_integer_dot_product[] +OpenCL 3 devices must report the following feature macros via +{CL_DEVICE_OPENCL_C_FEATURES} when the corresponding bit is set in the bitfield +returned for {CL_DEVICE_INTEGER_DOT_PRODUCT_CAPABILITIES_KHR}: + +[cols="1,1",options="header"] +|==== +| Feature Bit | Feature Macro +| {CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_4x8BIT_PACKED_KHR} + | `__opencl_c_integer_dot_product_input_4x8bit_packed` +| {CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_4x8BIT_KHR} + | `__opencl_c_integer_dot_product_input_4x8bit` +|==== +endif::cl_khr_integer_dot_product[] + +ifdef::cl_khr_external_semaphore[] +One of the two queries {CL_DEVICE_SEMAPHORE_IMPORT_HANDLE_TYPES_KHR} and +{CL_DEVICE_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR} must return a non-empty list +indicating support for at least one of the valid semaphore handle types +either for import, for export, or both. +endif::cl_khr_external_semaphore[] + +ifdef::cl_khr_device_uuid[] +[NOTE] +.Note +==== +While {CL_DEVICE_UUID_KHR} is specified to remain consistent across driver +versions and system reboots, it is not intended to be usable as a +serializable persistent identifier for a device. +It may change when a device is physically added to, removed from, or moved +to a different connector in a system while that system is powered down. 
+Further, there is no reasonable way to verify with conformance testing that +a given device retains the same UUID in a given system across all driver +versions supported in that system. +While implementations should make every effort to report consistent device +UUIDs across driver versions, applications should avoid relying on the +persistence of this value for uses other than identifying compatible devices +for external object sharing purposes. +==== +endif::cl_khr_device_uuid[] + // refError {clGetDeviceInfo} returns {CL_SUCCESS} if the function is executed @@ -1486,6 +2119,55 @@ Otherwise, it returns one of the following errors: required by the OpenCL implementation on the host. -- +ifdef::cl_khr_integer_dot_product[] +[open,refpage='cl_device_integer_dot_product_acceleration_properties_khr',desc='Structure describing supported dot product operations',type='structs'] +-- +[[integer-dot-product-acceleration-properties]] +The {cl_device_integer_dot_product_acceleration_properties_khr_TYPE} structure +describes the exact dot product operations that are accelerated on the device: + +include::{generated}/api/structs/cl_device_integer_dot_product_acceleration_properties_khr.txt[] + + * _signed_accelerated_ is {CL_TRUE} when signed dot product operations are + accelerated, {CL_FALSE} otherwise. + * _unsigned_accelerated_ is {CL_TRUE} when unsigned dot product operations + are accelerated, {CL_FALSE} otherwise. + * _mixed_signedness_accelerated_ is {CL_TRUE} when mixed signedness dot + product operations are accelerated, {CL_FALSE} otherwise. + * _accumulating_saturating_signed_accelerated_ is {CL_TRUE} when accumulating + saturating signed dot product operations are accelerated, {CL_FALSE} + otherwise. + * _accumulating_saturating_unsigned_accelerated_ is {CL_TRUE} when accumulating + saturating unsigned dot product operations are accelerated, {CL_FALSE} + otherwise. 
+ * _accumulating_saturating_mixed_signedness_accelerated_ is {CL_TRUE} when + accumulating saturating mixed signedness dot product operations are + accelerated, {CL_FALSE} otherwise. + +A dot product operation is deemed accelerated if its implementation provides +a performance advantage over application-provided code composed from elementary +instructions and/or other dot product instructions, either because the +implementation uses optimized machine code sequences whose generation from +application-provided code cannot be guaranteed or because it uses hardware +features that cannot otherwise be targeted from application-provided code. +-- +endif::cl_khr_integer_dot_product[] + +ifdef::cl_khr_pci_bus_info[] +[open,refpage='cl_device_pci_bus_info_khr',desc='Structure describing PCI bus information',type='structs'] +-- +The {cl_device_pci_bus_info_khr_TYPE} structure describes PCI bus +information for a device: + +include::{generated}/api/structs/cl_device_pci_bus_info_khr.txt[] + + * _pci_domain_ is the PCI bus domain of the device. + * _pci_bus_ is the PCI bus identifier of the device. + * _pci_device_ is the PCI device identifier of the device. + * _pci_function_ is the PCI device function identifier of the device. +-- +endif::cl_khr_pci_bus_info[] + [open,refpage='clGetDeviceAndHostTimer',desc='Query synchronized host and device timestamps',type='protos'] -- To query device and host timestamps, call the function: @@ -1575,6 +2257,336 @@ Otherwise, it returns one of the following errors: -- +ifdef::cl_khr_dx9_media_sharing[] +=== Sharing DirectX9 Media Surfaces With OpenCL Images + +This section discusses OpenCL functions that allow applications to use media +surfaces as OpenCL memory objects. +This allows efficient sharing of data between OpenCL and media surface APIs. +The OpenCL API may be used to execute kernels that read and/or write memory +objects that are also media surfaces. +An OpenCL image object may be created from a media surface. 
+OpenCL memory objects may be created from media surfaces if and only if the +OpenCL context has been created from a media adapter. + + +==== Querying OpenCL Devices Corresponding to Media Adapters + +Media adapters are an abstraction associated with devices that provide media +capabilities. +Adapters with associated OpenCL devices can enable media surface sharing +between the two. + + +[open,refpage='clGetDeviceIDsFromDX9MediaAdapterKHR',desc='Query media adapter for any associated OpenCL devices',type='protos'] +-- +To query a media adapter for any associated OpenCL devices, call the +function + +include::{generated}/api/protos/clGetDeviceIDsFromDX9MediaAdapterKHR.txt[] +include::{generated}/api/version-notes/clGetDeviceIDsFromDX9MediaAdapterKHR.asciidoc[] + + * _platform_ refers to the platform ID returned by {clGetPlatformIDs}. + * _num_media_adapters_ specifies the number of media adapters. + * _media_adapters_type_ is an array of _num_media_adapters_ entries. + Each entry specifies the type of media adapter and must be one of the + values described in the <<dx9-media-adapter-types,media adapter types table>> below. + * _media_adapters_ is an array of _num_media_adapters_ entries. + Each entry specifies the actual adapter whose type is specified by + _media_adapters_type_. + The _media_adapters_ must be one of the types described in the + <<dx9-media-adapter-types,media adapter types>> + table. + * _media_adapter_set_ specifies the set of adapters to return and must be + one of the values described in the <<dx9-media-adapter-sets,media adapter sets>> table. + * _num_entries_ is the number of {cl_device_id_TYPE} entries that can be + added to _devices_. + If _devices_ is not `NULL`, the _num_entries_ must be greater than zero. + * _devices_ returns a list of OpenCL devices found that support the list + of media adapters specified. + The {cl_device_id_TYPE} values returned in _devices_ can be used to + identify a specific OpenCL device. + If _devices_ argument is `NULL`, this argument is ignored. 
+ The number of OpenCL devices returned is the minimum of the value + specified by _num_entries_ or the number of OpenCL devices whose type + matches _device_type_. + * _num_devices_ returns the number of OpenCL devices. + If _num_devices_ is `NULL`, this argument is ignored. + +[[dx9-media-adapter-types]] +.DirectX 9 object types that may be used by {clGetDeviceIDsFromDX9MediaAdapterKHR} +[cols=",",options="header",] +|==== +| {cl_dx9_media_adapter_type_khr_TYPE} | Type of Media Adapter +| {CL_ADAPTER_D3D9_KHR_anchor} + +include::{generated}/api/version-notes/CL_ADAPTER_D3D9_KHR.asciidoc[] + | `IDirect3DDevice9 *` +| {CL_ADAPTER_D3D9EX_KHR_anchor} + +include::{generated}/api/version-notes/CL_ADAPTER_D3D9EX_KHR.asciidoc[] + | `IDirect3DDevice9Ex *` +| {CL_ADAPTER_DXVA_KHR_anchor} + +include::{generated}/api/version-notes/CL_ADAPTER_DXVA_KHR.asciidoc[] + | `IDXVAHD_Device *` +|==== + +[[dx9-media-adapter-sets]] +.Sets of devices queriable using {clGetDeviceIDsFromDX9MediaAdapterKHR} +[cols=",",options="header",] +|==== +| {cl_dx9_media_adapter_set_khr_TYPE} | Description +| {CL_PREFERRED_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR_anchor} + +include::{generated}/api/version-notes/CL_PREFERRED_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR.asciidoc[] + | The preferred OpenCL devices associated with the media adapter. +| {CL_ALL_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR_anchor} + +include::{generated}/api/version-notes/CL_ALL_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR.asciidoc[] + | All OpenCL devices that may interoperate with the media adapter +|==== + +// refError + +{clGetDeviceIDsFromDX9MediaAdapterKHR} returns {CL_SUCCESS} if the function +is executed successfully. +Otherwise, it returns one of the following errors: + + * {CL_INVALID_PLATFORM} if _platform_ is not a valid platform. + * {CL_INVALID_VALUE} if _num_media_adapters_ is zero or if + _media_adapters_type_ is `NULL` or if _media_adapters_ is `NULL`. 
+ * {CL_INVALID_VALUE} if any of the entries in _media_adapters_type_ or + _media_adapters_ is not a valid value. + * {CL_INVALID_VALUE} if _media_adapter_set_ is not a valid value. + * {CL_INVALID_VALUE} if _num_entries_ is equal to zero and _devices_ is + not `NULL` or if both _num_devices_ and _devices_ are `NULL`. + * {CL_DEVICE_NOT_FOUND} if no OpenCL devices that correspond to adapters + specified in _media_adapters_ and _media_adapters_type_ were found. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources + required by the OpenCL implementation on the device. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. +-- +endif::cl_khr_dx9_media_sharing[] + + +ifdef::cl_khr_d3d10_sharing[] +=== Sharing Direct3D 10 Resources With OpenCL Memory Objects + +This section discusses OpenCL functions that allow applications to use +Direct3D 10 resources as OpenCL memory objects. +This allows efficient sharing of data between OpenCL and Direct3D 10. +The OpenCL API may be used to execute kernels that read and/or write memory +objects that are also Direct3D 10 resources. +An OpenCL image object may be created from a Direct3D 10 texture resource. +An OpenCL buffer object may be created from a Direct3D 10 buffer resource. +OpenCL memory objects may be created from Direct3D 10 objects if and only if +the OpenCL context has been created from a Direct3D 10 device. + +==== Querying OpenCL Devices Corresponding to Direct3D 10 Devices + +The OpenCL devices corresponding to a Direct3D 10 device may be queried. +The OpenCL devices corresponding to a DXGI adapter may also be queried. +The OpenCL devices corresponding to a Direct3D 10 device will be a subset of +the OpenCL devices corresponding to the DXGI adapter against which the +Direct3D 10 device was created. 
+ +[open,refpage='clGetDeviceIDsFromD3D10KHR',desc='Query OpenCL devices corresponding to a Direct3D 10 or DXGI device',type='protos'] +-- +To query OpenCL devices corresponding to a Direct3D 10 device or a DXGI +device, call the function + +include::{generated}/api/protos/clGetDeviceIDsFromD3D10KHR.txt[] +include::{generated}/api/version-notes/clGetDeviceIDsFromD3D10KHR.asciidoc[] + + * _platform_ refers to the platform ID returned by {clGetPlatformIDs}. + * _d3d_device_source_ specifies the type of _d3d_object_, and must be one + of the values shown in the <<d3d10-device-object-types-table,Direct3D 10 object types>> + table. + * _d3d_object_ specifies the object whose corresponding OpenCL devices are + being queried. + The type of _d3d_object_ must be as specified in the + <<d3d10-device-object-types-table,Direct3D 10 object types>> table. + * _d3d_device_set_ specifies the set of devices to return, and must be one + of the values shown in the <<d3d10-device-sets-table,Direct3D 10 device sets>> table. + * _num_entries_ is the number of {cl_device_id_TYPE} entries that can be + added to _devices_. + If _devices_ is not `NULL` then _num_entries_ must be greater than zero. + * _devices_ returns a list of OpenCL devices found. + The {cl_device_id_TYPE} values returned in _devices_ can be used to + identify a specific OpenCL device. + If _devices_ is `NULL`, this argument is ignored. + The number of OpenCL devices returned is the minimum of the value + specified by _num_entries_ and the number of OpenCL devices + corresponding to _d3d_object_. + * _num_devices_ returns the number of OpenCL devices available that + correspond to _d3d_object_. + If _num_devices_ is `NULL`, this argument is ignored. 
+ +[[d3d10-device-object-types-table]] +.Direct3D 10 object types that may be used by {clGetDeviceIDsFromD3D10KHR} +[cols=",",options="header",] +|==== +| {cl_d3d10_device_source_khr_TYPE} | Type of _d3d_object_ +| {CL_D3D10_DEVICE_KHR_anchor} + +include::{generated}/api/version-notes/CL_D3D10_DEVICE_KHR.asciidoc[] + | `ID3D10Device *` +| {CL_D3D10_DXGI_ADAPTER_KHR_anchor} + +include::{generated}/api/version-notes/CL_D3D10_DXGI_ADAPTER_KHR.asciidoc[] + | `IDXGIAdapter *` +|==== + +[[d3d10-device-sets-table]] +.Sets of devices queriable using {clGetDeviceIDsFromD3D10KHR} +[cols=",",options="header",] +|==== +| {cl_d3d10_device_set_khr_TYPE} | Devices returned in _devices_ +| {CL_PREFERRED_DEVICES_FOR_D3D10_KHR_anchor} + +include::{generated}/api/version-notes/CL_PREFERRED_DEVICES_FOR_D3D10_KHR.asciidoc[] + | The preferred OpenCL devices associated with the specified Direct3D + object. +| {CL_ALL_DEVICES_FOR_D3D10_KHR_anchor} + +include::{generated}/api/version-notes/CL_ALL_DEVICES_FOR_D3D10_KHR.asciidoc[] + | All OpenCL devices which may interoperate with the specified Direct3D + object. + Performance of sharing data on these devices may be considerably less + than on the preferred devices. +|==== + +// refError + +{clGetDeviceIDsFromD3D10KHR} returns {CL_SUCCESS} if the function is +executed successfully. +Otherwise it may return + + * {CL_INVALID_PLATFORM} if _platform_ is not a valid platform. + * {CL_INVALID_VALUE} if _d3d_device_source_ is not a valid value, + _d3d_device_set_ is not a valid value, _num_entries_ is equal to zero + and _devices_ is not `NULL`, or if both _num_devices_ and _devices_ are + `NULL`. + * {CL_DEVICE_NOT_FOUND} if no OpenCL devices that correspond to + _d3d_object_ were found. +-- +endif::cl_khr_d3d10_sharing[] + + +ifdef::cl_khr_d3d11_sharing[] +=== Sharing Direct3D 11 Resources With OpenCL Memory Objects + +This section discusses OpenCL functions that allow applications to use +Direct3D 11 resources as OpenCL memory objects. 
+This allows efficient sharing of data between OpenCL and Direct3D 11. +The OpenCL API may be used to execute kernels that read and/or write memory +objects that are also Direct3D 11 resources. +An OpenCL image object may be created from a Direct3D 11 texture resource. +An OpenCL buffer object may be created from a Direct3D 11 buffer resource. +OpenCL memory objects may be created from Direct3D 11 objects if and only if +the OpenCL context has been created from a Direct3D 11 device. + +==== Querying OpenCL Devices Corresponding to Direct3D 11 Devices + +The OpenCL devices corresponding to a Direct3D 11 device may be queried. +The OpenCL devices corresponding to a DXGI adapter may also be queried. +The OpenCL devices corresponding to a Direct3D 11 device will be a subset of +the OpenCL devices corresponding to the DXGI adapter against which the +Direct3D 11 device was created. + +[open,refpage='clGetDeviceIDsFromD3D11KHR',desc='Query OpenCL devices corresponding to a Direct3D 11 or DXGI device',type='protos'] +-- +To query OpenCL devices corresponding to a Direct3D 11 device or a DXGI +device, call the function + +include::{generated}/api/protos/clGetDeviceIDsFromD3D11KHR.txt[] +include::{generated}/api/version-notes/clGetDeviceIDsFromD3D11KHR.asciidoc[] + + * _platform_ refers to the platform ID returned by {clGetPlatformIDs}. + * _d3d_device_source_ specifies the type of _d3d_object_, and must be one + of the values shown in the <<d3d11-device-object-types-table,Direct3D 11 object types>> + table. + * _d3d_object_ specifies the object whose corresponding OpenCL devices are + being queried. + The type of _d3d_object_ must be as specified in the + <<d3d11-device-object-types-table,Direct3D 11 object types>> table. + * _d3d_device_set_ specifies the set of devices to return, and must be one + of the values shown in the <<d3d11-device-sets-table,Direct3D 11 device sets>> table. + * _num_entries_ is the number of {cl_device_id_TYPE} entries that can be + added to _devices_. + If _devices_ is not `NULL` then _num_entries_ must be greater than zero. + * _devices_ returns a list of OpenCL devices found. 
+ The {cl_device_id_TYPE} values returned in _devices_ can be used to + identify a specific OpenCL device. + If _devices_ is `NULL`, this argument is ignored. + The number of OpenCL devices returned is the minimum of the value + specified by _num_entries_ and the number of OpenCL devices + corresponding to _d3d_object_. + * _num_devices_ returns the number of OpenCL devices available that + correspond to _d3d_object_. + If _num_devices_ is `NULL`, this argument is ignored. + +[[d3d11-device-object-types-table]] +.Direct3D 11 object types that may be used by {clGetDeviceIDsFromD3D11KHR} +[cols=",",options="header",] +|==== +| {cl_d3d11_device_source_khr_TYPE} | Type of _d3d_object_ +| {CL_D3D11_DEVICE_KHR_anchor} + +include::{generated}/api/version-notes/CL_D3D11_DEVICE_KHR.asciidoc[] + | `ID3D11Device *` +| {CL_D3D11_DXGI_ADAPTER_KHR_anchor} + +include::{generated}/api/version-notes/CL_D3D11_DXGI_ADAPTER_KHR.asciidoc[] + | `IDXGIAdapter *` +|==== + +[[d3d11-device-sets-table]] +.Sets of devices queriable using {clGetDeviceIDsFromD3D11KHR} +[cols=",",options="header",] +|==== +| {cl_d3d11_device_set_khr_TYPE} | Devices returned in _devices_ +| {CL_PREFERRED_DEVICES_FOR_D3D11_KHR_anchor} + +include::{generated}/api/version-notes/CL_PREFERRED_DEVICES_FOR_D3D11_KHR.asciidoc[] + | The preferred OpenCL devices associated with the specified Direct3D + object. +| {CL_ALL_DEVICES_FOR_D3D11_KHR_anchor} + +include::{generated}/api/version-notes/CL_ALL_DEVICES_FOR_D3D11_KHR.asciidoc[] + | All OpenCL devices which may interoperate with the specified Direct3D + object. + Performance of sharing data on these devices may be considerably less + than on the preferred devices. +|==== + +// refError + +{clGetDeviceIDsFromD3D11KHR} returns {CL_SUCCESS} if the function is +executed successfully. +Otherwise it may return + + * {CL_INVALID_PLATFORM} if _platform_ is not a valid platform. 
+ * {CL_INVALID_VALUE} if _d3d_device_source_ is not a valid value, + _d3d_device_set_ is not a valid value, _num_entries_ is equal to zero + and _devices_ is not `NULL`, or if both _num_devices_ and _devices_ are + `NULL`. + * {CL_DEVICE_NOT_FOUND} if no OpenCL devices that correspond to + _d3d_object_ were found. +-- +endif::cl_khr_d3d11_sharing[] + + == Partitioning a Device NOTE: Partitioning devices is <> version 1.2. @@ -1650,7 +2662,7 @@ include::{generated}/api/version-notes/CL_DEVICE_PARTITION_BY_COUNTS.asciidoc[] include::{generated}/api/version-notes/CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN.asciidoc[] | {cl_device_affinity_domain_TYPE} - | Split the device into smaller aggregate devices containing one or + | Split the device into smaller aggregate devices containing one or more compute units that all share part of a cache hierarchy. The value accompanying this property may be drawn from the following list: @@ -1822,10 +2834,11 @@ include::{generated}/api/version-notes/clCreateContext.asciidoc[] Each property name is immediately followed by the corresponding desired value. The list is terminated with 0. - The list of supported properties is described in the - <> table. - _properties_ can be `NULL` in which case the platform that is selected is - implementation-defined. + The list of supported properties, and their default values if not + present in _properties_, is described in the <> table. + _properties_ can be `NULL`, in which case all properties take on their + default values. * _num_devices_ is the number of devices specified in the _devices_ argument. * _devices_ is a pointer to a list of unique devices returned by {clGetDeviceIDs} or sub-devices created by {clCreateSubDevices} for a @@ -1868,6 +2881,8 @@ on one or more devices specified in the context. include::{generated}/api/version-notes/CL_CONTEXT_PLATFORM.asciidoc[] | {cl_platform_id_TYPE} | Specifies the platform to use. + + Defaults to an implementation-defined platform if not specified. 
| {CL_CONTEXT_INTEROP_USER_SYNC_anchor} include::{generated}/api/version-notes/CL_CONTEXT_INTEROP_USER_SYNC.asciidoc[] @@ -1878,10 +2893,154 @@ include::{generated}/api/version-notes/CL_CONTEXT_INTEROP_USER_SYNC.asciidoc[] Specification that describe sharing with other APIs for restrictions on using this flag. - If {CL_CONTEXT_INTEROP_USER_SYNC} is not specified, a default of - {CL_FALSE} is assumed. + Defaults to {CL_FALSE} if not specified. + +ifdef::cl_khr_dx9_media_sharing[] +| {CL_CONTEXT_ADAPTER_D3D9_KHR_anchor} + +include::{generated}/api/version-notes/CL_CONTEXT_ADAPTER_D3D9_KHR.asciidoc[] + | `IDirect3DDevice9 *` + | Specifies an `IDirect3DDevice9` to use for D3D9 interop. +| {CL_CONTEXT_ADAPTER_D3D9EX_KHR_anchor} + +include::{generated}/api/version-notes/CL_CONTEXT_ADAPTER_D3D9EX_KHR.asciidoc[] + | `IDirect3DDevice9Ex *` + | Specifies an `IDirect3DDevice9Ex` to use for D3D9 interop. +| {CL_CONTEXT_ADAPTER_DXVA_KHR_anchor} + +include::{generated}/api/version-notes/CL_CONTEXT_ADAPTER_DXVA_KHR.asciidoc[] + | `IDXVAHD_Device *` + | Specifies an `IDXVAHD_Device` to use for DXVA interop. +endif::cl_khr_dx9_media_sharing[] + +ifdef::cl_khr_d3d10_sharing[] +| {CL_CONTEXT_D3D10_DEVICE_KHR_anchor} + +include::{generated}/api/version-notes/CL_CONTEXT_D3D10_DEVICE_KHR.asciidoc[] + | `ID3D10Device *` + | Specifies the `ID3D10Device *` to use for Direct3D 10 interoperability. + + The default value is `NULL`. +endif::cl_khr_d3d10_sharing[] + +ifdef::cl_khr_d3d11_sharing[] +| {CL_CONTEXT_D3D11_DEVICE_KHR_anchor} + +include::{generated}/api/version-notes/CL_CONTEXT_D3D11_DEVICE_KHR.asciidoc[] + | `ID3D11Device *` + | Specifies the `ID3D11Device *` to use for Direct3D 11 interoperability. + + The default value is `NULL`. 
+endif::cl_khr_d3d11_sharing[] + +ifdef::cl_khr_gl_sharing[] +| {CL_GL_CONTEXT_KHR_anchor} + +include::{generated}/api/version-notes/CL_GL_CONTEXT_KHR.asciidoc[] + | OpenGL context handle + | OpenGL context to associate the OpenCL context with + + Defaults to 0 if not specified. +| {CL_CGL_SHAREGROUP_KHR_anchor} + +include::{generated}/api/version-notes/CL_CGL_SHAREGROUP_KHR.asciidoc[] + | CGL share group handle + | CGL share group to associate the OpenCL context with + + Defaults to 0 if not specified. +| {CL_EGL_DISPLAY_KHR_anchor} + +include::{generated}/api/version-notes/CL_EGL_DISPLAY_KHR.asciidoc[] + | EGL `EGLDisplay` handle + | `EGLDisplay` an OpenGL context was created with respect to + + Defaults to `EGL_NO_DISPLAY` if not specified. +| {CL_GLX_DISPLAY_KHR_anchor} + +include::{generated}/api/version-notes/CL_GLX_DISPLAY_KHR.asciidoc[] + | X handle + | X Display an OpenGL context was created with respect to + + Defaults to `None` if not specified. +| {CL_WGL_HDC_KHR_anchor} + +include::{generated}/api/version-notes/CL_WGL_HDC_KHR.asciidoc[] + | Windows HDC handle + | HDC an OpenGL context was created with respect to + + Defaults to 0 if not specified. +endif::cl_khr_gl_sharing[] + +ifdef::cl_khr_initialize_memory[] +| {CL_CONTEXT_MEMORY_INITIALIZE_KHR_anchor} + +include::{generated}/api/version-notes/CL_CONTEXT_MEMORY_INITIALIZE_KHR.asciidoc[] + | {cl_context_memory_initialize_khr_TYPE} + | Describes which memory types for the context must be initialized. + This is a bit-field, where the following values are currently + supported: + + {CL_CONTEXT_MEMORY_INITIALIZE_LOCAL_KHR_anchor} -- Initialize local + memory to zeros. + + {CL_CONTEXT_MEMORY_INITIALIZE_PRIVATE_KHR_anchor} -- Initialize + private memory to zeros. 
+endif::cl_khr_initialize_memory[] + +ifdef::cl_khr_terminate_context[] +| {CL_CONTEXT_TERMINATE_KHR_anchor} + +include::{generated}/api/version-notes/CL_CONTEXT_TERMINATE_KHR.asciidoc[] + | {cl_bool_TYPE} + | Specifies whether the context can be terminated. + The default value is {CL_FALSE}. +endif::cl_khr_terminate_context[] |==== +ifdef::cl_khr_gl_sharing[] +Some of the properties specified in the <> table control sharing of OpenCL memory objects with OpenGL +buffer, texture, and renderbuffer objects. + +[[specifying-gl-context]] +Depending on the platform-specific API used to bind OpenGL contexts to the +window system, the following properties may be set to identify an OpenGL +context: + + * When the CGL binding API is supported, the property + {CL_CGL_SHAREGROUP_KHR} should be set to a CGLShareGroup handle to a CGL + share group object. + * When the EGL binding API is supported, the property {CL_GL_CONTEXT_KHR} + should be set to an EGLContext handle to an OpenGL ES or OpenGL context, + and the property {CL_EGL_DISPLAY_KHR} should be set to the `EGLDisplay` + handle of the display used to create the OpenGL ES or OpenGL context. + * When the GLX binding API is supported, the property {CL_GL_CONTEXT_KHR} + should be set to a GLXContext handle to an OpenGL context, and the + property {CL_GLX_DISPLAY_KHR} should be set to the `Display` handle of + the X Window System display used to create the OpenGL context. + * When the WGL binding API is supported, the property {CL_GL_CONTEXT_KHR} + should be set to an HGLRC handle to an OpenGL context, and the property + {CL_WGL_HDC_KHR} should be set to the HDC handle of the display used to + create the OpenGL context. 
+ +Memory objects created in the context so specified may be shared with the +specified OpenGL or OpenGL ES context (as well as with any other OpenGL +contexts on the share list of that context, according to the description of +sharing in the GLX 1.4 and EGL 1.5 specifications, and the WGL documentation +for OpenGL implementations on Microsoft Windows), or with the explicitly +identified OpenGL share group for CGL. +If no OpenGL or OpenGL ES context or share group is specified in the +property list, then memory objects may not be shared, and attempts to create +such objects will result in a {CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR} +error. + +OpenCL / OpenGL sharing does not support the {CL_CONTEXT_INTEROP_USER_SYNC} +property defined in the <> +table. +Specifying this property when creating a context with OpenCL / OpenGL +sharing will return an appropriate error. +endif::cl_khr_gl_sharing[] + NOTE: There are a number of cases where error notifications need to be delivered due to an error that occurs outside a context. Such notifications may not be delivered through the _pfn_notify_ callback. @@ -1897,7 +3056,7 @@ returned in _errcode_ret_: * {CL_INVALID_PLATFORM} if no platform is specified in _properties_ and no platform could be selected, or if the platform specified in _properties_ is not a valid platform. - * {CL_INVALID_PROPERTY} if context property name in _properties_ is not a + * {CL_INVALID_PROPERTY} if a context property name in _properties_ is not a supported property name, if the value specified for a supported property name is not valid, or if the same property name is specified more than once. @@ -1914,6 +3073,92 @@ returned in _errcode_ret_: * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources required by the OpenCL implementation on the host. 
+ifdef::cl_khr_dx9_media_sharing[] +The following errors may be returned if the `<>` +extension is supported: + + * {CL_INVALID_DX9_MEDIA_ADAPTER_KHR} if any of the values of the + properties {CL_CONTEXT_ADAPTER_D3D9_KHR}, + {CL_CONTEXT_ADAPTER_D3D9EX_KHR} or {CL_CONTEXT_ADAPTER_DXVA_KHR} is + non-`NULL` and does not specify a valid media adapter with which the + _cl_device_ids_ against which this context is to be created may + interoperate. +endif::cl_khr_dx9_media_sharing[] + +ifdef::cl_khr_d3d10_sharing[] +The following errors may be returned if the `<>` +extension is supported: + + * {CL_INVALID_D3D10_DEVICE_KHR} if the value of the property + {CL_CONTEXT_D3D10_DEVICE_KHR} is non-`NULL` and does not specify a valid + Direct3D 10 device with which the _cl_device_ids_ against which this + context is to be created may interoperate. + * {CL_INVALID_OPERATION} if Direct3D 10 interoperability is specified by + setting {CL_CONTEXT_D3D10_DEVICE_KHR} to a non-`NULL` value, and + interoperability with another graphics API is also specified. +endif::cl_khr_d3d10_sharing[] + +ifdef::cl_khr_d3d11_sharing[] +The following errors may be returned if the `<>` +extension is supported: + + * {CL_INVALID_D3D11_DEVICE_KHR} if the value of the property + {CL_CONTEXT_D3D11_DEVICE_KHR} is non-`NULL` and does not specify a valid + Direct3D 11 device with which the _cl_device_ids_ against which this + context is to be created may interoperate. + * {CL_INVALID_OPERATION} if Direct3D 11 interoperability is specified by + setting {CL_CONTEXT_D3D11_DEVICE_KHR} to a non-`NULL` value, and + interoperability with another graphics API is also specified. 
+endif::cl_khr_d3d11_sharing[] + +ifdef::cl_khr_gl_sharing[] +The following errors may be returned if the `<>` +extension is supported: + + * {CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR} if a context was specified for + an OpenGL or OpenGL ES implementation using the EGL, GLX, or WGL binding + APIs, as <>; and any of the + following conditions hold: + ** The specified display and context properties do not identify a valid + OpenGL or OpenGL ES context. + ** The specified context does not support buffer and renderbuffer objects. + ** The specified context is not compatible with the OpenCL context being + created (for example, it exists in a physically distinct address space, + such as another hardware device; or it does not support sharing data + with OpenCL due to implementation restrictions). + * {CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR} if a share group was specified + for a CGL-based OpenGL implementation by setting the property + {CL_CGL_SHAREGROUP_KHR}, and the specified share group does not identify + a valid CGL share group object. + * {CL_INVALID_OPERATION} if a context was specified as described above and + any of the following conditions hold: + ** A context or share group object was specified for one of CGL, EGL, GLX, + or WGL and the OpenGL implementation does not support that + window-system binding API. + ** More than one of the properties {CL_CGL_SHAREGROUP_KHR}, + {CL_EGL_DISPLAY_KHR}, {CL_GLX_DISPLAY_KHR}, and {CL_WGL_HDC_KHR} is set + to a non-default value. + ** Both of the properties {CL_CGL_SHAREGROUP_KHR} and {CL_GL_CONTEXT_KHR} + are set to non-default values. + ** Any of the devices specified in the _devices_ argument cannot support + OpenCL objects which share the data store of an OpenGL object. + * {CL_INVALID_PROPERTY} if both {CL_CONTEXT_INTEROP_USER_SYNC}, and any of + the properties defined by the `<>` extension are + defined in _properties_. 
+endif::cl_khr_gl_sharing[] + +ifdef::cl_khr_terminate_context[] +The following errors may be returned if the `<>` +extension is supported: + + * {CL_INVALID_PROPERTY} if the `<>` extension is + supported and {CL_CONTEXT_TERMINATE_KHR} is set to {CL_TRUE} in + _properties_, but not all of the devices associated with the context + support the ability to support context termination (i.e. + {CL_DEVICE_TERMINATE_CAPABILITY_CONTEXT_KHR} is set for + {CL_DEVICE_TERMINATE_CAPABILITY_KHR}). +endif::cl_khr_terminate_context[] + [NOTE] ==== It is possible that a device(s) becomes unavailable after a context and @@ -1940,10 +3185,11 @@ include::{generated}/api/version-notes/clCreateContextFromType.asciidoc[] corresponding values. Each property name is immediately followed by the corresponding desired value. - The list of supported properties is described in the - <> table. - _properties_ can also be `NULL` in which case the platform that is selected - is implementation-defined. + The list of supported properties, and their default values if not + present in _properties_, is described in the <> table. + _properties_ can be `NULL`, in which case all properties take on their + default values. * _device_type_ is a bit-field that identifies the type of device and is described in the <> table. * _pfn_notify_ and _user_data_ are described in {clCreateContext}. @@ -1965,7 +3211,7 @@ returned in _errcode_ret_: * {CL_INVALID_PLATFORM} if no platform is specified in _properties_ and no platform could be selected, or if the platform specified in _properties_ is not a valid platform. - * {CL_INVALID_PROPERTY} if context property name in _properties_ is not a + * {CL_INVALID_PROPERTY} if a context property name in _properties_ is not a supported property name, if the value specified for a supported property name is not valid, or if the same property name is specified more than once. @@ -1981,6 +3227,81 @@ returned in _errcode_ret_: by the OpenCL implementation on the device. 
* {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources required by the OpenCL implementation on the host. + +ifdef::cl_khr_dx9_media_sharing[] +The following errors may be returned if the `<>` +extension is supported: + + * {CL_INVALID_DX9_MEDIA_ADAPTER_KHR} if any of the values of the + properties {CL_CONTEXT_ADAPTER_D3D9_KHR}, + {CL_CONTEXT_ADAPTER_D3D9EX_KHR} or {CL_CONTEXT_ADAPTER_DXVA_KHR} is + non-`NULL` and does not specify a valid media adapter with which the + _cl_device_ids_ against which this context is to be created may + interoperate. +endif::cl_khr_dx9_media_sharing[] + +ifdef::cl_khr_d3d10_sharing[] +The following errors may be returned if the `<>` +extension is supported: + + * {CL_INVALID_D3D10_DEVICE_KHR} if the value of the property + {CL_CONTEXT_D3D10_DEVICE_KHR} is non-`NULL` and does not specify a valid + Direct3D 10 device with which the _cl_device_ids_ against which this + context is to be created may interoperate. + * {CL_INVALID_OPERATION} if Direct3D 10 interoperability is specified by + setting {CL_CONTEXT_D3D10_DEVICE_KHR} to a non-`NULL` value, and + interoperability with another graphics API is also specified. +endif::cl_khr_d3d10_sharing[] + +ifdef::cl_khr_d3d11_sharing[] +The following errors may be returned if the `<>` +extension is supported: + + * {CL_INVALID_D3D11_DEVICE_KHR} if the value of the property + {CL_CONTEXT_D3D11_DEVICE_KHR} is non-`NULL` and does not specify a valid + Direct3D 11 device with which the _cl_device_ids_ against which this + context is to be created may interoperate. + * {CL_INVALID_OPERATION} if Direct3D 11 interoperability is specified by + setting {CL_CONTEXT_D3D11_DEVICE_KHR} to a non-`NULL` value, and + interoperability with another graphics API is also specified. 
+endif::cl_khr_d3d11_sharing[] + +ifdef::cl_khr_gl_sharing[] +The following errors may be returned if the `<>` +extension is supported: + + * {CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR} if a context was specified for + an OpenGL or OpenGL ES implementation using the EGL, GLX, or WGL binding + APIs, as <>; and + any of the following conditions hold: + ** The specified display and context properties do not identify a valid + OpenGL or OpenGL ES context. + ** The specified context does not support buffer and renderbuffer objects. + ** The specified context is not compatible with the OpenCL context being + created (for example, it exists in a physically distinct address space, + such as another hardware device; or it does not support sharing data + with OpenCL due to implementation restrictions). + * {CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR} if a share group was specified + for a CGL-based OpenGL implementation by setting the property + {CL_CGL_SHAREGROUP_KHR}, and the specified share group does not identify + a valid CGL share group object. + * {CL_INVALID_OPERATION} if a context was specified as described above and + any of the following conditions hold: + ** A context or share group object was specified for one of CGL, EGL, GLX, + or WGL and the OpenGL implementation does not support that + window-system binding API. + ** More than one of the properties {CL_CGL_SHAREGROUP_KHR}, + {CL_EGL_DISPLAY_KHR}, {CL_GLX_DISPLAY_KHR}, and {CL_WGL_HDC_KHR} is set + to a non-default value. + ** Both of the properties {CL_CGL_SHAREGROUP_KHR} and {CL_GL_CONTEXT_KHR} + are set to non-default values. + ** Any of the devices specified in the _devices_ argument cannot support + OpenCL objects which share the data store of an OpenGL object. + * {CL_INVALID_PROPERTY} if both {CL_CONTEXT_INTEROP_USER_SYNC}, and any of + the properties defined by the `<>` extension are + defined in _properties_. 
+endif::cl_khr_gl_sharing[] + -- [open,refpage='clRetainContext',desc='Retain an OpenCL context',type='protos',xrefs='clCreateContext clReleaseContext'] @@ -2044,6 +3365,75 @@ Otherwise, it returns one of the following errors: required by the OpenCL implementation on the host. -- +ifdef::cl_khr_terminate_context[] +[open,refpage='clTerminateContextKHR',desc='Terminate pending work associated with a context and render data owned by the context invalid',type='protos'] +-- +To terminate all pending work associated with a context and render all data +owned by the context invalid, call the function + +include::{generated}/api/protos/clTerminateContextKHR.txt[] +include::{generated}/api/version-notes/clTerminateContextKHR.asciidoc[] + + * _context_ must be a valid OpenCL context. + +It is the responsibility of the application to release all objects +associated with the context being terminated. + +When a context is terminated: + + * The execution status of enqueued commands will be + {CL_CONTEXT_TERMINATED_KHR_anchor}. + Event objects can be queried using {clGetEventInfo}. + Event callbacks can be registered and registered event callbacks will be + called with _event_command_status_ set to {CL_CONTEXT_TERMINATED_KHR}. + {clWaitForEvents} will return immediately for commands associated + with event objects specified in _event_list_. + The status of user events can be set. + Event objects can be retained and released. + {clGetEventProfilingInfo} returns {CL_PROFILING_INFO_NOT_AVAILABLE}. + * The context is considered to be terminated. + A callback function registered when the context was created will be + called. + Only queries, retain and release operations can be performed on the + context. + All other APIs that use a context as an argument will return + {CL_CONTEXT_TERMINATED_KHR}. + * The contents of the memory regions of the memory objects are undefined. 
+ Queries, registering a destructor callback, retain and release + operations can be performed on the memory objects. + * Once a context has been terminated, all OpenCL API calls that create + objects or enqueue commands will return {CL_CONTEXT_TERMINATED_KHR}. + APIs that release OpenCL objects will continue to operate as though + {clTerminateContextKHR} was not called. + * The behavior of callbacks will remain unchanged, and will report an + appropriate error, if executing after termination of context. + This behavior is similar to enqueued commands, after the command-queue + has become invalid. + +// refError + +{clTerminateContextKHR} returns {CL_SUCCESS} if the function is executed +successfully. +Otherwise, it returns one of the following errors: + + * {CL_INVALID_CONTEXT} if _context_ is not a valid OpenCL context. + * {CL_CONTEXT_TERMINATED_KHR} if _context_ has already been terminated. + * {CL_INVALID_OPERATION} if _context_ was not created with + {CL_CONTEXT_TERMINATE_KHR} set to {CL_TRUE}. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources + required by the OpenCL implementation on the device. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. + +An implementation that supports this extension must be able to terminate +commands currently executing on devices or queued across all command-queues +associated with the context that is being terminated. +The implementation cannot implement this extension by waiting for currently +executing (or queued) commands to finish execution on devices associated +with this context (i.e. doing a {clFinish}). 
+-- +endif::cl_khr_terminate_context[] + [open,refpage='clGetContextInfo',desc='Query information about an OpenCL context',type='protos'] -- To query information about a context, call the function: @@ -2106,6 +3496,31 @@ include::{generated}/api/version-notes/CL_CONTEXT_PROPERTIES.asciidoc[] {clCreateContextFromType} used to create _context_ was `NULL`, the implementation must return _param_value_size_ret_ equal to 0, indicating that there are no properties to be returned. + +ifdef::cl_khr_d3d10_sharing[] +| {CL_CONTEXT_D3D10_PREFER_SHARED_RESOURCES_KHR_anchor} + +include::{generated}/api/version-notes/CL_CONTEXT_D3D10_PREFER_SHARED_RESOURCES_KHR.asciidoc[] + | {cl_bool_TYPE} + | Returns {CL_TRUE} if Direct3D 10 resources created as shared by + setting _MiscFlags_ to include `D3D10_RESOURCE_MISC_SHARED` will + perform faster when shared with OpenCL, compared with resources which + have not set this flag. + Otherwise returns {CL_FALSE}. +endif::cl_khr_d3d10_sharing[] + +ifdef::cl_khr_d3d11_sharing[] +| {CL_CONTEXT_D3D11_PREFER_SHARED_RESOURCES_KHR_anchor} + +include::{generated}/api/version-notes/CL_CONTEXT_D3D11_PREFER_SHARED_RESOURCES_KHR.asciidoc[] + | {cl_bool_TYPE} + | Returns {CL_TRUE} if Direct3D 11 resources created as shared by + setting _MiscFlags_ to include `D3D11_RESOURCE_MISC_SHARED` will + perform faster when shared with OpenCL, compared with resources which + have not set this flag. + Otherwise returns {CL_FALSE}. +endif::cl_khr_d3d11_sharing[] + |==== // refError diff --git a/api/opencl_runtime_layer.asciidoc b/api/opencl_runtime_layer.asciidoc index 18902d1b..29862064 100644 --- a/api/opencl_runtime_layer.asciidoc +++ b/api/opencl_runtime_layer.asciidoc @@ -1,6 +1,5 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ +// Copyright 2017-2024 The Khronos Group Inc. 
+// SPDX-License-Identifier: CC-BY-4.0 [[opencl-runtime]] = The OpenCL Runtime @@ -25,20 +24,26 @@ Sharing of objects across multiple command-queues will require the application to perform appropriate synchronization. This is described in <> -[open,refpage='clCreateCommandQueueWithProperties',desc='Create a host or device command-queue on a specific device.',type='protos'] +[open,refpage='clCreateCommandQueueWithProperties',desc='Create a host or device command-queue on a specific device.',type='protos',alias='clCreateCommandQueueWithPropertiesKHR'] -- To create a host or device command-queue on a specific device, call the function include::{generated}/api/protos/clCreateCommandQueueWithProperties.txt[] include::{generated}/api/version-notes/clCreateCommandQueueWithProperties.asciidoc[] -Also see extension *cl_khr_create_command_queue*. + +ifdef::cl_khr_create_command_queue[] +or the equivalent + +include::{generated}/api/protos/clCreateCommandQueueWithPropertiesKHR.txt[] +include::{generated}/api/version-notes/clCreateCommandQueueWithPropertiesKHR.asciidoc[] +endif::cl_khr_create_command_queue[] * _context_ must be a valid OpenCL context. * _device_ must be a device or sub-device associated with _context_. It can either be in the list of devices and sub-devices specified when - _context_ is created using {clCreateContext} or be a root device with the - same device type as specified when _context_ is created using + _context_ is created using {clCreateContext} or be a root device with + the same device type as specified when _context_ is created using {clCreateContextFromType}. * _properties_ specifies a list of properties for the command-queue and their corresponding values. @@ -63,6 +68,10 @@ Also see extension *cl_khr_create_command_queue*. 
include::{generated}/api/version-notes/CL_QUEUE_PROPERTIES.asciidoc[] | {cl_command_queue_properties_TYPE} + +ifdef::cl_khr_create_command_queue[] +or {cl_bitfield_TYPE} if the `<>` extension is supported +endif::cl_khr_create_command_queue[] | This is a bitfield and can be set to a combination of the following values: @@ -71,12 +80,14 @@ include::{generated}/api/version-notes/CL_QUEUE_PROPERTIES.asciidoc[] out-of-order. If set, the commands in the command-queue are executed out-of-order. Otherwise, commands are executed in-order. + include::{generated}/api/version-notes/CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE.asciidoc[] {CL_QUEUE_PROFILING_ENABLE_anchor} - Enable or disable profiling of commands in the command-queue. If set, the profiling of commands is enabled. Otherwise profiling of commands is disabled. + include::{generated}/api/version-notes/CL_QUEUE_PROFILING_ENABLE.asciidoc[] {CL_QUEUE_ON_DEVICE_anchor} - Indicates that this is a device queue. @@ -84,12 +95,14 @@ include::{generated}/api/version-notes/CL_QUEUE_PROFILING_ENABLE.asciidoc[] {CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE} footnote:[{fn-out-of-order-device-queue}] must also be set. + include::{generated}/api/version-notes/CL_QUEUE_ON_DEVICE.asciidoc[] {CL_QUEUE_ON_DEVICE_DEFAULT_anchor} footnote:[{fn-default-device-queue}] - indicates that this is the default device queue. This can only be used with {CL_QUEUE_ON_DEVICE}. + include::{generated}/api/version-notes/CL_QUEUE_ON_DEVICE_DEFAULT.asciidoc[] If {CL_QUEUE_PROPERTIES} is not specified an in-order host command-queue @@ -109,6 +122,55 @@ include::{generated}/api/version-notes/CL_QUEUE_SIZE.asciidoc[] If {CL_QUEUE_SIZE} is not specified, the device queue is created with {CL_DEVICE_QUEUE_ON_DEVICE_PREFERRED_SIZE} as the size of the queue. 
+ +ifdef::cl_khr_priority_hints[] +| {CL_QUEUE_PRIORITY_KHR_anchor} + +include::{generated}/api/version-notes/CL_QUEUE_PRIORITY_KHR.asciidoc[] + | {cl_queue_priority_khr_TYPE} + | Specifies a priority hint for command queues belonging to the same + OpenCL context. + + NOTE: Refer to the user guide associated with each implementation + supporting this extension for its priority behavior guarantees, if + any. + + {CL_QUEUE_PRIORITY_HIGH_KHR_anchor} - Indicates command queues + should have high priority. + + {CL_QUEUE_PRIORITY_MED_KHR_anchor} - Indicates command queues should + have medium priority. + + {CL_QUEUE_PRIORITY_LOW_KHR_anchor} - Indicates command queues should + have low priority. + + If {CL_QUEUE_PRIORITY_KHR} is not specified, the default priority + {CL_QUEUE_PRIORITY_MED_KHR} is used. +endif::cl_khr_priority_hints[] + +ifdef::cl_khr_throttle_hints[] +| {CL_QUEUE_THROTTLE_KHR_anchor} + +include::{generated}/api/version-notes/CL_QUEUE_THROTTLE_KHR.asciidoc[] + | {cl_queue_throttle_khr_TYPE} + | Specifies a throttle hint for a command queue. + + NOTE: Refer to the user guide associated with each implementation + supporting this extension for its throttling behavior guarantees, if + any. + + {CL_QUEUE_THROTTLE_HIGH_KHR_anchor} - Indicates the queue should + execute at full throttle, which may consume more energy. + + {CL_QUEUE_THROTTLE_MED_KHR_anchor} - Indicates normal throttling + behavior. + + {CL_QUEUE_THROTTLE_LOW_KHR_anchor} - Indicates the queue should + execute at low throttle, optimized for lowest energy consumption. + + If {CL_QUEUE_THROTTLE_KHR} is not specified, the default throttle + {CL_QUEUE_THROTTLE_MED_KHR} is used. +endif::cl_khr_throttle_hints[] |==== // refError @@ -120,13 +182,23 @@ Otherwise, it returns a `NULL` value with one of the following error values returned in _errcode_ret_: * {CL_INVALID_CONTEXT} if _context_ is not a valid context. - * {CL_INVALID_DEVICE} if _device_ is not a valid device or is not associated - with _context_. 
+ * {CL_INVALID_DEVICE} if _device_ is not a valid device or is not + associated with _context_. * {CL_INVALID_VALUE} if values specified in _properties_ are not valid. * {CL_INVALID_QUEUE_PROPERTIES} if values specified in _properties_ are valid but are not supported by the device. - * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required - by the OpenCL implementation on the device. +ifdef::cl_khr_priority_hints[] + * {CL_INVALID_QUEUE_PROPERTIES} if the `<>` + extension is supported, the {CL_QUEUE_PRIORITY_KHR} property is + specified, and the queue is a {CL_QUEUE_ON_DEVICE}. +endif::cl_khr_priority_hints[] +ifdef::cl_khr_throttle_hints[] + * {CL_INVALID_QUEUE_PROPERTIES} if the `<>` + extension is supported, the {CL_QUEUE_THROTTLE_KHR} property is + specified, and the queue is a {CL_QUEUE_ON_DEVICE}. +endif::cl_khr_throttle_hints[] + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources + required by the OpenCL implementation on the device. * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources required by the OpenCL implementation on the host. -- @@ -169,7 +241,6 @@ include::{generated}/api/version-notes/clCreateCommandQueue.asciidoc[] If set, the profiling of commands is enabled. Otherwise profiling of commands is disabled. |==== - * _errcode_ret_ will return an appropriate error code. If _errcode_ret_ is `NULL`, no error code is returned. @@ -425,8 +496,8 @@ include::{generated}/api/version-notes/clSetCommandQueueProperty.asciidoc[] enabled (if _enable_ is {CL_TRUE}) or disabled (if _enable_ is {CL_FALSE}) for the command-queue. * _old_properties_ returns the command-queue properties before they were - changed by {clSetCommandQueueProperty}. If _old_properties_ is `NULL`, it - is ignored. + changed by {clSetCommandQueueProperty}. If _old_properties_ is `NULL`, + it is ignored. 
// refError @@ -478,7 +549,9 @@ include::{generated}/api/version-notes/clCreateBufferWithProperties.asciidoc[] * _properties_ is an optional list of properties for the buffer object and their corresponding values. The list is terminated with the special property `0`. If no properties are required, _properties_ may be `NULL`. - OpenCL 3.0 does not define any optional properties for buffers. + OpenCL 3.0 does not define any optional properties for buffers, + but extensions may define properties as described in the + <>. * _flags_ is a bit-field that is used to specify allocation and usage information about the image memory object being created and is described in the <> table. @@ -510,6 +583,38 @@ Locations in the buffers underlying shared memory can be operated on using atomic operations to the devices level of support as defined in the memory model. +[[external-buffer-memory-properties-table]] +.List of supported buffer creation properties +[width="100%",cols="<33%,<17%,<50%",options="header"] +|==== +| Property | Property Value | Description + +ifdef::cl_khr_external_memory[] +| {CL_MEM_DEVICE_HANDLE_LIST_KHR_anchor} + +include::{generated}/api/version-notes/CL_MEM_DEVICE_HANDLE_LIST_KHR.asciidoc[] + | {cl_device_id_TYPE}[] + | Specifies the list of OpenCL devices (terminated with + {CL_MEM_DEVICE_HANDLE_LIST_END_KHR}) to associate with the external + memory handle. +endif::cl_khr_external_memory[] +|==== + +ifdef::cl_khr_external_memory[] +If {CL_MEM_DEVICE_HANDLE_LIST_KHR} is not specified as part of _properties_, +the memory object created by {clCreateBufferWithProperties} or +{clCreateImageWithProperties} is by default accessible to all devices in the +_context_. + +The properties used to create a buffer from an external memory handle are +<>. 
+When a buffer is created from an external memory handle, the +_flags_ used to specify usage information for the buffer must not +include {CL_MEM_USE_HOST_PTR}, {CL_MEM_ALLOC_HOST_PTR}, or +{CL_MEM_COPY_HOST_PTR}, and the _host_ptr_ argument must be `NULL`. +endif::cl_khr_external_memory[] + // refError {clCreateBuffer} and {clCreateBufferWithProperties} returns a valid non-zero @@ -538,6 +643,26 @@ returned in _errcode_ret_: by the OpenCL implementation on the device. * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources required by the OpenCL implementation on the host. +ifdef::cl_khr_external_memory[] + * {CL_INVALID_DEVICE} + ** if a device identified by the property {CL_MEM_DEVICE_HANDLE_LIST_KHR} + is not a valid device or is not associated with _context_, or + ** if a device identified by property {CL_MEM_DEVICE_HANDLE_LIST_KHR} + cannot import the requested external memory object type, or + ** if {CL_MEM_DEVICE_HANDLE_LIST_KHR} is not specified as part of + _properties_ and one or more devices in _context_ cannot import the + requested external memory object type. + * {CL_INVALID_VALUE} + ** if _properties_ includes a supported external memory handle and _flags_ + includes {CL_MEM_USE_HOST_PTR}, {CL_MEM_ALLOC_HOST_PTR}, or + {CL_MEM_COPY_HOST_PTR}. + * {CL_INVALID_HOST_PTR} + ** if _properties_ includes a supported external memory handle and + _host_ptr_ is not `NULL`. + * {CL_INVALID_PROPERTY} + ** if _properties_ does not include a supported external memory handle and + {CL_MEM_DEVICE_HANDLE_LIST_KHR} is specified as part of _properties_. 
+endif::cl_khr_external_memory[] [[memory-flags-table]] .List of supported memory flag values @@ -1583,6 +1708,188 @@ include::{generated}/api/version-notes/CL_MAP_WRITE_INVALIDATE_REGION.asciidoc[] -- +ifdef::cl_khr_d3d10_sharing[] +=== Creating OpenCL Buffer Objects From Direct3D 10 Buffer Resources + +[open,refpage='clCreateFromD3D10BufferKHR',desc='Create OpenCL buffer object from a Direct3D 10 buffer',type='protos'] +-- +To create an OpenCL buffer object from a Direct3D 10 buffer, call the +function + +include::{generated}/api/protos/clCreateFromD3D10BufferKHR.txt[] +include::{generated}/api/version-notes/clCreateFromD3D10BufferKHR.asciidoc[] + + * _context_ is a valid OpenCL context created from a Direct3D 10 device. + * _flags_ is a bit-field that is used to specify usage information. + Refer to the <> table for a description of _flags_. + Only the {CL_MEM_READ_ONLY}, {CL_MEM_WRITE_ONLY} and {CL_MEM_READ_WRITE} + flags specified in that table can be used. + * _resource_ is a pointer to the Direct3D 10 buffer to share. + * _errcode_ret_ will return an appropriate error code. + If _errcode_ret_ is `NULL`, no error code is returned. + +The size of the returned OpenCL buffer object is the same as the size of +_resource_. +This call will increment the internal Direct3D 10 reference count on +_resource_. +The internal Direct3D 10 reference count on _resource_ will be decremented when +the OpenCL reference count on the returned OpenCL memory object drops to +zero. + +NOTE: Refer to the <> and +<> sections for +more information. + +// refError + +{clCreateFromD3D10BufferKHR} returns a valid non-zero OpenCL buffer object +and _errcode_ret_ is set to {CL_SUCCESS} if the buffer object is created +successfully. +Otherwise, it returns a `NULL` value with one of the following error values +returned in _errcode_ret_: + + * {CL_INVALID_CONTEXT} if _context_ is not a valid context. + * {CL_INVALID_VALUE} if values specified in _flags_ are not valid. 
+ * {CL_INVALID_D3D10_RESOURCE_KHR} if _resource_ is not a Direct3D 10 + buffer resource, if _resource_ was created with the D3D10_USAGE flag + D3D10_USAGE_IMMUTABLE, if a {cl_mem_TYPE} from _resource_ has already + been created using {clCreateFromD3D10BufferKHR}, or if _context_ was not + created against the same Direct3D 10 device from which _resource_ was + created. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. +-- +endif::cl_khr_d3d10_sharing[] + + +ifdef::cl_khr_d3d11_sharing[] +=== Creating OpenCL Buffer Objects From Direct3D 11 Buffer Resources + +[open,refpage='clCreateFromD3D11BufferKHR',desc='Create OpenCL buffer object from a Direct3D 11 buffer',type='protos'] +-- +To create an OpenCL buffer object from a Direct3D 11 buffer, call the +function + +include::{generated}/api/protos/clCreateFromD3D11BufferKHR.txt[] +include::{generated}/api/version-notes/clCreateFromD3D11BufferKHR.asciidoc[] + + * _context_ is a valid OpenCL context created from a Direct3D 11 device. + * _flags_ is a bit-field that is used to specify usage information. + Refer to the <> table for a description of _flags_. + Only the {CL_MEM_READ_ONLY}, {CL_MEM_WRITE_ONLY} and {CL_MEM_READ_WRITE} + flags specified in that table can be used. + * _resource_ is a pointer to the Direct3D 11 buffer to share. + * _errcode_ret_ will return an appropriate error code. + If _errcode_ret_ is `NULL`, no error code is returned. + +The size of the returned OpenCL buffer object is the same as the size of +_resource_. +This call will increment the internal Direct3D 11 reference count on +_resource_. +The internal Direct3D 11 reference count on _resource_ will be decremented when +the OpenCL reference count on the returned OpenCL memory object drops to +zero. + +NOTE: Refer to the <> and +<> sections for +more information. 
+ +// refError + +{clCreateFromD3D11BufferKHR} returns a valid non-zero OpenCL buffer object +and _errcode_ret_ is set to {CL_SUCCESS} if the buffer object is created +successfully. +Otherwise, it returns a `NULL` value with one of the following error values +returned in _errcode_ret_: + + * {CL_INVALID_CONTEXT} if _context_ is not a valid context. + * {CL_INVALID_VALUE} if values specified in _flags_ are not valid. + * {CL_INVALID_D3D11_RESOURCE_KHR} if _resource_ is not a Direct3D 11 + buffer resource, if _resource_ was created with the D3D11_USAGE flag + D3D11_USAGE_IMMUTABLE, if a {cl_mem_TYPE} from _resource_ has already + been created using {clCreateFromD3D11BufferKHR}, or if _context_ was not + created against the same Direct3D 11 device from which _resource_ was + created. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. +-- +endif::cl_khr_d3d11_sharing[] + + +ifdef::cl_khr_gl_sharing[] +=== Creating OpenCL Buffer Objects From OpenGL Buffer Objects + +[open,refpage='clCreateFromGLBuffer',desc='Create OpenCL buffer object from an OpenGL buffer object',type='protos'] +-- +To create an OpenCL buffer object from an OpenGL buffer object, call the +function + +include::{generated}/api/protos/clCreateFromGLBuffer.txt[] +include::{generated}/api/version-notes/clCreateFromGLBuffer.asciidoc[] + + * _context_ is a valid OpenCL context created from an OpenGL context. + * _flags_ is a bit-field that is used to specify usage information. + Refer to the <> table for a + description of _flags_. + Only the {CL_MEM_READ_ONLY}, {CL_MEM_WRITE_ONLY} and {CL_MEM_READ_WRITE} + flags specified in that table can be used. + * _bufobj_ is the name of an OpenGL buffer object. + The data store of the OpenGL buffer object must have been + previously created by calling `glBufferData`, although its contents need + not be initialized. 
+ The size of the data store will be used to determine the size of the + OpenCL buffer object. + * _errcode_ret_ will return an appropriate error code as described below. + If _errcode_ret_ is `NULL`, no error code is returned. + +The size of the OpenGL buffer object data store at the time +{clCreateFromGLBuffer} is called will be used as the size of the buffer object +returned by {clCreateFromGLBuffer}. +If the state of an OpenGL buffer object is modified through the OpenGL API +(e.g. `glBufferData`) while there exists a corresponding OpenCL buffer +object, subsequent use of the OpenCL buffer object will result in undefined +behavior. + +The {clRetainMemObject} and {clReleaseMemObject} functions can be used to +retain and release the buffer object. + +The OpenCL buffer object created using {clCreateFromGLBuffer} can also be +used to create an OpenCL 1D image buffer object. + +NOTE: Refer to the <> and +<> sections for +more information. + +// refError + +{clCreateFromGLBuffer} returns a valid non-zero OpenCL buffer object and +_errcode_ret_ is set to {CL_SUCCESS} if the buffer object is created +successfully. +Otherwise, it returns a `NULL` value with one of the following error values +returned in _errcode_ret_: + + * {CL_INVALID_CONTEXT} if _context_ is not a valid context or was not + created from an OpenGL context. + * {CL_INVALID_VALUE} if values specified in _flags_ are not valid. + * {CL_INVALID_GL_OBJECT} if _bufobj_ is not an OpenGL buffer object or is + an OpenGL buffer object but does not have an existing data store or the + size of the buffer is 0. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources + required by the OpenCL implementation on the device. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. 
+-- +endif::cl_khr_gl_sharing[] + + == Image Objects An _image_ object is used to store a one-, two- or three-dimensional @@ -1610,7 +1917,9 @@ include::{generated}/api/version-notes/clCreateImageWithProperties.asciidoc[] * _properties_ is an optional list of properties for the image object and their corresponding values. The list is terminated with the special property `0`. If no properties are required, _properties_ may be `NULL`. - OpenCL 3.0 does not define any optional properties for images. + OpenCL 3.0 does not define any optional properties for images, + but extensions may define properties as described in the + <>. * _flags_ is a bit-field that is used to specify allocation and usage information about the image memory object being created and is described in the <> table. @@ -1655,6 +1964,32 @@ If the {CL_MEM_HOST_WRITE_ONLY}, {CL_MEM_HOST_READ_ONLY} or inherited from the corresponding memory access qualifiers associated with __mem_object__. +ifdef::cl_khr_mipmap_image[] +*Mipmap Images* + +A mipmapped 1D image, 1D image array, 2D image, 2D image array or 3D image +is created by specifying _num_mip_levels_ to be a value greater than one in +_image_desc_. +The dimensions of a mipmapped image can be a power of two or a non-power of +two. +Each successively smaller mipmap level is half the size of the previous +level, rounded down to the nearest integer. + +The following restrictions apply when mipmapped images are created with +{clCreateImage}: + +// TODO The actual errors returned from clCreateImage are not specified by +// the cl_khr_mipmap_image extension + + * {CL_MEM_USE_HOST_PTR} or {CL_MEM_COPY_HOST_PTR} cannot be specified if a + mipmapped image is created. + * The _host_ptr_ argument to {clCreateImage} must be a `NULL` value. + * Mip-mapped images cannot be created for {CL_MEM_OBJECT_IMAGE1D_BUFFER} + images, depth images or multi-sampled (i.e. msaa) images. 
+endif::cl_khr_mipmap_image[] + +*Image Data in Host Memory* + For a 3D image or 2D image array, the image data specified by _host_ptr_ is stored as a linear sequence of adjacent 2D image slices or 2D images respectively. @@ -1676,6 +2011,42 @@ stored as a single scanline which is a linear sequence of adjacent elements. Image elements are stored according to their image format as described in the <> section. +[[external-image-memory-properties-table]] +.List of supported image creation properties +[width="100%",cols="<33%,<17%,<50%",options="header"] +|==== +| Property | Property Value | Description +ifdef::cl_khr_external_memory[] +| {CL_MEM_DEVICE_HANDLE_LIST_KHR} + +include::{generated}/api/version-notes/CL_MEM_DEVICE_HANDLE_LIST_KHR.asciidoc[] + | {cl_device_id_TYPE}[] + | Specifies the list of OpenCL devices (terminated with + {CL_MEM_DEVICE_HANDLE_LIST_END_KHR}) to associate with the external + memory handle. +endif::cl_khr_external_memory[] +|==== + +ifdef::cl_khr_external_memory[] +If {CL_MEM_DEVICE_HANDLE_LIST_KHR} is not specified as part of _properties_, +the memory object created by {clCreateBufferWithProperties} or +{clCreateImageWithProperties} is by default accessible to all devices in the +_context_. + +The properties used to create an image from an external memory handle are +<>. +When an image is created from an external memory handle, the +_flags_ used to specify usage information for the image must not +include {CL_MEM_USE_HOST_PTR}, {CL_MEM_ALLOC_HOST_PTR}, or +{CL_MEM_COPY_HOST_PTR}, and the _host_ptr_ argument must be `NULL`. +When images are created from an external memory handle, implementations may +acquire information about image attributes such as format and layout at the +time of creation. +When such information is acquired at image creation time, it is used for the +lifetime of the image object. 
+endif::cl_khr_external_memory[] + // refError {clCreateImage} and {clCreateImageWithProperties} returns a valid non-zero @@ -1731,6 +2102,26 @@ returned in _errcode_ret_: by the OpenCL implementation on the device. * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources required by the OpenCL implementation on the host. +ifdef::cl_khr_external_memory[] + * {CL_INVALID_DEVICE} + ** if a device identified by the property {CL_MEM_DEVICE_HANDLE_LIST_KHR} + is not a valid device or is not associated with _context_, or + ** if a device identified by property {CL_MEM_DEVICE_HANDLE_LIST_KHR} + cannot import the requested external memory object type, or + ** if {CL_MEM_DEVICE_HANDLE_LIST_KHR} is not specified as part of + _properties_ and one or more devices in _context_ cannot import the + requested external memory object type. + * {CL_INVALID_VALUE} + ** if _properties_ includes a supported external memory handle and _flags_ + includes {CL_MEM_USE_HOST_PTR}, {CL_MEM_ALLOC_HOST_PTR}, or + {CL_MEM_COPY_HOST_PTR}. + * {CL_INVALID_HOST_PTR} + ** if _properties_ includes a supported external memory handle and + _host_ptr_ is not `NULL`. + * {CL_INVALID_PROPERTY} + ** if _properties_ does not include a supported external memory handle and + {CL_MEM_DEVICE_HANDLE_LIST_KHR} is specified as part of _properties_. +endif::cl_khr_external_memory[] [[host-ptr-buffer-size-table]] .Required _host_ptr_ buffer sizes for images @@ -1935,11 +2326,11 @@ format, and is defined as: include::{generated}/api/structs/cl_image_format.txt[] - * `image_channel_order` specifies the number of channels and the channel + * _image_channel_order_ specifies the number of channels and the channel layout i.e. the memory layout in which channels are stored in the image. Valid values are described in the <> table. - * `image_channel_data_type` describes the size of the channel data type. + * _image_channel_data_type_ describes the size of the channel data type. 
The list of supported values is described in the <> table. The number of bits per element determined by the `image_channel_data_type` @@ -1958,6 +2349,10 @@ include::{generated}/api/version-notes/CL_R.asciidoc[] | {CL_DEPTH_anchor} include::{generated}/api/version-notes/CL_DEPTH.asciidoc[] + +ifdef::cl_khr_depth_images[] +Also supported if the `<>` extension is supported. +endif::cl_khr_depth_images[] | A single channel image format where the single channel represents a `DEPTH` component. | {CL_LUMINANCE_anchor} @@ -1980,7 +2375,23 @@ include::{generated}/api/version-notes/CL_RG.asciidoc[] | {CL_Rx_anchor} include::{generated}/api/version-notes/CL_Rx.asciidoc[] - | A two channel image format, where the first channel represents a `RED` component and the second channel is ignored. + | A two channel image format, where the first channel represents a `RED` + component and the second channel is ignored. + +ifdef::cl_khr_gl_depth_images[] +| {CL_DEPTH_STENCIL_anchor} + +include::{generated}/api/version-notes/CL_DEPTH_STENCIL.asciidoc[] + | A two channel image format, where the first channel represents + a `DEPTH` component and the second channel represents + a stencil component. + This format can only be used if the image channel data type is + {CL_UNORM_INT24} or {CL_FLOAT}. + + See <>. +endif::cl_khr_gl_depth_images[] + | {CL_RGB_anchor} include::{generated}/api/version-notes/CL_RGB.asciidoc[] @@ -2037,6 +2448,10 @@ include::{generated}/api/version-notes/CL_UNORM_INT8.asciidoc[] | {CL_UNORM_INT16_anchor} include::{generated}/api/version-notes/CL_UNORM_INT16.asciidoc[] + +ifdef::cl_khr_depth_images[] +Also supported if the `<>` extension is supported. 
+endif::cl_khr_depth_images[] | Each channel component is a normalized unsigned 16-bit integer value | {CL_UNORM_SHORT_565_anchor} @@ -2078,6 +2493,14 @@ include::{generated}/api/version-notes/CL_UNSIGNED_INT8.asciidoc[] include::{generated}/api/version-notes/CL_UNSIGNED_INT16.asciidoc[] | Each channel component is an unnormalized unsigned 16-bit integer value + +ifdef::cl_khr_gl_depth_images[] +| {CL_UNORM_INT24_anchor} + +include::{generated}/api/version-notes/CL_UNORM_INT24.asciidoc[] + | Each channel component is a normalized unsigned 24-bit integer value +endif::cl_khr_gl_depth_images[] + | {CL_UNSIGNED_INT32_anchor} include::{generated}/api/version-notes/CL_UNSIGNED_INT32.asciidoc[] @@ -2089,6 +2512,10 @@ include::{generated}/api/version-notes/CL_HALF_FLOAT.asciidoc[] | {CL_FLOAT_anchor} include::{generated}/api/version-notes/CL_FLOAT.asciidoc[] + +ifdef::cl_khr_depth_images[] +Also supported if the `<>` extension is supported. +endif::cl_khr_depth_images[] | Each channel component is a single precision floating-point value |==== @@ -2162,11 +2589,11 @@ and dimensions of an image or image array when creating an image using include::{generated}/api/structs/cl_image_desc.txt[] - * `image_type` describes the image type and must be either + * _image_type_ describes the image type and must be either {CL_MEM_OBJECT_IMAGE1D}, {CL_MEM_OBJECT_IMAGE1D_BUFFER}, {CL_MEM_OBJECT_IMAGE1D_ARRAY}, {CL_MEM_OBJECT_IMAGE2D}, {CL_MEM_OBJECT_IMAGE2D_ARRAY}, or {CL_MEM_OBJECT_IMAGE3D}. - * `image_width` is the width of the image in pixels. + * _image_width_ is the width of the image in pixels. For a 2D image and image array, the image width must be a value {geq} 1 and {leq} {CL_DEVICE_IMAGE2D_MAX_WIDTH}. For a 3D image, the image width must be a value {geq} 1 and {leq} @@ -2175,44 +2602,66 @@ include::{generated}/api/structs/cl_image_desc.txt[] {CL_DEVICE_IMAGE_MAX_BUFFER_SIZE}. 
For a 1D image and 1D image array, the image width must be a value {geq} 1 and {leq} {CL_DEVICE_IMAGE2D_MAX_WIDTH}. - * `image_height` is the height of the image in pixels. + * _image_height_ is the height of the image in pixels. This is only used if the image is a 2D or 3D image, or a 2D image array. For a 2D image or image array, the image height must be a value {geq} 1 and {leq} {CL_DEVICE_IMAGE2D_MAX_HEIGHT}. For a 3D image, the image height must be a value {geq} 1 and {leq} {CL_DEVICE_IMAGE3D_MAX_HEIGHT}. - * `image_depth` is the depth of the image in pixels. + * _image_depth_ is the depth of the image in pixels. This is only used if the image is a 3D image and must be a value {geq} 1 and {leq} {CL_DEVICE_IMAGE3D_MAX_DEPTH}. - * `image_array_size` footnote:[{fn-image-array-performance}] is the number of + * _image_array_size_ footnote:[{fn-image-array-performance}] is the number of images in the image array. This is only used if the image is a 1D or 2D image array. The values for `image_array_size`, if specified, must be a value {geq} 1 and {leq} {CL_DEVICE_IMAGE_MAX_ARRAY_SIZE}. - * `image_row_pitch` is the scan-line pitch in bytes. - This must be 0 if _host_ptr_ is `NULL` and can be either 0 or {geq} - `image_width` {times} size of element in bytes if _host_ptr_ is not `NULL`. - If _host_ptr_ is not `NULL` and `image_row_pitch` = 0, `image_row_pitch` is - calculated as `image_width` {times} size of element in bytes. - If `image_row_pitch` is not 0, it must be a multiple of the image element - size in bytes. - For a 2D image created from a buffer, the pitch specified (or computed if - pitch specified is 0) must be a multiple of the maximum of the - {CL_DEVICE_IMAGE_PITCH_ALIGNMENT} value for all devices in the context - associated with the buffer specified by `mem_object` that support images. - * `image_slice_pitch` is the size in bytes of each 2D slice in the 3D image or - the size in bytes of each image in a 1D or 2D image array. 
- This must be 0 if _host_ptr_ is `NULL`. - If _host_ptr_ is not `NULL`, `image_slice_pitch` can be either 0 or {geq} - `image_row_pitch` {times} `image_height` for a 2D image array or 3D image - and can be either 0 or {geq} `image_row_pitch` for a 1D image array. - If _host_ptr_ is not `NULL` and `image_slice_pitch` = 0, `image_slice_pitch` - is calculated as `image_row_pitch` {times} `image_height` for a 2D image - array or 3D image and `image_row_pitch` for a 1D image array. - If `image_slice_pitch` is not 0, it must be a multiple of the - `image_row_pitch`. - * `num_mip_levels` and `num_samples` must be 0. - * `mem_object` may refer to a valid buffer or image memory object. + * _image_row_pitch_ is the scan-line pitch in bytes. + The _image_row_pitch_ must be zero if _host_ptr_ is `NULL`, +ifdef::cl_khr_external_memory[] + the image is not an image created from an external memory handle, +endif::cl_khr_external_memory[] + and the image is not a 2D image created from a buffer. + If _image_row_pitch_ is zero and _host_ptr_ is not `NULL`, then the + image row pitch is calculated as _image_width_ {times} the size of an + image element in bytes. +ifdef::cl_khr_external_memory[] + If _image_row_pitch_ is zero and the image is created from an external + memory handle, then the image row pitch is implementation-defined. +endif::cl_khr_external_memory[] + The image row pitch must be {geq} _image_width_ {times} the size of an + image element in bytes, and must be a multiple of the size of an image + element in bytes. + For a 2D image created from a buffer, the image row pitch must also be a + multiple of the maximum of the {CL_DEVICE_IMAGE_PITCH_ALIGNMENT} value + for all devices in the context that support images. + * _image_slice_pitch_ is the size in bytes of each 2D slice in a 3D image, + or the size in bytes of each image in a 1D or 2D image array.
+ The _image_slice_pitch_ must be zero if _host_ptr_ is `NULL` +ifdef::cl_khr_external_memory[] + and the image is not an image created from an external memory handle. +endif::cl_khr_external_memory[] + If _image_slice_pitch_ is zero and _host_ptr_ is not `NULL` then the + image slice pitch is calculated as the image row pitch {times} + _image_height_ for a 2D image array or a 3D image, and as the image row + pitch for a 1D image array. +ifdef::cl_khr_external_memory[] + If _image_slice_pitch_ is zero and the image is created from an external + memory handle, then the image slice pitch is implementation-defined. +endif::cl_khr_external_memory[] + The image slice pitch must be {geq} the image row pitch {times} + _image_height_ for a 2D image array or a 3D image, must be {geq} the + image row pitch for a 1D image array, and must be a multiple of the + image row pitch. + * _num_mip_levels_ must be +ifndef::cl_khr_mipmap_image[0.] +ifdef::cl_khr_mipmap_image[] + 0 unless the `<<cl_khr_mipmap_image>>` extension is supported, in which + case it must be a value greater than 1 specifying the number of mipmap + levels in the image. +endif::cl_khr_mipmap_image[] + * _num_samples_ must be 0. + * _mem_object_ may refer to a valid buffer or image memory object. `mem_object` can be a buffer memory object if `image_type` is {CL_MEM_OBJECT_IMAGE1D_BUFFER} or {CL_MEM_OBJECT_IMAGE2D} footnote:[{fn-image-from-buffer}]. @@ -2258,16 +2707,19 @@ Restrictions are: * All of the values specified in _image_desc_ must match the image descriptor information associated with `mem_object`, except for `mem_object`. - * The image channel data type specified in _image_format_ must match the image channel data type associated with `mem_object`. - * The image channel order specified in _image_format_ must be compatible - with the image channel order associated with `mem_object`.
- Compatible image channel orders - footnote:[{fn-compatible-image-channel-orders}] are: -+ --- + with the image channel order associated with `mem_object`, as described + in the <> table. + +NOTE: The image channel order compatibility constraint allows creation of a +sRGB view of the image from a linear RGB view or vice-versa, i.e. the pixels +stored in the image can be accessed as linear RGB or sRGB values. + +[[compatibile-image-channel-orders-table]] +.Compatible Image Channel Orders [width="100%",cols="<50%,<50%",options="header"] |==== | Image Channel Order in _image_format_: @@ -2291,7 +2743,6 @@ Restrictions are: | {CL_DEPTH} | {CL_R} |==== --- [NOTE] ==== @@ -2418,8 +2869,21 @@ is: {CL_FLOAT} | 1 | {CL_DEPTH} footnote:[{fn-depth-image-requirements}] + +ifdef::cl_khr_depth_images[] +Also supported if the `<>` extension is supported. +endif::cl_khr_depth_images[] | {CL_UNORM_INT16} + {CL_FLOAT} +ifdef::cl_khr_gl_depth_images[] +| 1 + | {CL_DEPTH_STENCIL} + | {CL_UNORM_INT24} + + {CL_FLOAT} + + See <>. +endif::cl_khr_gl_depth_images[] | 2 | {CL_RG} | {CL_UNORM_INT8} + @@ -2526,7 +2990,7 @@ the same kernel instance is: -- [[image-format-mapping]] -==== Image format mapping to OpenCL kernel language image access qualifiers +==== Image Format Mapping to OpenCL Kernel Language Image Access Qualifiers Image arguments to kernels may have the `read_only`, `write_only` or `read_write` qualifier. @@ -2558,6 +3022,234 @@ parameter and any other image parameter. |==== + +=== Mapping to External Image Formats + +OpenCL image objects can be created which share storage with image objects +in external APIs such as DirectX and OpenGL when the corresponding OpenCL +extensions are supported. +When creating such OpenCL images, there are restrictions on the allowed +formats. +The tables in this section list, for each such external API, the supported +image formats in that API and the corresponding OpenCL image format. 
+ + +ifdef::cl_khr_dx9_media_sharing[] +==== Image Formats for DirectX 9 Media Surface Sharing + +When the `<>` extension is supported, image +objects sharing storage with Direct3D 9 surfaces can be created. +This section describes the Direct3D 9 surface formats that are supported +when the adapter type is one of the Direct3D lineage. +Using a Direct3D 9 surface format not listed here is an error. +To extend the use of this extension to support media adapters beyond DirectX +9, tables similar to the ones in this section will need to be defined for the +surface formats supported by the new media adapter. +All implementations that support this extension are required to support the +NV12 surface format. +The other surface formats supported are the same surface formats that the +adapter you are sharing with supports as long as they are listed in the +<> or <> tables. + +[[fourcc-image-formats-table]] +.YUV FourCC Codes and Corresponding OpenCL Image Formats +[cols=",",options="header",] +|==== +| FourCC Code | CL Image Format (Channel Order, Channel Data Type) +| FOURCC('N','V','1','2'), Plane 0 | {CL_R}, {CL_UNORM_INT8} +| FOURCC('N','V','1','2'), Plane 1 | {CL_RG}, {CL_UNORM_INT8} +| FOURCC('Y','V','1','2'), Plane 0 | {CL_R}, {CL_UNORM_INT8} +| FOURCC('Y','V','1','2'), Plane 1 | {CL_R}, {CL_UNORM_INT8} +| FOURCC('Y','V','1','2'), Plane 2 | {CL_R}, {CL_UNORM_INT8} +|==== + +In the <> table, NV12 Plane 0 corresponds to the luminance (Y) +channel and Plane 1 corresponds to the UV channels. The YV12 Plane 0 +corresponds to the Y channel, Plane 1 corresponds to the V channel and Plane +2 corresponds to the U channel. +Note that the YUV formats map to {CL_R} and {CL_RG} but do not perform any +YUV to RGB conversion, and vice-versa.
+ +[[d3d9-image-formats-table]] +.Direct3D 9 Formats and Corresponding OpenCL Image Formats +[cols=",",options="header",] +|==== +| Direct3D 9 Format | CL Image Format (Channel Order, Channel Data Type) +| `D3DFMT_R32F` | {CL_R}, {CL_FLOAT} +| `D3DFMT_R16F` | {CL_R}, {CL_HALF_FLOAT} +| `D3DFMT_L16` | {CL_R}, {CL_UNORM_INT16} +| `D3DFMT_A8` | {CL_A}, {CL_UNORM_INT8} +| `D3DFMT_L8` | {CL_R}, {CL_UNORM_INT8} +| | +| `D3DFMT_G32R32F` | {CL_RG}, {CL_FLOAT} +| `D3DFMT_G16R16F` | {CL_RG}, {CL_HALF_FLOAT} +| `D3DFMT_G16R16` | {CL_RG}, {CL_UNORM_INT16} +| `D3DFMT_A8L8` | {CL_RG}, {CL_UNORM_INT8} +| | +| `D3DFMT_A32B32G32R32F` | {CL_RGBA}, {CL_FLOAT} +| `D3DFMT_A16B16G16R16F` | {CL_RGBA}, {CL_HALF_FLOAT} +| `D3DFMT_A16B16G16R16` | {CL_RGBA}, {CL_UNORM_INT16} +| `D3DFMT_A8B8G8R8` | {CL_RGBA}, {CL_UNORM_INT8} +| `D3DFMT_X8B8G8R8` | {CL_RGBA}, {CL_UNORM_INT8} +| `D3DFMT_A8R8G8B8` | {CL_BGRA}, {CL_UNORM_INT8} +| `D3DFMT_X8R8G8B8` | {CL_BGRA}, {CL_UNORM_INT8} +|==== + +NOTE: The Direct3D 9 format names in the table above seem to imply that the order +of the color channels is switched relative to OpenCL, but this is not the +case. +For example, the layout of channels for each pixel for +`D3DFMT_A32B32G32R32F` is the same as {CL_RGBA}, {CL_FLOAT}. +endif::cl_khr_dx9_media_sharing[] + + +ifdef::cl_khr_d3d10_sharing,cl_khr_d3d11_sharing[] +==== Image Formats for Direct3D Texture Sharing + +When the `<>` or `<>` extensions +are supported, image objects sharing storage with Direct3D 10 and Direct3D +11 textures, respectively, can be created. +The <> table describes the supported DirectX Graphics Infrastructure +(DXGI) texture formats.
+ +[[dxgi-image-formats-table]] +.DXGI Formats and Corresponding OpenCL Image Formats +[cols=",",options="header",] +|==== +| DXGI Format | CL Image Format (Channel Order, Channel Data Type) + +| `DXGI_FORMAT_R32G32B32A32_FLOAT` | {CL_RGBA}, {CL_FLOAT} +| `DXGI_FORMAT_R32G32B32A32_UINT` | {CL_RGBA}, {CL_UNSIGNED_INT32} +| `DXGI_FORMAT_R32G32B32A32_SINT` | {CL_RGBA}, {CL_SIGNED_INT32} +| | +| `DXGI_FORMAT_R16G16B16A16_FLOAT` | {CL_RGBA}, {CL_HALF_FLOAT} +| `DXGI_FORMAT_R16G16B16A16_UNORM` | {CL_RGBA}, {CL_UNORM_INT16} +| `DXGI_FORMAT_R16G16B16A16_UINT` | {CL_RGBA}, {CL_UNSIGNED_INT16} +| `DXGI_FORMAT_R16G16B16A16_SNORM` | {CL_RGBA}, {CL_SNORM_INT16} +| `DXGI_FORMAT_R16G16B16A16_SINT` | {CL_RGBA}, {CL_SIGNED_INT16} +| | +| `DXGI_FORMAT_B8G8R8A8_UNORM` | {CL_BGRA}, {CL_UNORM_INT8} +| `DXGI_FORMAT_R8G8B8A8_UNORM` | {CL_RGBA}, {CL_UNORM_INT8} +| `DXGI_FORMAT_R8G8B8A8_UINT` | {CL_RGBA}, {CL_UNSIGNED_INT8} +| `DXGI_FORMAT_R8G8B8A8_SNORM` | {CL_RGBA}, {CL_SNORM_INT8} +| `DXGI_FORMAT_R8G8B8A8_SINT` | {CL_RGBA}, {CL_SIGNED_INT8} +| | +| `DXGI_FORMAT_R32G32_FLOAT` | {CL_RG}, {CL_FLOAT} +| `DXGI_FORMAT_R32G32_UINT` | {CL_RG}, {CL_UNSIGNED_INT32} +| `DXGI_FORMAT_R32G32_SINT` | {CL_RG}, {CL_SIGNED_INT32} +| | +| `DXGI_FORMAT_R16G16_FLOAT` | {CL_RG}, {CL_HALF_FLOAT} +| `DXGI_FORMAT_R16G16_UNORM` | {CL_RG}, {CL_UNORM_INT16} +| `DXGI_FORMAT_R16G16_UINT` | {CL_RG}, {CL_UNSIGNED_INT16} +| `DXGI_FORMAT_R16G16_SNORM` | {CL_RG}, {CL_SNORM_INT16} +| `DXGI_FORMAT_R16G16_SINT` | {CL_RG}, {CL_SIGNED_INT16} +| | +| `DXGI_FORMAT_R8G8_UNORM` | {CL_RG}, {CL_UNORM_INT8} +| `DXGI_FORMAT_R8G8_UINT` | {CL_RG}, {CL_UNSIGNED_INT8} +| `DXGI_FORMAT_R8G8_SNORM` | {CL_RG}, {CL_SNORM_INT8} +| `DXGI_FORMAT_R8G8_SINT` | {CL_RG}, {CL_SIGNED_INT8} +| | +| `DXGI_FORMAT_R32_FLOAT` | {CL_R}, {CL_FLOAT} +| `DXGI_FORMAT_R32_UINT` | {CL_R}, {CL_UNSIGNED_INT32} +| `DXGI_FORMAT_R32_SINT` | {CL_R}, {CL_SIGNED_INT32} +| | +| `DXGI_FORMAT_R16_FLOAT` | {CL_R}, {CL_HALF_FLOAT} +| `DXGI_FORMAT_R16_UNORM` | {CL_R}, 
{CL_UNORM_INT16} +| `DXGI_FORMAT_R16_UINT` | {CL_R}, {CL_UNSIGNED_INT16} +| `DXGI_FORMAT_R16_SNORM` | {CL_R}, {CL_SNORM_INT16} +| `DXGI_FORMAT_R16_SINT` | {CL_R}, {CL_SIGNED_INT16} +| | +| `DXGI_FORMAT_R8_UNORM` | {CL_R}, {CL_UNORM_INT8} +| `DXGI_FORMAT_R8_UINT` | {CL_R}, {CL_UNSIGNED_INT8} +| `DXGI_FORMAT_R8_SNORM` | {CL_R}, {CL_SNORM_INT8} +| `DXGI_FORMAT_R8_SINT` | {CL_R}, {CL_SIGNED_INT8} +|==== +endif::cl_khr_d3d10_sharing,cl_khr_d3d11_sharing[] + + +ifdef::cl_khr_gl_sharing[] +==== Image Formats for OpenGL Texture and Renderbuffer Sharing + +When the `<>` extension is supported, image objects +sharing storage with OpenGL texture and renderbuffer objects can be created. +The <> table describes the supported OpenGL image +formats. +If an OpenGL texture or renderbuffer object with an internal format from the +table is successfully created by OpenGL, then there is guaranteed to be a +mapping to one of the corresponding OpenCL image format(s) in the table. +Texture and renderbuffer objects created with other OpenGL internal formats +may (but are not guaranteed to) have a mapping to an OpenCL image format. +If such mappings exist, they are guaranteed to preserve all color +components, data types, and at least the number of bits/component actually +allocated by OpenGL for that format. 
+ +[[opengl-image-formats-table]] +.OpenGL Internal Formats and Corresponding OpenCL Internal Formats +[cols=",",options="header",] +|==== +| OpenGL internal format | OpenCL Image Format (Channel Order, Channel Data Type) +| `GL_RGBA8` | {CL_RGBA}, {CL_UNORM_INT8} or + + {CL_BGRA}, {CL_UNORM_INT8} +| `GL_SRGB8_ALPHA8` | {CL_sRGBA}, {CL_UNORM_INT8} +| `GL_RGBA`, `GL_UNSIGNED_INT_8_8_8_8_REV` | {CL_RGBA}, {CL_UNORM_INT8} +| `GL_BGRA`, `GL_UNSIGNED_INT_8_8_8_8_REV` | {CL_BGRA}, {CL_UNORM_INT8} +| | +| `GL_RGBA8I`, `GL_RGBA8I_EXT` | {CL_RGBA}, {CL_SIGNED_INT8} +| `GL_RGBA16I`, `GL_RGBA16I_EXT` | {CL_RGBA}, {CL_SIGNED_INT16} +| `GL_RGBA32I`, `GL_RGBA32I_EXT` | {CL_RGBA}, {CL_SIGNED_INT32} +| | +| `GL_RGBA8UI`, `GL_RGBA8UI_EXT` | {CL_RGBA}, {CL_UNSIGNED_INT8} +| `GL_RGBA16UI`, `GL_RGBA16UI_EXT` | {CL_RGBA}, {CL_UNSIGNED_INT16} +| `GL_RGBA32UI`, `GL_RGBA32UI_EXT` | {CL_RGBA}, {CL_UNSIGNED_INT32} +| | +| `GL_RGBA8_SNORM` | {CL_RGBA}, {CL_SNORM_INT8} +| `GL_RGBA16` | {CL_RGBA}, {CL_UNORM_INT16} +| `GL_RGBA16_SNORM` | {CL_RGBA}, {CL_SNORM_INT16} +| `GL_RGBA16F`, `GL_RGBA16F_ARB` | {CL_RGBA}, {CL_HALF_FLOAT} +| `GL_RGBA32F`, `GL_RGBA32F_ARB` | {CL_RGBA}, {CL_FLOAT} +| | +| `GL_R8` | {CL_R}, {CL_UNORM_INT8} +| `GL_R8_SNORM` | {CL_R}, {CL_SNORM_INT8} +| `GL_R16` | {CL_R}, {CL_UNORM_INT16} +| `GL_R16_SNORM` | {CL_R}, {CL_SNORM_INT16} +| `GL_R16F` | {CL_R}, {CL_HALF_FLOAT} +| `GL_R32F` | {CL_R}, {CL_FLOAT} +| | +| `GL_R8I` | {CL_R}, {CL_SIGNED_INT8} +| `GL_R16I` | {CL_R}, {CL_SIGNED_INT16} +| `GL_R32I` | {CL_R}, {CL_SIGNED_INT32} +| `GL_R8UI` | {CL_R}, {CL_UNSIGNED_INT8} +| `GL_R16UI` | {CL_R}, {CL_UNSIGNED_INT16} +| `GL_R32UI` | {CL_R}, {CL_UNSIGNED_INT32} +| | +| `GL_RG8` | {CL_RG}, {CL_UNORM_INT8} +| `GL_RG8_SNORM` | {CL_RG}, {CL_SNORM_INT8} +| `GL_RG16` | {CL_RG}, {CL_UNORM_INT16} +| `GL_RG16_SNORM` | {CL_RG}, {CL_SNORM_INT16} +| `GL_RG16F` | {CL_RG}, {CL_HALF_FLOAT} +| `GL_RG32F` | {CL_RG}, {CL_FLOAT} +| | +| `GL_RG8I` | {CL_RG}, {CL_SIGNED_INT8} +| `GL_RG16I` | {CL_RG}, 
{CL_SIGNED_INT16} +| `GL_RG32I` | {CL_RG}, {CL_SIGNED_INT32} +| `GL_RG8UI` | {CL_RG}, {CL_UNSIGNED_INT8} +| `GL_RG16UI` | {CL_RG}, {CL_UNSIGNED_INT16} +| `GL_RG32UI` | {CL_RG}, {CL_UNSIGNED_INT32} +ifdef::cl_khr_gl_depth_images[] +| `GL_DEPTH_COMPONENT32F` | {CL_DEPTH}, {CL_FLOAT} +| `GL_DEPTH_COMPONENT16` | {CL_DEPTH}, {CL_UNORM_INT16} +| `GL_DEPTH24_STENCIL8` | {CL_DEPTH_STENCIL}, {CL_UNORM_INT24} +| `GL_DEPTH32F_STENCIL8` | {CL_DEPTH_STENCIL}, {CL_FLOAT} +endif::cl_khr_gl_depth_images[] +|==== +endif::cl_khr_gl_sharing[] + + === Reading, Writing and Copying Image Objects [open,refpage='clEnqueueReadImage',desc='Enqueue commands to read from an image or image array object to host memory.',type='protos',alias='clEnqueueWriteImage'] @@ -2598,6 +3290,11 @@ include::{generated}/api/version-notes/clEnqueueWriteImage.asciidoc[] _region_[2] must be 1. If _image_ is a 1D image array object, _region_[2] must be 1. The values in _region_ cannot be 0. +ifdef::cl_khr_mipmap_image[] + If _image_ is a mipmapped image, the mip level to read or write is + determined from _origin_ as described in <<image-mipmap-access>>. +endif::cl_khr_mipmap_image[] * _row_pitch_ in {clEnqueueReadImage} and _input_row_pitch_ in {clEnqueueWriteImage} is the length of each row in bytes. This value must be greater than or equal to the element size in bytes @@ -2714,6 +3411,11 @@ Otherwise, it returns one of the following errors: by the OpenCL implementation on the device. * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources required by the OpenCL implementation on the host. +ifdef::cl_khr_mipmap_image[] + * {CL_INVALID_MIP_LEVEL} if the `<<cl_khr_mipmap_image>>` extension is + supported, and the mip level specified in _origin_ is not a valid level + for _image_. +endif::cl_khr_mipmap_image[] [NOTE] ==== @@ -2779,6 +3481,11 @@ include::{generated}/api/version-notes/clEnqueueCopyImage.asciidoc[] image index in the 1D image array.
If _src_image_ is a 2D image array object, _src_origin_[2] describes the image index in the 2D image array. +ifdef::cl_khr_mipmap_image[] + If _src_image_ is a mipmapped image, the mip level to read is determined + from _src_origin_ as described in <<image-mipmap-access>>. +endif::cl_khr_mipmap_image[] * _dst_origin_ defines the (_x_, _y_, _z_) offset in pixels in the 1D, 2D or 3D image, the (_x_, _y_) offset and the image index in the 2D image array or the (_x_) offset and the image index in the 1D image array. @@ -2790,6 +3497,11 @@ include::{generated}/api/version-notes/clEnqueueCopyImage.asciidoc[] image index in the 1D image array. If _dst_image_ is a 2D image array object, _dst_origin_[2] describes the image index in the 2D image array. +ifdef::cl_khr_mipmap_image[] + If _dst_image_ is a mipmapped image, the mip level to write is + determined from _dst_origin_ as described in <<image-mipmap-access>>. +endif::cl_khr_mipmap_image[] * _region_ defines the (_width_, _height_, _depth_) in pixels of the 1D, 2D or 3D rectangle, the (_width_, _height_) in pixels of the 2D rectangle and the number of images of a 2D image array or the (_width_) in pixels of the 1D @@ -2872,6 +3584,11 @@ Otherwise, it returns one of the following errors: <> table is {CL_FALSE}). * {CL_MEM_COPY_OVERLAP} if _src_image_ and _dst_image_ are the same image object and the source and destination regions overlap. +ifdef::cl_khr_mipmap_image[] + * {CL_INVALID_MIP_LEVEL} if the `<<cl_khr_mipmap_image>>` extension is + supported, and the mip level specified in _src_origin_ or _dst_origin_ + is not a valid level for the corresponding _src_image_ or _dst_image_. +endif::cl_khr_mipmap_image[] -- @@ -2901,8 +3618,8 @@ include::{generated}/api/version-notes/clEnqueueFillImage.asciidoc[] _image_ channel data type is an unnormalized signed integer type and is a four component unsigned integer value if the _image_ channel data type is an unnormalized unsigned integer type.
- The fill color will be converted to the appropriate image channel format and - order associated with _image_. + The fill color will be converted to the appropriate image channel format + and order associated with _image_. * _origin_ defines the (_x_, _y_, _z_) offset in pixels in the 1D, 2D or 3D image, the (_x_, _y_) offset and the image index in the 2D image array or the (_x_) offset and the image index in the 1D image array. @@ -2914,6 +3631,11 @@ include::{generated}/api/version-notes/clEnqueueFillImage.asciidoc[] in the 1D image array. If _image_ is a 2D image array object, _origin_[2] describes the image index in the 2D image array. +ifdef::cl_khr_mipmap_image[] + If _image_ is a mipmapped image, the mip level to fill is determined + from _origin_ as described in <<image-mipmap-access>>. +endif::cl_khr_mipmap_image[] * _region_ defines the (_width_, _height_, _depth_) in pixels of the 1D, 2D or 3D rectangle, the (_width_, _height_) in pixels of the 2D rectangle and the number of images of a 2D image array or the (_width_) in pixels of the 1D @@ -2983,10 +3705,15 @@ Otherwise, it returns one of the following errors: by the OpenCL implementation on the device. * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources required by the OpenCL implementation on the host. +ifdef::cl_khr_mipmap_image[] + * {CL_INVALID_MIP_LEVEL} if the `<<cl_khr_mipmap_image>>` extension is + supported, and the mip level specified in _origin_ is not a valid level + for _image_. +endif::cl_khr_mipmap_image[] -- -=== Copying between Image and Buffer Objects +=== Copying Between Image and Buffer Objects [open,refpage='clEnqueueCopyImageToBuffer',desc='Enqueues a command to copy an image object to a buffer object.',type='protos'] -- @@ -3012,6 +3739,11 @@ include::{generated}/api/version-notes/clEnqueueCopyImageToBuffer.asciidoc[] image index in the 1D image array. If _src_image_ is a 2D image array object, _src_origin_[2] describes the image index in the 2D image array.
+ifdef::cl_khr_mipmap_image[] + If _src_image_ is a mipmapped image, the mip level to read is determined + from _src_origin_ as described in <<image-mipmap-access>>. +endif::cl_khr_mipmap_image[] * _region_ defines the (_width_, _height_, _depth_) in pixels of the 1D, 2D or 3D rectangle, the (_width_, _height_) in pixels of the 2D rectangle and the number of images of a 2D image array or the (_width_) in pixels of the 1D @@ -3100,6 +3832,11 @@ Otherwise, it returns one of the following errors: by the OpenCL implementation on the device. * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources required by the OpenCL implementation on the host. +ifdef::cl_khr_mipmap_image[] + * {CL_INVALID_MIP_LEVEL} if the `<<cl_khr_mipmap_image>>` extension is + supported, and the mip level specified in _src_origin_ is not a valid + level for _src_image_. +endif::cl_khr_mipmap_image[] -- @@ -3129,6 +3866,11 @@ include::{generated}/api/version-notes/clEnqueueCopyBufferToImage.asciidoc[] image index in the 1D image array. If _dst_image_ is a 2D image array object, _dst_origin_[2] describes the image index in the 2D image array. +ifdef::cl_khr_mipmap_image[] + If _dst_image_ is a mipmapped image, the mip level to write is + determined from _dst_origin_ as described in <<image-mipmap-access>>. +endif::cl_khr_mipmap_image[] * _region_ defines the (_width_, _height_, _depth_) in pixels of the 1D, 2D or 3D rectangle, the (_width_, _height_) in pixels of the 2D rectangle and the number of images of a 2D image array or the (_width_) in pixels of the 1D @@ -3217,6 +3959,11 @@ Otherwise, it returns one of the following errors: by the OpenCL implementation on the device. * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources required by the OpenCL implementation on the host. +ifdef::cl_khr_mipmap_image[] + * {CL_INVALID_MIP_LEVEL} if the `<<cl_khr_mipmap_image>>` extension is + supported, and the mip level specified in _dst_origin_ is not a valid + level for _dst_image_. +endif::cl_khr_mipmap_image[] -- @@ -3385,6 +4132,33 @@ Objects>>.
-- +ifdef::cl_khr_mipmap_image[] +[[image-mipmap-access]] +=== Specifying Mipmap Levels to Image Operations + +When the `<>` extension is supported, the +{clEnqueueReadImage}, {clEnqueueWriteImage}, {clEnqueueMapImage}, +{clEnqueueCopyImage}, {clEnqueueCopyImageToBuffer}, +{clEnqueueCopyBufferToImage}, and {clEnqueueFillImage} functions described +above can operate on mipmapped images. + +The mipmap image level(s) to access for each command are determined from the +_origin_ parameter when accessing a single _image_ (non-copy functions), or +from the _src_origin_ and _dst_origin_ parameters when accessing two +_src_image_ and _dst_image_ images (copy functions). The logic below applies +to each of these parameters, with _image_ and _origin_ replaced by +_src_image_ and _src_origin_, or _dst_image_ and _dst_origin_ as +appropriate: + + * If _image_ is a 1D image, _origin_[1] specifies the mip level to use. + * If _image_ is a 1D image array, _origin_[2] specifies the mip level to + use. + * If _image_ is a 2D image, _origin_[2] specifies the mip level to use. + * If _image_ is a 2D image array or a 3D image, _origin_[3] specifies the + mip level to use. +endif::cl_khr_mipmap_image[] + + [[image-object-queries]] === Image Object Queries @@ -3499,6 +4273,36 @@ include::{generated}/api/version-notes/CL_IMAGE_NUM_MIP_LEVELS.asciidoc[] include::{generated}/api/version-notes/CL_IMAGE_NUM_SAMPLES.asciidoc[] | {cl_uint_TYPE} | Return `num_samples` associated with _image_. + +ifdef::cl_khr_dx9_media_sharing[] +| {CL_IMAGE_DX9_MEDIA_PLANE_KHR_anchor} + +include::{generated}/api/version-notes/CL_IMAGE_DX9_MEDIA_PLANE_KHR.asciidoc[] + | {cl_uint_TYPE} + | If _image_ was created using {clCreateFromDX9MediaSurfaceKHR}, + returns the _plane_ argument specified when _image_ was created. 
+endif::cl_khr_dx9_media_sharing[] + +ifdef::cl_khr_d3d10_sharing[] +| {CL_IMAGE_D3D10_SUBRESOURCE_KHR_anchor} + +include::{generated}/api/version-notes/CL_IMAGE_D3D10_SUBRESOURCE_KHR.asciidoc[] + | {cl_uint_TYPE} + | If _image_ was created using {clCreateFromD3D10Texture2DKHR}, or + {clCreateFromD3D10Texture3DKHR}, returns the _subresource_ argument + specified when _image_ was created. +endif::cl_khr_d3d10_sharing[] + +ifdef::cl_khr_d3d11_sharing[] +| {CL_IMAGE_D3D11_SUBRESOURCE_KHR_anchor} + +include::{generated}/api/version-notes/CL_IMAGE_D3D11_SUBRESOURCE_KHR.asciidoc[] + | {cl_uint_TYPE} + | If _image_ was created using {clCreateFromD3D11Texture2DKHR}, or + {clCreateFromD3D11Texture3DKHR}, returns the _subresource_ argument + specified when _image_ was created. +endif::cl_khr_d3d11_sharing[] + |==== // refError @@ -3516,80 +4320,828 @@ Otherwise, it returns one of the following errors: by the OpenCL implementation on the device. * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources required by the OpenCL implementation on the host. --- +ifdef::cl_khr_dx9_media_sharing[] +The following errors may be returned if the `<>` +extension is supported: + + * {CL_INVALID_DX9_MEDIA_SURFACE_KHR} if _param_name_ is + {CL_IMAGE_DX9_MEDIA_PLANE_KHR} and _image_ was not created by calling + {clCreateFromDX9MediaSurfaceKHR}. +endif::cl_khr_dx9_media_sharing[] + +ifdef::cl_khr_d3d10_sharing[] +The following errors may be returned if the `<>` +extension is supported: + + * {CL_INVALID_D3D10_RESOURCE_KHR} if _param_name_ is + {CL_IMAGE_D3D10_SUBRESOURCE_KHR} and _image_ was not created by the + function {clCreateFromD3D10Texture2DKHR}, or + {clCreateFromD3D10Texture3DKHR}. 
+endif::cl_khr_d3d10_sharing[] + +ifdef::cl_khr_d3d11_sharing[] +The following errors may be returned if the `<>` +extension is supported: + + * {CL_INVALID_D3D11_RESOURCE_KHR} if _param_name_ is + {CL_IMAGE_D3D11_SUBRESOURCE_KHR} and _image_ was not created by the + function {clCreateFromD3D11Texture2DKHR}, or + {clCreateFromD3D11Texture3DKHR}. +endif::cl_khr_d3d11_sharing[] + +-- + + +ifdef::cl_khr_dx9_media_sharing[] +=== Creating OpenCL Image Objects From DirectX 9 Media Resources + +[open,refpage='clCreateFromDX9MediaSurfaceKHR',desc='Create OpenCL image object from a media surface',type='protos'] +-- +To create an OpenCL image object from a media surface, call the function + +include::{generated}/api/protos/clCreateFromDX9MediaSurfaceKHR.txt[] +include::{generated}/api/version-notes/clCreateFromDX9MediaSurfaceKHR.asciidoc[] + + * _context_ is a valid OpenCL context created from a media adapter. + * _flags_ is a bit-field that is used to specify usage information. + Refer to the <> table for a description of _flags_. + Only the {CL_MEM_READ_ONLY}, {CL_MEM_WRITE_ONLY} and {CL_MEM_READ_WRITE} + flags specified in that table can be used. + * _adapter_type_ is a value from enumeration of supported adapters + described in the <> table. + The type of _surface_info_ is determined by the adapter type. + The implementation does not need to support all adapter types. + This approach provides flexibility to support additional adapter types + in the future. + Supported adapter types are {CL_ADAPTER_D3D9_KHR}, + {CL_ADAPTER_D3D9EX_KHR} and {CL_ADAPTER_DXVA_KHR}. + * _surface_info_ is a pointer to one of the structures defined in the + _adapter_type_ description above, passed in as a `void *`. + If _adapter_type_ is {CL_ADAPTER_D3D9_KHR}, {CL_ADAPTER_D3D9EX_KHR} or + {CL_ADAPTER_DXVA_KHR}, _surface_info_ points to a + <> structure describing the surface. + * _plane_ is the plane of resource to share for planar surface formats.
+ For planar formats, we use the plane parameter to obtain a handle to + this specific plane (Y, U or V for example). + For non-planar formats used by media, _plane_ must be 0. + * _errcode_ret_ will return an appropriate error code. + If _errcode_ret_ is `NULL`, no error code is returned. -== Pipes +The width and height of the returned OpenCL 2D image object are determined +by the width and height of the _plane_ of the resource +(_surface_info_->_resource_). +The channel type and order of the returned image object is determined by the +format and plane of the resource, and are described in the +<> and <> tables. -NOTE: Pipes are <> version 2.0. +This call will increment the internal media surface count on the resource. +The internal media surface reference count on the resource will be +decremented when the OpenCL reference count on the returned OpenCL memory +object drops to zero. -A _pipe_ is a memory object that stores data organized as a FIFO. -Pipe objects can only be accessed using built-in functions that read from -and write to a pipe. -Pipe objects are not accessible from the host. -A pipe object encapsulates the following information: +// refError - * Packet size in bytes - * Maximum capacity in packets - * Information about the number of packets currently in the pipe - * Data packets +{clCreateFromDX9MediaSurfaceKHR} returns a valid non-zero 2D image object +and _errcode_ret_ is set to {CL_SUCCESS} if the 2D image object is created +successfully. +Otherwise it returns a `NULL` value with one of the following error values +returned in _errcode_ret_: + + * {CL_INVALID_CONTEXT} if _context_ is not a valid context. + * {CL_INVALID_VALUE} if values specified in _flags_ are not valid or if + _plane_ is not a valid plane of _resource_ specified in _surface_info_.
+ * {CL_INVALID_DX9_MEDIA_SURFACE_KHR} if _resource_ specified in + _surface_info_ is not a valid resource or is not associated with + _adapter_type_ (e.g., _adapter_type_ is set to {CL_ADAPTER_D3D9_KHR} and + _resource_ is not a Direct3D 9 surface created in D3DPOOL_DEFAULT). + * {CL_INVALID_DX9_MEDIA_SURFACE_KHR} if _shared_handle_ specified in + _surface_info_ is not `NULL` or a valid handle value. + * {CL_INVALID_IMAGE_FORMAT_DESCRIPTOR} if the texture format of _resource_ + is not listed in the <> or <> tables. + * {CL_INVALID_OPERATION} if there are no devices in _context_ that support + _adapter_type_. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources + required by the OpenCL implementation on the device. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. +-- +[open,refpage='cl_dx9_surface_info_khr',desc='Structure describing a DX surface',type='structs'] +-- +The {cl_dx9_surface_info_khr_TYPE} structure is passed to +{clCreateFromDX9MediaSurfaceKHR} to describe a DX9 surface, and is defined +as: -=== Creating Pipe Objects +include::{generated}/api/structs/cl_dx9_surface_info_khr.txt[] -[open,refpage='clCreatePipe',desc='Creates a pipe object.',type='protos'] + * _resource_ is a pointer to a `IDirect3DSurface9` surface interface. + * _shared_handle_ is a `HANDLE` to the resource. + +For DX9 surfaces, we need both the handle to the resource and the resource +itself to have a sufficient amount of information to eliminate a copy of the +surface for sharing in cases where this is possible. +Elimination of the copy is driver dependent. +_shared_handle_ may be `NULL` and this may result in sub-optimal +performance. 
-- -To create a *pipe object*, call the function +endif::cl_khr_dx9_media_sharing[] -include::{generated}/api/protos/clCreatePipe.txt[] -include::{generated}/api/version-notes/clCreatePipe.asciidoc[] - * _context_ is a valid OpenCL context used to create the pipe object. - * _flags_ is a bit-field that is used to specify allocation and usage - information such as the memory arena that should be used to allocate the - pipe object and how it will be used. - The <> table describes the possible values for - _flags_. - Only {CL_MEM_READ_WRITE} and {CL_MEM_HOST_NO_ACCESS} can be specified when - creating a pipe object. - If the value specified for _flags_ is 0, the default is used which is - {CL_MEM_READ_WRITE} | {CL_MEM_HOST_NO_ACCESS}. - * _pipe_packet_size_ is the size in bytes of a pipe packet. - * _pipe_max_packets_ specifies the pipe capacity by specifying the maximum - number of packets the pipe can hold. - * _properties_ specifies a list of properties for the pipe and their - corresponding values. - Each property name is immediately followed by the corresponding desired - value. - The list is terminated with 0. - Currently, in all OpenCL versions, _properties_ must be `NULL`. +ifdef::cl_khr_d3d10_sharing[] +=== Creating OpenCL Image Objects From Direct3D 10 Textures and Resources + +[open,refpage='clCreateFromD3D10Texture2DKHR',desc='Create OpenCL 2D image object from a Direct3D 10 2D texture',type='protos'] +-- +To create an OpenCL 2D image object from a subresource of a Direct3D 10 2D +texture, call the function + +include::{generated}/api/protos/clCreateFromD3D10Texture2DKHR.txt[] +include::{generated}/api/version-notes/clCreateFromD3D10Texture2DKHR.asciidoc[] + + * _context_ is a valid OpenCL context created from a Direct3D 10 device. + * _flags_ is a bit-field that is used to specify usage information. + Refer to the <> table for a description of _flags_. 
+ Only the {CL_MEM_READ_ONLY}, {CL_MEM_WRITE_ONLY} and {CL_MEM_READ_WRITE} + flags specified in that table can be used. + * _resource_ is a pointer to the Direct3D 10 2D texture to share. + * _subresource_ is the subresource of _resource_ to share. * _errcode_ret_ will return an appropriate error code. If _errcode_ret_ is `NULL`, no error code is returned. +The width and height of the returned OpenCL 2D image object are determined +by the width and height of subresource _subresource_ of _resource_. +The channel type and order of the returned OpenCL 2D image object is +determined by the format of _resource_ and the <> table. + +This call will increment the internal Direct3D 10 reference count on +_resource_. +The internal Direct3D 10 reference count on _resource_ will be decremented +when the OpenCL reference count on the returned OpenCL memory object drops +to zero. + +NOTE: Refer to the <> and +<> sections for +more information. + // refError -{clCreatePipe} returns a valid non-zero pipe object and _errcode_ret_ is set -to {CL_SUCCESS} if the pipe object is created successfully. +{clCreateFromD3D10Texture2DKHR} returns a valid non-zero OpenCL image object +and _errcode_ret_ is set to {CL_SUCCESS} if the image object is created +successfully. Otherwise, it returns a `NULL` value with one of the following error values returned in _errcode_ret_: * {CL_INVALID_CONTEXT} if _context_ is not a valid context. - * {CL_INVALID_OPERATION} if no devices in _context_ support pipes. - * {CL_INVALID_VALUE} if values specified in _flags_ are not as defined - above. - * {CL_INVALID_VALUE} if _properties_ is not `NULL`. - * {CL_INVALID_PIPE_SIZE} if _pipe_packet_size_ is 0 or the - _pipe_packet_size_ exceeds {CL_DEVICE_PIPE_MAX_PACKET_SIZE} value - specified in the <> table for all - devices in _context_ or if _pipe_max_packets_ is 0. - * {CL_MEM_OBJECT_ALLOCATION_FAILURE} if there is a failure to allocate - memory for the pipe object. 
- * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required - by the OpenCL implementation on the device. + * {CL_INVALID_VALUE} if values specified in _flags_ are not valid or if + _subresource_ is not a valid subresource index for _resource_. + * {CL_INVALID_D3D10_RESOURCE_KHR} if _resource_ is not a Direct3D 10 + texture resource, if _resource_ was created with the D3D10_USAGE flag + D3D10_USAGE_IMMUTABLE, if _resource_ is a multisampled texture, if a + {cl_mem_TYPE} from subresource _subresource_ of _resource_ has already + been created using {clCreateFromD3D10Texture2DKHR}, or if _context_ was + not created against the same Direct3D 10 device from which _resource_ + was created. + * {CL_INVALID_IMAGE_FORMAT_DESCRIPTOR} if the Direct3D 10 texture format + of _resource_ is not listed in the <> table or if the + Direct3D 10 texture format of _resource_ does not map to a supported + OpenCL image format. * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources required by the OpenCL implementation on the host. +-- -Pipes follow the same memory consistency model as defined for buffer and +[open,refpage='clCreateFromD3D10Texture3DKHR',desc='Create OpenCL 3D image object from a Direct3D 10 3D texture',type='protos'] +-- +To create an OpenCL 3D image object from a subresource of a Direct3D 10 3D +texture, call the function + +include::{generated}/api/protos/clCreateFromD3D10Texture3DKHR.txt[] +include::{generated}/api/version-notes/clCreateFromD3D10Texture3DKHR.asciidoc[] + + * _context_ is a valid OpenCL context created from a Direct3D 10 device. + * _flags_ is a bit-field that is used to specify usage information. + Refer to the <> table for a description of _flags_. + Only the {CL_MEM_READ_ONLY}, {CL_MEM_WRITE_ONLY} and {CL_MEM_READ_WRITE} + flags specified in that table can be used. + * _resource_ is a pointer to the Direct3D 10 3D texture to share. + * _subresource_ is the subresource of _resource_ to share. 
+ * _errcode_ret_ will return an appropriate error code. + If _errcode_ret_ is `NULL`, no error code is returned. + +The width, height and depth of the returned OpenCL 3D image object are +determined by the width, height and depth of subresource _subresource_ of +_resource_. +The channel type and order of the returned OpenCL 3D image object is +determined by the format of _resource_ and the <> table. + +This call will increment the internal Direct3D 10 reference count on +_resource_. +The internal Direct3D 10 reference count on _resource_ will be decremented when +the OpenCL reference count on the returned OpenCL memory object drops to +zero. + +NOTE: Refer to the <> and +<> sections for +more information. + +// refError + +{clCreateFromD3D10Texture3DKHR} returns a valid non-zero OpenCL image object +and _errcode_ret_ is set to {CL_SUCCESS} if the image object is created +successfully. +Otherwise, it returns a `NULL` value with one of the following error values +returned in _errcode_ret_: + + * {CL_INVALID_CONTEXT} if _context_ is not a valid context. + * {CL_INVALID_VALUE} if values specified in _flags_ are not valid or if + _subresource_ is not a valid subresource index for _resource_. + * {CL_INVALID_D3D10_RESOURCE_KHR} if _resource_ is not a Direct3D 10 + texture resource, if _resource_ was created with the D3D10_USAGE flag + D3D10_USAGE_IMMUTABLE, if _resource_ is a multisampled texture, if a + {cl_mem_TYPE} from subresource _subresource_ of _resource_ has already + been created using {clCreateFromD3D10Texture3DKHR}, or if _context_ was + not created against the same Direct3D 10 device from which _resource_ + was created. + * {CL_INVALID_IMAGE_FORMAT_DESCRIPTOR} if the Direct3D 10 texture format + of _resource_ is not listed in the <> table or if the + Direct3D 10 texture format of _resource_ does not map to a supported + OpenCL image format. 
+ * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. +-- +endif::cl_khr_d3d10_sharing[] + + +ifdef::cl_khr_d3d11_sharing[] +=== Creating OpenCL Image Objects From Direct3D 11 Textures and Resources + +[open,refpage='clCreateFromD3D11Texture2DKHR',desc='Create OpenCL 2D image object from a Direct3D 11 2D texture',type='protos'] +-- +To create an OpenCL 2D image object from a subresource of a Direct3D 11 2D +texture, call the function + +include::{generated}/api/protos/clCreateFromD3D11Texture2DKHR.txt[] +include::{generated}/api/version-notes/clCreateFromD3D11Texture2DKHR.asciidoc[] + + * _context_ is a valid OpenCL context created from a Direct3D 11 device. + * _flags_ is a bit-field that is used to specify usage information. + Refer to the <> table for a description of _flags_. + Only the {CL_MEM_READ_ONLY}, {CL_MEM_WRITE_ONLY} and {CL_MEM_READ_WRITE} + flags specified in that table can be used. + * _resource_ is a pointer to the Direct3D 11 2D texture to share. + * _subresource_ is the subresource of _resource_ to share. + * _errcode_ret_ will return an appropriate error code. + If _errcode_ret_ is `NULL`, no error code is returned. + +The width and height of the returned OpenCL 2D image object are determined +by the width and height of subresource _subresource_ of _resource_. +The channel type and order of the returned OpenCL 2D image object is +determined by the format of _resource_ and the <> table. + +This call will increment the internal Direct3D 11 reference count on +_resource_. +The internal Direct3D 11 reference count on _resource_ will be decremented when +the OpenCL reference count on the returned OpenCL memory object drops to +zero. + +NOTE: Refer to the <> and +<> sections for +more information. + +// refError + +{clCreateFromD3D11Texture2DKHR} returns a valid non-zero OpenCL image object +and _errcode_ret_ is set to {CL_SUCCESS} if the image object is created +successfully. 
+Otherwise, it returns a `NULL` value with one of the following error values +returned in _errcode_ret_: + + * {CL_INVALID_CONTEXT} if _context_ is not a valid context. + * {CL_INVALID_VALUE} if values specified in _flags_ are not valid or if + _subresource_ is not a valid subresource index for _resource_. + * {CL_INVALID_D3D11_RESOURCE_KHR} if _resource_ is not a Direct3D 11 + texture resource, if _resource_ was created with the D3D11_USAGE flag + D3D11_USAGE_IMMUTABLE, if _resource_ is a multisampled texture, if a + {cl_mem_TYPE} from subresource _subresource_ of _resource_ has already + been created using {clCreateFromD3D11Texture2DKHR}, or if _context_ was + not created against the same Direct3D 11 device from which _resource_ + was created. + * {CL_INVALID_IMAGE_FORMAT_DESCRIPTOR} if the Direct3D 11 texture format + of _resource_ is not listed in the <> table or if the + Direct3D 11 texture format of _resource_ does not map to a supported + OpenCL image format. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. +-- + +[open,refpage='clCreateFromD3D11Texture3DKHR',desc='Create OpenCL 3D image object from a Direct3D 11 3D texture',type='protos'] +-- +To create an OpenCL 3D image object from a subresource of a Direct3D 11 3D +texture, call the function + +include::{generated}/api/protos/clCreateFromD3D11Texture3DKHR.txt[] +include::{generated}/api/version-notes/clCreateFromD3D11Texture3DKHR.asciidoc[] + + * _context_ is a valid OpenCL context created from a Direct3D 11 device. + * _flags_ is a bit-field that is used to specify usage information. + Refer to the <> table for a description of _flags_. + Only the {CL_MEM_READ_ONLY}, {CL_MEM_WRITE_ONLY} and {CL_MEM_READ_WRITE} + flags specified in that table can be used. + * _resource_ is a pointer to the Direct3D 11 3D texture to share. + * _subresource_ is the subresource of _resource_ to share. 
+ * _errcode_ret_ will return an appropriate error code. + If _errcode_ret_ is `NULL`, no error code is returned. + +The width, height and depth of the returned OpenCL 3D image object are +determined by the width, height and depth of subresource _subresource_ of +_resource_. +The channel type and order of the returned OpenCL 3D image object is +determined by the format of _resource_ and the <> table. + +This call will increment the internal Direct3D 11 reference count on +_resource_. +The internal Direct3D 11 reference count on _resource_ will be decremented when +the OpenCL reference count on the returned OpenCL memory object drops to +zero. + +NOTE: Refer to the <> and +<> sections for +more information. + +// refError + +{clCreateFromD3D11Texture3DKHR} returns a valid non-zero OpenCL image object +and _errcode_ret_ is set to {CL_SUCCESS} if the image object is created +successfully. +Otherwise, it returns a `NULL` value with one of the following error values +returned in _errcode_ret_: + + * {CL_INVALID_CONTEXT} if _context_ is not a valid context. + * {CL_INVALID_VALUE} if values specified in _flags_ are not valid or if + _subresource_ is not a valid subresource index for _resource_. + * {CL_INVALID_D3D11_RESOURCE_KHR} if _resource_ is not a Direct3D 11 + texture resource, if _resource_ was created with the D3D11_USAGE flag + D3D11_USAGE_IMMUTABLE, if _resource_ is a multisampled texture, if a + {cl_mem_TYPE} from subresource _subresource_ of _resource_ has already + been created using {clCreateFromD3D11Texture3DKHR}, or if _context_ was + not created against the same Direct3D 11 device from which _resource_ + was created. + * {CL_INVALID_IMAGE_FORMAT_DESCRIPTOR} if the Direct3D 11 texture format + of _resource_ is not listed in the <> table or if the + Direct3D 11 texture format of _resource_ does not map to a supported + OpenCL image format. 
+ * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. +-- +endif::cl_khr_d3d11_sharing[] + + +ifdef::cl_khr_egl_image[] +=== Creating OpenCL Image Objects From EGL Images + +[open,refpage='clCreateFromEGLImageKHR',desc='Create cl_mem target from EGLImage source',type='protos'] +-- +To create an `EGLImage` target of type {cl_mem} from the `EGLImage` source +provided as _image_, call the function + +include::{generated}/api/protos/clCreateFromEGLImageKHR.txt[] +include::{generated}/api/version-notes/clCreateFromEGLImageKHR.asciidoc[] + + * _display_ should be of type `EGLDisplay`, cast into the type + {CLeglDisplayKHR}. + * _image_ should be of type `EGLImageKHR`, cast into the type + {CLeglImageKHR_TYPE}. + Assuming no errors are generated in this function, the resulting image + object will be an `EGLImage` target of the specified `EGLImage` _image_. + The resulting {cl_mem} is an image object which may be used normally by + all OpenCL operations. + This maps to an `image2d_t` type in OpenCL kernel code. + * _flags_ is a bit-field that is used to specify usage information about + the memory object being created. + Refer to the <> table for a + description of _flags_. + Accepted values in _flags_ are described below. + * _properties_ specifies a list of property names and their corresponding + values. + Each property name is immediately followed by the corresponding desired + value. + The list is terminated with 0. + No properties are currently supported with this version of the + extension. + _properties_ can be `NULL`. + * _errcode_ret_ will return an appropriate error code. + If _errcode_ret_ is `NULL`, no error code is returned. + +Accepted for _flags_ are {CL_MEM_READ_ONLY}, {CL_MEM_WRITE_ONLY} and +{CL_MEM_READ_WRITE}. +If OpenCL 1.2 is supported, _flags_ also accepts {CL_MEM_HOST_WRITE_ONLY}, +{CL_MEM_HOST_READ_ONLY}, and {CL_MEM_HOST_NO_ACCESS}.
+ +`<>` only requires support for {CL_MEM_READ_ONLY}, and for +{CL_MEM_HOST_NO_ACCESS} if OpenCL 1.2 or later is supported. +For OpenCL 1.1, a {CL_INVALID_OPERATION} will be returned for images which +do not support host mapping. + +If the value passed in _flags_ is not supported by the OpenCL +implementation, it will return {CL_INVALID_VALUE}. +The accepted _flags_ may be dependent upon the texture format used. + +// refError + +{clCreateFromEGLImageKHR} returns a valid non-zero OpenCL image object and +_errcode_ret_ is set to {CL_SUCCESS} if the image object is created +successfully. +Otherwise, it returns a `NULL` value with one of the following error values +returned in _errcode_ret_: + + * {CL_INVALID_CONTEXT} if _context_ is not a valid OpenCL context. + * {CL_INVALID_VALUE} if _properties_ contains invalid values, if _display_ + is not a valid display object or if _flags_ are not in the set defined + above. + * {CL_INVALID_EGL_OBJECT_KHR} if _image_ is not a valid `EGLImage` object. + * {CL_IMAGE_FORMAT_NOT_SUPPORTED} if the OpenCL implementation is not able + to create a {cl_mem} compatible with the provided {CLeglImageKHR_TYPE} + for an implementation-dependent reason (this could be caused by, but not + limited to, reasons such as unsupported texture formats, etc.). + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources + required by the OpenCL implementation on the device. + * {CL_INVALID_OPERATION} if there are no devices in _context_ that support + images (i.e. {CL_DEVICE_IMAGE_SUPPORT} specified in table 4.3 is + {CL_FALSE}) or if the flags passed are not supported for that image + type.
+-- +endif::cl_khr_egl_image[] + + +ifdef::cl_khr_gl_sharing[] +=== Creating OpenCL Image Objects From OpenGL Textures and Renderbuffers + +[open,refpage='clCreateFromGLTexture',desc='Create OpenCL image object from an OpenGL texture object',type='protos'] +-- +To create an OpenCL image object from an OpenGL texture object, call the +function + +include::{generated}/api/protos/clCreateFromGLTexture.txt[] +include::{generated}/api/version-notes/clCreateFromGLTexture.asciidoc[] + + * _context_ is a valid OpenCL context created from an OpenGL context. + * _flags_ is a bit-field that is used to specify usage information. + Refer to the <> table for a + description of _flags_. + Only the {CL_MEM_READ_ONLY}, {CL_MEM_WRITE_ONLY} and {CL_MEM_READ_WRITE} + flags specified in that table can be used. + * _texture_target_ must be one of `GL_TEXTURE_1D`, `GL_TEXTURE_1D_ARRAY`, + `GL_TEXTURE_BUFFER`, `GL_TEXTURE_2D`, `GL_TEXTURE_2D_ARRAY`, + `GL_TEXTURE_3D`, `GL_TEXTURE_CUBE_MAP_POSITIVE_X`, + `GL_TEXTURE_CUBE_MAP_POSITIVE_Y`, `GL_TEXTURE_CUBE_MAP_POSITIVE_Z`, + `GL_TEXTURE_CUBE_MAP_NEGATIVE_X`, `GL_TEXTURE_CUBE_MAP_NEGATIVE_Y`, + `GL_TEXTURE_CUBE_MAP_NEGATIVE_Z`. + `GL_TEXTURE_RECTANGLE` or the equivalent `GL_TEXTURE_RECTANGLE_ARB` may + be specified if an OpenGL implementation supporting rectangular textures + is supported. +ifdef::cl_khr_gl_msaa_sharing[] + `GL_TEXTURE_2D_MULTISAMPLE` and `GL_TEXTURE_2D_MULTISAMPLE_ARRAY` may be + specified if an OpenGL implementation supporting multi-sample + two-dimensional textures is supported, and the + `<>` extension is supported. + Refer to the <> section for more information on multi-sample images. +endif::cl_khr_gl_msaa_sharing[] + _texture_target_ is used only to define the image type of _texture_. + No reference to a bound OpenGL texture object is made or implied by this + parameter. + * _miplevel_ is the mipmap level to be used. + If _texture_target_ is `GL_TEXTURE_BUFFER`, _miplevel_ must be 0. 
+ Note: Implementations may return {CL_INVALID_OPERATION} for miplevel + values > 0. + * _texture_ is the name of an OpenGL 1D, 2D, 3D, 1D array, 2D array, + cubemap, rectangle or buffer texture object. + The texture object must be a complete texture as per OpenGL rules on + texture completeness. + The _texture_ format and dimensions defined by OpenGL for the specified + _miplevel_ of the texture will be used to create the OpenCL image memory + object. + Only OpenGL texture objects with an internal format that maps to an + appropriate <> and + <> may be used + to create the OpenCL image memory object. + * _errcode_ret_ will return an appropriate error code as described below. + If _errcode_ret_ is `NULL`, no error code is returned. + +{clCreateFromGLTexture} may create any of the following: + + * an OpenCL 2D image object from an OpenGL 2D texture object or a single + face of an OpenGL cubemap texture object, + * an OpenCL 2D image array object from an OpenGL 2D texture array object, + * an OpenCL 2D multi-sample image object from an OpenGL 2D multi-sample + texture, + * an OpenCL 2D multi-sample array image object from an OpenGL 2D + multi-sample texture, + * an OpenCL 1D image object from an OpenGL 1D texture object, + * an OpenCL 1D image buffer object from an OpenGL texture buffer object, + * an OpenCL 1D image array object from an OpenGL 1D texture array object, + * an OpenCL 3D image object from an OpenGL 3D texture object. + +ifdef::cl_khr_mipmap_image[] +If both the `<>` and `<>` extensions +are supported by the OpenCL device, {clCreateFromGLTexture} may also be used +to create a mipmapped OpenCL image from a mipmapped OpenGL texture by +specifying a negative value for _miplevel_. +In this case, an OpenCL mipmapped image object is created from a +mipmapped OpenGL texture object, instead of an OpenCL image object for a +specific miplevel of the OpenGL texture.
+ +NOTE: For a detailed description of how the level of detail is computed, +please refer to the "`Scale Factor and Level-of-Detail`" section of the +OpenGL 4.6 Specification. +endif::cl_khr_mipmap_image[] + +If the state of an OpenGL texture object is modified through the OpenGL API +(e.g. `glTexImage2D`, `glTexImage3D` or the values of the texture parameters +`GL_TEXTURE_BASE_LEVEL` or `GL_TEXTURE_MAX_LEVEL` are modified) while there +exists a corresponding OpenCL image object, subsequent use of the OpenCL +image object will result in undefined behavior. + +The {clRetainMemObject} and {clReleaseMemObject} functions can be used to +retain and release the image objects. + +NOTE: Refer to the <> and +<> sections for +more information. + +// refError + +{clCreateFromGLTexture} returns a valid non-zero OpenCL image object and +_errcode_ret_ is set to {CL_SUCCESS} if the image object is created +successfully. +Otherwise, it returns a `NULL` value with one of the following error values +returned in _errcode_ret_: + + * {CL_INVALID_CONTEXT} if _context_ is not a valid context or was not + created from an OpenGL context. + * {CL_INVALID_VALUE} if values specified in _flags_ are not valid or if + value specified in _texture_target_ is not one of the values specified + in the description of _texture_target_. + * {CL_INVALID_MIP_LEVEL} if _miplevel_ is less than the value of + _level~base~_ (for OpenGL implementations) or zero (for OpenGL ES + implementations); or greater than the value of _q_ (for both OpenGL and + OpenGL ES). + _level~base~_ and _q_ are defined for the texture in _section 3.8.10_ + (Texture Completeness) of the OpenGL 2.1 Specification and _section + 3.7.10_ of the OpenGL ES 2.0 Specification. + * {CL_INVALID_MIP_LEVEL} if _miplevel_ is greater than zero and the + OpenGL implementation does not support creating from non-zero mipmap + levels.
+ * {CL_INVALID_GL_OBJECT} if _texture_ is not an OpenGL texture object + whose type matches _texture_target_, if the specified _miplevel_ of + _texture_ is not defined, or if the width or height of the specified + _miplevel_ is zero or if the OpenGL texture object is incomplete. + * {CL_INVALID_IMAGE_FORMAT_DESCRIPTOR} if the internal format of _texture_ + is not listed in the <> table. + * {CL_INVALID_OPERATION} if _texture_ is an OpenGL texture object created + with a border width value greater than zero. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources + required by the OpenCL implementation on the device. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. +-- + + +ifdef::cl_khr_gl_depth_images[] +[[restrictions-on-depth-stencil-images]] +==== Restrictions on Depth/Stencil Images + +Depth images with an image channel order of {CL_DEPTH_STENCIL} can only be +created using the {clCreateFromGLTexture} API, and only when the +`<>` extension is supported. + +For the image format given by channel order of {CL_DEPTH_STENCIL} and +channel data type of {CL_UNORM_INT24}, the depth is stored as an unsigned +normalized 24-bit value. + +For the image format given by channel order of {CL_DEPTH_STENCIL} and +channel data type of {CL_FLOAT}, each pixel is two 32-bit values. +The depth is stored as a single precision floating-point value followed by +the stencil which is stored as an 8-bit integer value. + +Such images appear in the <>, but only require read support, +not write support. + +The stencil value cannot be read or written using the *read_imagef* and +*write_imagef* built-in functions in an OpenCL kernel. 
+ +Depth image objects with an image channel order of {CL_DEPTH_STENCIL} cannot +be used as arguments to {clEnqueueReadImage}, {clEnqueueWriteImage}, +{clEnqueueCopyImage}, {clEnqueueCopyImageToBuffer}, +{clEnqueueCopyBufferToImage}, {clEnqueueMapImage}, and {clEnqueueFillImage}. +Such use will return a {CL_INVALID_OPERATION} error. +endif::cl_khr_gl_depth_images[] + + +ifdef::cl_khr_gl_msaa_sharing[] +[[restrictions-on-msaa-images]] +==== Restrictions on Multi-Sample Images + +The formats described in the <> and <> tables of the OpenCL 3.0 specification and the +additional formats described in the <> table also +support OpenCL images created from an OpenGL multi-sampled color or depth +texture. + +Multi-sample OpenCL image objects can only be read from a kernel. +Multi-sample OpenCL image objects cannot be used as arguments to +{clEnqueueReadImage}, {clEnqueueWriteImage}, {clEnqueueCopyImage}, +{clEnqueueCopyImageToBuffer}, {clEnqueueCopyBufferToImage}, +{clEnqueueMapImage}, and {clEnqueueFillImage}. +Such use will return a {CL_INVALID_OPERATION} error. +endif::cl_khr_gl_msaa_sharing[] + + +[open,refpage='clCreateFromGLRenderbuffer',desc='Create OpenCL 2D image object from an OpenGL renderbuffer',type='protos'] +-- +To create an OpenCL 2D image object from an OpenGL renderbuffer object, call +the function + +include::{generated}/api/protos/clCreateFromGLRenderbuffer.txt[] +include::{generated}/api/version-notes/clCreateFromGLRenderbuffer.asciidoc[] + + * _context_ is a valid OpenCL context created from an OpenGL context. + * _flags_ is a bit-field that is used to specify usage information. + Refer to the <> table for a + description of _flags_. + Only the {CL_MEM_READ_ONLY}, {CL_MEM_WRITE_ONLY} and {CL_MEM_READ_WRITE} + flags specified in that table can be used. + * _renderbuffer_ is the name of an OpenGL renderbuffer object. + The renderbuffer storage must be specified before the image object can + be created. 
+ The _renderbuffer_ format and dimensions defined by OpenGL will be used + to create the 2D image object. + Only OpenGL renderbuffers with an internal format that maps to an + appropriate <> and + <> may be used + to create the 2D image object. + * _errcode_ret_ will return an appropriate error code as described below. + If _errcode_ret_ is `NULL`, no error code is returned. + +If the state of an OpenGL renderbuffer object is modified through the OpenGL +API (i.e. changes to the dimensions or format used to represent pixels of +the OpenGL renderbuffer using appropriate OpenGL API calls such as +`glRenderbufferStorage`) while there exists a corresponding OpenCL image +object, subsequent use of the OpenCL image object will result in undefined +behavior. + +The {clRetainMemObject} and {clReleaseMemObject} functions can be used to +retain and release the image objects. + +The <> table describes the list of OpenGL +renderbuffer internal formats and the Corresponding OpenCL Image Formats. +If an OpenGL renderbuffer object with an internal format from the table is +successfully created by OpenGL, then there is guaranteed to be a mapping to +one of the corresponding OpenCL image format(s) in that table. +Renderbuffer objects created with other OpenGL internal formats may (but are +not guaranteed to) have a mapping to an OpenCL image format; if such +mappings exist, they are guaranteed to preserve all color components, data +types, and at least the number of bits/component actually allocated by +OpenGL for that format. + +NOTE: Refer to the <> and +<> sections for +more information. + +// refError + +{clCreateFromGLRenderbuffer} returns a valid non-zero OpenCL image object +and _errcode_ret_ is set to {CL_SUCCESS} if the image object is created +successfully. +Otherwise, it returns a `NULL` value with one of the following error values +returned in _errcode_ret_: + + * {CL_INVALID_CONTEXT} if _context_ is not a valid context or was not + created from an OpenGL context. 
+ * {CL_INVALID_VALUE} if values specified in _flags_ are not valid. + * {CL_INVALID_GL_OBJECT} if _renderbuffer_ is not an OpenGL renderbuffer + object, or if the width or height of _renderbuffer_ is zero. + * {CL_INVALID_IMAGE_FORMAT_DESCRIPTOR} if the internal format of + _renderbuffer_ is not listed in the <> table. + * {CL_INVALID_OPERATION} if _renderbuffer_ is a multi-sample OpenGL + renderbuffer object. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources + required by the OpenCL implementation on the device. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. +-- +endif::cl_khr_gl_sharing[] + + +== Pipes + +NOTE: Pipes are <> version 2.0. + +A _pipe_ is a memory object that stores data organized as a FIFO. +Pipe objects can only be accessed using built-in functions that read from +and write to a pipe. +Pipe objects are not accessible from the host. +A pipe object encapsulates the following information: + + * Packet size in bytes + * Maximum capacity in packets + * Information about the number of packets currently in the pipe + * Data packets + + +=== Creating Pipe Objects + +[open,refpage='clCreatePipe',desc='Creates a pipe object.',type='protos'] +-- +To create a *pipe object*, call the function + +include::{generated}/api/protos/clCreatePipe.txt[] +include::{generated}/api/version-notes/clCreatePipe.asciidoc[] + + * _context_ is a valid OpenCL context used to create the pipe object. + * _flags_ is a bit-field that is used to specify allocation and usage + information such as the memory arena that should be used to allocate the + pipe object and how it will be used. + The <> table describes the possible values for + _flags_. + Only {CL_MEM_READ_WRITE} and {CL_MEM_HOST_NO_ACCESS} can be specified when + creating a pipe object. + If the value specified for _flags_ is 0, the default is used which is + {CL_MEM_READ_WRITE} | {CL_MEM_HOST_NO_ACCESS}. 
+ * _pipe_packet_size_ is the size in bytes of a pipe packet. + * _pipe_max_packets_ specifies the pipe capacity by specifying the maximum + number of packets the pipe can hold. + * _properties_ specifies a list of properties for the pipe and their + corresponding values. + Each property name is immediately followed by the corresponding desired + value. + The list is terminated with 0. + Currently, in all OpenCL versions, _properties_ must be `NULL`. + * _errcode_ret_ will return an appropriate error code. + If _errcode_ret_ is `NULL`, no error code is returned. + +// refError + +{clCreatePipe} returns a valid non-zero pipe object and _errcode_ret_ is set +to {CL_SUCCESS} if the pipe object is created successfully. +Otherwise, it returns a `NULL` value with one of the following error values +returned in _errcode_ret_: + + * {CL_INVALID_CONTEXT} if _context_ is not a valid context. + * {CL_INVALID_OPERATION} if no devices in _context_ support pipes. + * {CL_INVALID_VALUE} if values specified in _flags_ are not as defined + above. + * {CL_INVALID_VALUE} if _properties_ is not `NULL`. + * {CL_INVALID_PIPE_SIZE} if _pipe_packet_size_ is 0 or the + _pipe_packet_size_ exceeds {CL_DEVICE_PIPE_MAX_PACKET_SIZE} value + specified in the <> table for all + devices in _context_ or if _pipe_max_packets_ is 0. + * {CL_MEM_OBJECT_ALLOCATION_FAILURE} if there is a failure to allocate + memory for the pipe object. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required + by the OpenCL implementation on the device. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. + +Pipes follow the same memory consistency model as defined for buffer and image objects. The pipe state i.e. contents of the pipe across kernel-instances (on the same or different devices) is enforced at a synchronization point. @@ -3834,67 +5386,347 @@ of OpenCL APIs is considered to be undefined. 
-- -[[unmapping-mapped-memory]] -=== Unmapping Mapped Memory Objects +ifdef::cl_khr_external_memory[] -[open,refpage='clEnqueueUnmapMemObject',desc='Enqueues a command to unmap a previously mapped region of a memory object.',type='protos'] +[[acquiring-external-memory]] +==== Acquiring and Releasing External Memory Objects + +[open,refpage='clEnqueueAcquireExternalMemObjectsKHR',desc='Enqueue a command to acquire OpenCL memory objects created from external memory handles',type='protos'] -- -To enqueue a command to unmap a previously mapped region of a memory object, -call the function +To enqueue a command to acquire OpenCL memory objects created from external +memory handles, call the function -include::{generated}/api/protos/clEnqueueUnmapMemObject.txt[] -include::{generated}/api/version-notes/clEnqueueUnmapMemObject.asciidoc[] +include::{generated}/api/protos/clEnqueueAcquireExternalMemObjectsKHR.txt[] +include::{generated}/api/version-notes/clEnqueueAcquireExternalMemObjectsKHR.asciidoc[] - * _command_queue_ must be a valid host command-queue. - * _memobj_ is a valid memory (buffer or image) object. - The OpenCL context associated with _command_queue_ and _memobj_ must be the - same. - * _mapped_ptr_ is the host address returned by a previous call to - {clEnqueueMapBuffer}, or {clEnqueueMapImage} for _memobj_. - * _event_wait_list_ and _num_events_in_wait_list_ specify events that need to - complete before {clEnqueueUnmapMemObject} can be executed. - If _event_wait_list_ is `NULL`, then {clEnqueueUnmapMemObject} does not wait - on any event to complete. + * _command_queue_ specifies a valid command-queue. + * _num_mem_objects_ specifies the number of memory objects to acquire. + * _mem_objects_ points to a list of valid memory objects. + * _num_events_in_wait_list_ specifies the number of events in + _event_wait_list_. + * _event_wait_list_ points to the list of events that need to complete + before {clEnqueueAcquireExternalMemObjectsKHR} can be executed. 
+ If _event_wait_list_ is `NULL`, then + {clEnqueueAcquireExternalMemObjectsKHR} does not explicitly wait on any + event to complete. If _event_wait_list_ is `NULL`, _num_events_in_wait_list_ must be 0. If _event_wait_list_ is not `NULL`, the list of events pointed to by _event_wait_list_ must be valid and _num_events_in_wait_list_ must be greater than 0. The events specified in _event_wait_list_ act as synchronization points. - The context associated with events in _event_wait_list_ and _command_queue_ - must be the same. - The memory associated with _event_wait_list_ can be reused or freed after - the function returns. - * _event_ returns an event object that identifies this command and - can be used to query or queue a wait for this command to complete. - If _event_ is `NULL` or the enqueue is unsuccessful, no event will be - created and therefore it will not be possible to query the status of this - command or to wait for this command to complete. - If _event_wait_list_ and _event_ are not `NULL`, _event_ must not refer - to an element of the _event_wait_list_ array. - -Reads or writes from the host using the pointer returned by -{clEnqueueMapBuffer} or {clEnqueueMapImage} are considered to be complete. - -{clEnqueueMapBuffer} and {clEnqueueMapImage} increment the mapped count of -the memory object. -The initial mapped count value of the memory object is zero. -Multiple calls to {clEnqueueMapBuffer}, or {clEnqueueMapImage} on the same -memory object will increment this mapped count by appropriate number of -calls. -{clEnqueueUnmapMemObject} decrements the mapped count of the memory object. - -{clEnqueueMapBuffer}, and {clEnqueueMapImage} act as synchronization points -for a region of the buffer object being mapped. + The context associated with events in _event_wait_list_ and that of + _command_queue_ must be the same. 
+ * _event_ returns an event object that identifies this particular command + and can be used to query or queue a wait for this particular command to + complete. + _event_ can be `NULL` in which case it will not be possible for the + application to query the status of this command or queue a wait for this + command to complete. + +Applications must acquire the memory objects that are created using external +handles before they can be used by any OpenCL commands queued to a +command-queue. +Behavior is undefined if a memory object created from an external memory +handle is used by an OpenCL command queued to a command-queue without being +acquired. +This is to guarantee that the state of the memory objects is up-to-date and +they are accessible to OpenCL. +See <> for more details on how to use this API. + +If _num_mem_objects_ is 0 and _mem_objects_ is `NULL`, the command will +trivially succeed after its event dependencies are satisfied and will update +its completion event. // refError -{clEnqueueUnmapMemObject} returns {CL_SUCCESS} if the function is executed -successfully. +{clEnqueueAcquireExternalMemObjectsKHR} returns {CL_SUCCESS} if the function +is executed successfully. Otherwise, it returns one of the following errors: - * {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not a valid host - command-queue. - * {CL_INVALID_MEM_OBJECT} if _memobj_ is not a valid memory object or is a + * {CL_INVALID_VALUE} if _num_mem_objects_ is zero and _mem_objects_ is not + a `NULL` value, or if _num_mem_objects_ is greater than 0 and + _mem_objects_ is `NULL`. + * {CL_INVALID_MEM_OBJECT} if any of the memory objects in _mem_objects_ is + not a valid OpenCL memory object created using an external memory + handle. 
+ * {CL_INVALID_COMMAND_QUEUE} + ** if _command_queue_ is not a valid command-queue, or + ** if device associated with _command_queue_ is not one of the devices + specified by {CL_MEM_DEVICE_HANDLE_LIST_KHR} at the time of creating + one or more of _mem_objects_, or + ** if one or more of _mem_objects_ belong to a context that does not + contain a device associated with _command_queue_. + * {CL_INVALID_EVENT_WAIT_LIST} + ** if _event_wait_list_ is `NULL` and _num_events_in_wait_list_ is not 0, + or + ** if _event_wait_list_ is not `NULL` and _num_events_in_wait_list_ is 0, + or + ** if event objects in _event_wait_list_ are not valid events. + * {CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST} if the execution status + of any of the events in _event_wait_list_ is a negative integer value. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources + required by the OpenCL implementation on the device. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. +-- + +[open,refpage='clEnqueueReleaseExternalMemObjectsKHR',desc='Enqueue a command to release OpenCL memory objects created from external memory handles',type='protos'] +-- +To enqueue a command to release OpenCL memory objects created from external +memory handles, call the function + +include::{generated}/api/protos/clEnqueueReleaseExternalMemObjectsKHR.txt[] + + * _command_queue_ specifies a valid command-queue. + * _num_mem_objects_ specifies the number of memory objects to release. + * _mem_objects_ points to a list of valid memory objects. + * _num_events_in_wait_list_ specifies the number of events in + _event_wait_list_. + * _event_wait_list_ points to the list of events that need to complete + before {clEnqueueReleaseExternalMemObjectsKHR} can be executed. + If _event_wait_list_ is `NULL`, then + {clEnqueueReleaseExternalMemObjectsKHR} does not wait on any event to + complete. 
+ If _event_wait_list_ is `NULL`, _num_events_in_wait_list_ must be 0. + If _event_wait_list_ is not `NULL`, the list of events pointed to by + _event_wait_list_ must be valid and _num_events_in_wait_list_ must be + greater than 0. + The events specified in _event_wait_list_ act as synchronization points. + The context associated with events in _event_wait_list_ and that of + _command_queue_ must be the same. + * _event_ returns an event object that identifies this particular command + and can be used to query or queue a wait for this particular command to + complete. + _event_ can be `NULL` in which case it will not be possible for the + application to query the status of this command or queue a wait for this + command to complete. + +Applications must release acquired memory objects using +{clEnqueueReleaseExternalMemObjectsKHR} before using them through any +commands in the other API. +This is to guarantee that the state of memory objects is up-to-date and they +are accessible to the other API. +See "`Example with Acquire / Release`" provided in +<> for more details on how to use this +API. + +If _num_mem_objects_ is 0 and _mem_objects_ is `NULL`, the command will +trivially succeed after its event dependencies are satisfied and will update +its completion event. + +// refError + +{clEnqueueReleaseExternalMemObjectsKHR} returns {CL_SUCCESS} if the function +is executed successfully. +Otherwise, it returns one of the following errors: + + * {CL_INVALID_VALUE} if _num_mem_objects_ is zero and _mem_objects_ is not + a `NULL` value, or if _num_mem_objects_ is greater than 0 and + _mem_objects_ is `NULL`. + * {CL_INVALID_MEM_OBJECT} if any of the memory objects in _mem_objects_ is + not a valid OpenCL memory object created using an external memory + handle. 
+ * {CL_INVALID_COMMAND_QUEUE} + ** if _command_queue_ is not a valid command-queue, or + ** if device associated with _command_queue_ is not one of the devices + specified by {CL_MEM_DEVICE_HANDLE_LIST_KHR} at the time of creating + one or more of _mem_objects_, or + ** if one or more of _mem_objects_ belong to a context that does not + contain a device associated with _command_queue_. + * {CL_INVALID_EVENT_WAIT_LIST} + ** if _event_wait_list_ is `NULL` and _num_events_in_wait_list_ is not 0, + or + ** if _event_wait_list_ is not `NULL` and _num_events_in_wait_list_ is 0, + or + ** if event objects in _event_wait_list_ are not valid events. + * {CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST} if the execution status + of any of the events in _event_wait_list_ is a negative integer value. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources + required by the OpenCL implementation on the device. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. +-- + + +[[external-memory-handle-types]] +=== Descriptions of External Memory Handle Types + +This section describes external memory handle types that are added by +extensions. + +Applications can import the same payload into multiple OpenCL contexts and +multiple times into a given OpenCL context. In all cases, each import +operation must create a distinct memory object. + + +==== File Descriptor Handle Types + +ifdef::cl_khr_external_memory_opaque_fd[] +The `<>` extension extends +{cl_external_memory_handle_type_khr_TYPE} to support the following new types +of handles, and adds as a property that may be specified when creating a +buffer or an image memory object from an external handle: + + * {CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_FD_KHR} specifies a POSIX file + descriptor handle that has only limited valid usage outside of OpenCL + and other compatible APIs. 
+ It must be compatible with the POSIX system calls `dup`, `dup2`, + `close`, and the non-standard system call `dup3`. + Additionally, it must be transportable over a socket using a + `SCM_RIGHTS` control message. + It owns a reference to the underlying memory resource represented by its + memory object. +endif::cl_khr_external_memory_opaque_fd[] + +ifdef::cl_khr_external_memory_dma_buf[] +The `<>` extension extends +{cl_external_memory_handle_type_khr_TYPE} to support the following types of +handles, and adds as a property that may be specified when creating a buffer +or an image memory object from an external handle: + + * {CL_EXTERNAL_MEMORY_HANDLE_DMA_BUF_KHR} is a file descriptor for a Linux + dma_buf. + It owns a reference to the underlying memory resource represented by its + memory object. +endif::cl_khr_external_memory_dma_buf[] + +For these extensions, importing memory from a file descriptor transfers +ownership of the file descriptor from the application to the OpenCL +implementation. +The application must not perform any operations on the file descriptor after +a successful import. +The imported memory object holds a reference to its payload. + + +==== NT Handle Types + +ifdef::cl_khr_external_memory_dx[] +The `<>` extension extends +{cl_external_memory_handle_type_khr_TYPE} to support the following new types +of handles, and adds as a property that may be specified when creating a +buffer or an image memory object from an external handle: + + * {CL_EXTERNAL_MEMORY_HANDLE_D3D11_TEXTURE_KHR} specifies an NT handle + returned by `IDXGIResource1::CreateSharedHandle` referring to a Direct3D + 10 or 11 texture resource. + It owns a reference to the memory used by the Direct3D resource. + * {CL_EXTERNAL_MEMORY_HANDLE_D3D11_TEXTURE_KMT_KHR} specifies a global + share handle returned by `IDXGIResource::GetSharedHandle` referring to a + Direct3D 10 or 11 texture resource. 
+ It does not own a reference to the underlying Direct3D resource, and + will therefore become invalid when all memory objects and Direct3D + resources associated with it are destroyed. + * {CL_EXTERNAL_MEMORY_HANDLE_D3D12_HEAP_KHR} specifies an NT handle + returned by `ID3D12Device::CreateSharedHandle` referring to a Direct3D + 12 heap resource. + It owns a reference to the resources used by the Direct3D heap. + * {CL_EXTERNAL_MEMORY_HANDLE_D3D12_RESOURCE_KHR} specifies an NT handle + returned by `ID3D12Device::CreateSharedHandle` referring to a Direct3D + 12 committed resource. + It owns a reference to the memory used by the Direct3D resource. +endif::cl_khr_external_memory_dx[] + +ifdef::cl_khr_external_memory_win32[] +The `<>` extension extends +{cl_external_memory_handle_type_khr_TYPE} to support the following new types +of handles, and adds as a property that may be specified when creating a +buffer or an image memory object from an external handle: + + * {CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_WIN32_KHR} specifies an NT handle that + has only limited valid usage outside of OpenCL and other compatible + APIs. + It must be compatible with the functions `DuplicateHandle`, + `CloseHandle`, `CompareObjectHandles`, `GetHandleInformation`, and + `SetHandleInformation`. + It owns a reference to the underlying memory resource represented by its + memory object. + * {CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_WIN32_KMT_KHR} specifies a global + share handle that has only limited valid usage outside of OpenCL and + other compatible APIs. + It is not compatible with any native APIs. + It does not own a reference to the underlying memory resource + represented by its memory object, and will therefore become invalid when + all memory objects associated with it are destroyed. +endif::cl_khr_external_memory_win32[] + +For these extensions, importing memory object payloads from Windows handles +does not transfer ownership of the handle to the OpenCL implementation. 
+For handle types defined as NT handles, the application must release handle +ownership using the CloseHandle system call when the handle is no longer +needed. +For handle types defined as NT handles, the imported memory object holds a +reference to its payload. + +Note: Non-NT handle import operations do not add a reference to their +associated payload. +If the original object owning the payload is destroyed, all resources and +handles sharing that payload will become invalid. + +endif::cl_khr_external_memory[] + + +[[unmapping-mapped-memory]] +=== Unmapping Mapped Memory Objects + +[open,refpage='clEnqueueUnmapMemObject',desc='Enqueues a command to unmap a previously mapped region of a memory object.',type='protos'] +-- +To enqueue a command to unmap a previously mapped region of a memory object, +call the function + +include::{generated}/api/protos/clEnqueueUnmapMemObject.txt[] +include::{generated}/api/version-notes/clEnqueueUnmapMemObject.asciidoc[] + + * _command_queue_ must be a valid host command-queue. + * _memobj_ is a valid memory (buffer or image) object. + The OpenCL context associated with _command_queue_ and _memobj_ must be the + same. + * _mapped_ptr_ is the host address returned by a previous call to + {clEnqueueMapBuffer}, or {clEnqueueMapImage} for _memobj_. + * _event_wait_list_ and _num_events_in_wait_list_ specify events that need to + complete before {clEnqueueUnmapMemObject} can be executed. + If _event_wait_list_ is `NULL`, then {clEnqueueUnmapMemObject} does not wait + on any event to complete. + If _event_wait_list_ is `NULL`, _num_events_in_wait_list_ must be 0. + If _event_wait_list_ is not `NULL`, the list of events pointed to by + _event_wait_list_ must be valid and _num_events_in_wait_list_ must be + greater than 0. + The events specified in _event_wait_list_ act as synchronization points. + The context associated with events in _event_wait_list_ and _command_queue_ + must be the same. 
+ The memory associated with _event_wait_list_ can be reused or freed after + the function returns. + * _event_ returns an event object that identifies this command and + can be used to query or queue a wait for this command to complete. + If _event_ is `NULL` or the enqueue is unsuccessful, no event will be + created and therefore it will not be possible to query the status of this + command or to wait for this command to complete. + If _event_wait_list_ and _event_ are not `NULL`, _event_ must not refer + to an element of the _event_wait_list_ array. + +Reads or writes from the host using the pointer returned by +{clEnqueueMapBuffer} or {clEnqueueMapImage} are considered to be complete. + +{clEnqueueMapBuffer} and {clEnqueueMapImage} increment the mapped count of +the memory object. +The initial mapped count value of the memory object is zero. +Multiple calls to {clEnqueueMapBuffer}, or {clEnqueueMapImage} on the same +memory object will increment this mapped count by appropriate number of +calls. +{clEnqueueUnmapMemObject} decrements the mapped count of the memory object. + +{clEnqueueMapBuffer}, and {clEnqueueMapImage} act as synchronization points +for a region of the buffer object being mapped. + +// refError + +{clEnqueueUnmapMemObject} returns {CL_SUCCESS} if the function is executed +successfully. +Otherwise, it returns one of the following errors: + + * {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not a valid host + command-queue. + * {CL_INVALID_MEM_OBJECT} if _memobj_ is not a valid memory object or is a pipe object. * {CL_INVALID_VALUE} if _mapped_ptr_ is not a valid pointer returned by {clEnqueueMapBuffer} or {clEnqueueMapImage} for _memobj_. @@ -3913,7 +5745,7 @@ Otherwise, it returns one of the following errors: [[accessing-mapped-regions]] -=== Accessing mapped regions of a memory object +=== Accessing Mapped Regions of a Memory Object This section describes the behavior of OpenCL commands that access mapped regions of a memory object. 
@@ -3961,8 +5793,8 @@ pointer that has been unmapped is undefined. The mapped pointer returned by {clEnqueueMapBuffer} or {clEnqueueMapImage} can be used as the _ptr_ argument value to {clEnqueueReadBuffer}, -{clEnqueueWriteBuffer}, {clEnqueueReadBufferRect}, -{clEnqueueWriteBufferRect}, {clEnqueueReadImage}, or +{clEnqueueWriteBuffer}, {clEnqueueReadBufferRect}, +{clEnqueueWriteBufferRect}, {clEnqueueReadImage}, or {clEnqueueWriteImage} provided the rules described above are adhered to. @@ -4246,13 +6078,51 @@ include::{generated}/api/version-notes/CL_MEM_PROPERTIES.asciidoc[] return the values specified in the properties argument in the same order and without including additional properties. - If _memobj_ was created using {clCreateBuffer}, + If _memobj_ was created using {clCreateBuffer}, {clCreateSubBuffer}, {clCreateImage}, {clCreateImage2D}, or {clCreateImage3D}, or if the _properties_ argument specified in {clCreateBufferWithProperties} or {clCreateImageWithProperties} was `NULL`, the implementation must return _param_value_size_ret_ equal to 0, indicating that there are no properties to be returned. + +ifdef::cl_khr_dx9_media_sharing[] +| {CL_MEM_DX9_MEDIA_ADAPTER_TYPE_KHR_anchor} + +include::{generated}/api/version-notes/CL_MEM_DX9_MEDIA_ADAPTER_TYPE_KHR.asciidoc[] + | {cl_dx9_media_adapter_type_khr_TYPE} + | If _memobj_ was created using {clCreateFromDX9MediaSurfaceKHR}, + returns the _adapter_type_ argument specified when _memobj_ was + created. +| {CL_MEM_DX9_MEDIA_SURFACE_INFO_KHR_anchor} + +include::{generated}/api/version-notes/CL_MEM_DX9_MEDIA_SURFACE_INFO_KHR.asciidoc[] + | {cl_dx9_surface_info_khr_TYPE} + | If _memobj_ was created using {clCreateFromDX9MediaSurfaceKHR}, + returns the _surface_info_ argument specified when _memobj_ was + created. 
+endif::cl_khr_dx9_media_sharing[] + +ifdef::cl_khr_d3d10_sharing[] +| {CL_MEM_D3D10_RESOURCE_KHR_anchor} + +include::{generated}/api/version-notes/CL_MEM_D3D10_RESOURCE_KHR.asciidoc[] + | `ID3D10Resource *` + | If _memobj_ was created using {clCreateFromD3D10BufferKHR}, + {clCreateFromD3D10Texture2DKHR}, or {clCreateFromD3D10Texture3DKHR}, + returns the _resource_ argument specified when _memobj_ was created. +endif::cl_khr_d3d10_sharing[] + +ifdef::cl_khr_d3d11_sharing[] +| {CL_MEM_D3D11_RESOURCE_KHR_anchor} + +include::{generated}/api/version-notes/CL_MEM_D3D11_RESOURCE_KHR.asciidoc[] + | `ID3D11Resource *` + | If _memobj_ was created using {clCreateFromD3D11BufferKHR}, + {clCreateFromD3D11Texture2DKHR}, or {clCreateFromD3D11Texture3DKHR}, + returns the _resource_ argument specified when _memobj_ was created. +endif::cl_khr_d3d11_sharing[] + |==== // refError @@ -4270,1501 +6140,1844 @@ Otherwise, it returns one of the following errors: by the OpenCL implementation on the device. * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources required by the OpenCL implementation on the host. --- +ifdef::cl_khr_dx9_media_sharing[] +The following errors may be returned if the `<<cl_khr_dx9_media_sharing>>` +extension is supported: -== Shared Virtual Memory + * {CL_INVALID_DX9_MEDIA_SURFACE_KHR} if _param_name_ is + {CL_MEM_DX9_MEDIA_SURFACE_INFO_KHR} and _memobj_ was not created by + calling {clCreateFromDX9MediaSurfaceKHR} from a Direct3D9 surface. +endif::cl_khr_dx9_media_sharing[] -NOTE: Shared virtual memory is <> version 2.0. +ifdef::cl_khr_d3d10_sharing[] +The following errors may be returned if the `<<cl_khr_d3d10_sharing>>` +extension is supported: -Shared virtual memory (a.k.a. SVM) allows the host and kernels executing on -devices to directly share complex, pointer-containing data structures such as -trees and linked lists. -It also eliminates the need to marshal data between the host and devices. -As a result, SVM substantially simplifies OpenCL programming and may improve -performance.
+ * {CL_INVALID_D3D10_RESOURCE_KHR} if _param_name_ is + {CL_MEM_D3D10_RESOURCE_KHR} and _memobj_ was not created by calling + {clCreateFromD3D10BufferKHR}, {clCreateFromD3D10Texture2DKHR}, or + {clCreateFromD3D10Texture3DKHR}. +endif::cl_khr_d3d10_sharing[] +ifdef::cl_khr_d3d11_sharing[] +The following errors may be returned if the `<<cl_khr_d3d11_sharing>>` +extension is supported: -=== SVM sharing granularity: coarse- and fine- grained sharing + * {CL_INVALID_D3D11_RESOURCE_KHR} if _param_name_ is + {CL_MEM_D3D11_RESOURCE_KHR} and _memobj_ was not created by calling + {clCreateFromD3D11BufferKHR}, {clCreateFromD3D11Texture2DKHR}, or + {clCreateFromD3D11Texture3DKHR}. +endif::cl_khr_d3d11_sharing[] -OpenCL maintains memory consistency in a coarse-grained fashion in regions -of buffers. -We call this coarse-grained sharing. -Many platforms such as those with integrated CPU-GPU processors and ones -using the SVM-related PCI-SIG IOMMU services can do better, and can support -sharing at a granularity smaller than a buffer. -We call this fine-grained sharing. +-- - * Coarse-grained sharing: Coarse-grain sharing may be used for memory and - virtual pointer sharing between multiple devices as well as between the - host and one or more devices. - The shared memory region is a memory buffer allocated using - {clSVMAlloc}. - Memory consistency is guaranteed at synchronization points and the host - can use calls to {clEnqueueSVMMap} and {clEnqueueSVMUnmap} or create a - {cl_mem_TYPE} buffer object using the SVM pointer and use OpenCL's existing host - API functions {clEnqueueMapBuffer} and {clEnqueueUnmapMemObject} to - update regions of the buffer. - What coarse-grain buffer SVM adds to OpenCL's earlier buffer support are - the ability to share virtual memory pointers and a guarantee that - concurrent access to the same memory allocation from multiple kernels on - a single device is valid.
- The coarse-grain buffer SVM provides a memory consistency model similar - to the global memory consistency model described in _sections 3.3.1_ and - _3.4.3_ of the OpenCL 1.2 specification. - This memory consistency applies to the regions of buffers being shared - in a coarse-grained fashion. - It is enforced at the synchronization points between commands enqueued - to command-queues in a single context with the additional consideration - that multiple kernels concurrently running on the same device may safely - share the data. - * Fine-grained sharing: Shared virtual memory where memory consistency is - maintained at a granularity smaller than a buffer. - How fine-grained SVM is used depends on whether the device supports SVM - atomic operations. - ** If SVM atomic operations are supported, they provide memory consistency - for loads and stores by the host and kernels executing on devices - supporting SVM. - This means that the host and devices can concurrently read and update - the same memory. - The consistency provided by SVM atomics is in addition to the - consistency provided at synchronization points. - There is no need for explicit calls to {clEnqueueSVMMap} and - {clEnqueueSVMUnmap} or {clEnqueueMapBuffer} and - {clEnqueueUnmapMemObject} on a {cl_mem_TYPE} buffer object created using the - SVM pointer. - ** If SVM atomic operations are not supported, the host and devices can - concurrently read the same memory locations and can concurrently update - non-overlapping memory regions, but attempts to update the same memory - locations are undefined. - Memory consistency is guaranteed at synchronization points without the - need for explicit calls to {clEnqueueSVMMap} and {clEnqueueSVMUnmap} - or {clEnqueueMapBuffer} and {clEnqueueUnmapMemObject} on a {cl_mem_TYPE} - buffer object created using the SVM pointer. - * There are two kinds of fine-grain sharing support. - Devices may support either fine-grain buffer sharing or fine-grain - system sharing. 
- ** Fine-grain buffer sharing provides fine-grain SVM only within buffers - and is an extension of coarse-grain sharing. - To support fine-grain buffer sharing in an OpenCL context, all devices - in the context must support {CL_DEVICE_SVM_FINE_GRAIN_BUFFER}. - ** Fine-grain system sharing enables fine-grain sharing of the host's - entire virtual memory, including memory regions allocated by the system - *malloc* API. - OpenCL buffer objects are unnecessary and programmers can pass pointers - allocated using *malloc* to OpenCL kernels. -As an illustration of fine-grain SVM using SVM atomic operations to maintain -memory consistency, consider the following example. -The host and a set of devices can simultaneously access and update a shared -work-queue data structure holding work-items to be done. -The host can use atomic operations to insert new work-items into the queue -at the same time as the devices using similar atomic operations to remove -work-items for processing. +ifdef::cl_khr_dx9_media_sharing[] +=== Querying Media Surface Properties of Memory Objects Created From DirectX 9 Media Surfaces -It is the programmer's responsibility to ensure that no host code or -executing kernels attempt to access a shared memory region after that memory -is freed. -We require the SVM implementation to work with either 32- or 64- bit host -applications subject to the following requirement: the address space size -must be the same for the host and all OpenCL devices in the context. +Properties of media surface objects may be queried using {clGetMemObjectInfo} +and {clGetImageInfo} with _param_name_ {CL_MEM_DX9_MEDIA_ADAPTER_TYPE_KHR}, +{CL_MEM_DX9_MEDIA_SURFACE_INFO_KHR} and {CL_IMAGE_DX9_MEDIA_PLANE_KHR}. 
+endif::cl_khr_dx9_media_sharing[] -[open,refpage='clSVMAlloc',desc='Allocates a shared virtual memory (SVM) buffer that can be shared by the host and all devices in an OpenCL context that support shared virtual memory.',type='protos'] --- -To allocate a shared virtual memory buffer (referred to as a SVM buffer) -that can be shared by the host and all devices in an OpenCL context that -support shared virtual memory, call the function -include::{generated}/api/protos/clSVMAlloc.txt[] -include::{generated}/api/version-notes/clSVMAlloc.asciidoc[] +ifdef::cl_khr_d3d10_sharing[] +=== Querying Direct3D Properties of Memory Objects Created From Direct3D 10 Resources - * _context_ is a valid OpenCL context used to create the SVM buffer. - * _flags_ is a bit-field that is used to specify allocation and usage - information. - The <> table describes the possible values - for _flags_. - * _size_ is the size in bytes of the SVM buffer to be allocated. - * _alignment_ is the minimum alignment in bytes that is required for the newly - created buffers memory region. - It must be a power of two up to the largest data type supported by the - OpenCL device. - For the full profile, the largest data type is long16. - For the embedded profile, it is long16 if the device supports 64-bit - integers; otherwise it is int16. - If alignment is 0, a default alignment will be used that is equal to the - size of largest data type supported by the OpenCL implementation. +Properties of Direct3D 10 objects may be queried using {clGetMemObjectInfo} +and {clGetImageInfo} with _param_name_ {CL_MEM_D3D10_RESOURCE_KHR} and +{CL_IMAGE_D3D10_SUBRESOURCE_KHR} respectively. +endif::cl_khr_d3d10_sharing[] -[[svm-flags-table]] -.List of supported SVM memory flag values -[width="100%",cols="<50%,<50%",options="header"] -|==== -| SVM Memory Flags | Description -| {CL_MEM_READ_WRITE} - | This flag specifies that the SVM buffer will be read and written by a - kernel. - This is the default. 
-| {CL_MEM_WRITE_ONLY} - | This flag specifies that the SVM buffer will be written but not read by - a kernel. - Reading from a SVM buffer created with {CL_MEM_WRITE_ONLY} inside a kernel - is undefined. +ifdef::cl_khr_d3d11_sharing[] +=== Querying Direct3D Properties of Memory Objects Created From Direct3D 11 Resources - {CL_MEM_READ_WRITE} and {CL_MEM_WRITE_ONLY} are mutually exclusive. -| {CL_MEM_READ_ONLY} - | This flag specifies that the SVM buffer object is a read-only memory - object when used inside a kernel. +Properties of Direct3D 11 objects may be queried using {clGetMemObjectInfo} +and {clGetImageInfo} with _param_name_ {CL_MEM_D3D11_RESOURCE_KHR} and +{CL_IMAGE_D3D11_SUBRESOURCE_KHR} respectively. +endif::cl_khr_d3d11_sharing[] - Writing to a SVM buffer created with {CL_MEM_READ_ONLY} inside a kernel is - undefined. - {CL_MEM_READ_WRITE} or {CL_MEM_WRITE_ONLY} and {CL_MEM_READ_ONLY} are mutually - exclusive. -| {CL_MEM_SVM_FINE_GRAIN_BUFFER_anchor} +ifdef::cl_khr_gl_sharing[] +=== Querying OpenGL Object Information From an OpenCL Memory Object -include::{generated}/api/version-notes/CL_MEM_SVM_FINE_GRAIN_BUFFER.asciidoc[] - | This specifies that the application wants the OpenCL implementation to - do a fine-grained allocation. -| {CL_MEM_SVM_ATOMICS_anchor} +[open,refpage='clGetGLObjectInfo',desc='Query OpenGL object and object type used to create an OpenCL memory object',type='protos'] +-- +To query the OpenGL object and object type used to create an OpenCL memory +object, call the function -include::{generated}/api/version-notes/CL_MEM_SVM_ATOMICS.asciidoc[] - | This flag is valid only if {CL_MEM_SVM_FINE_GRAIN_BUFFER} is specified in - flags. - It is used to indicate that SVM atomic operations can control visibility - of memory accesses in this SVM buffer. 
-|==== +include::{generated}/api/protos/clGetGLObjectInfo.txt[] +include::{generated}/api/version-notes/clGetGLObjectInfo.asciidoc[] -If {CL_MEM_SVM_FINE_GRAIN_BUFFER} is not specified, the buffer can be created -as a coarse grained SVM allocation. -Similarly, if {CL_MEM_SVM_ATOMICS} is not specified, the buffer can be created -without support for SVM atomic operations (refer to an OpenCL kernel -language specifications). + * _memobj_ is the memory object to query. + * _gl_object_type_ returns the type of OpenGL object attached to _memobj_ + and can be {CL_GL_OBJECT_BUFFER}, {CL_GL_OBJECT_TEXTURE2D}, + {CL_GL_OBJECT_TEXTURE3D}, {CL_GL_OBJECT_TEXTURE2D_ARRAY}, + {CL_GL_OBJECT_TEXTURE1D}, {CL_GL_OBJECT_TEXTURE1D_ARRAY}, + {CL_GL_OBJECT_TEXTURE_BUFFER}, or {CL_GL_OBJECT_RENDERBUFFER}. + If _gl_object_type_ is `NULL`, it is ignored. + * _gl_object_name_ returns the OpenGL object name used to create _memobj_. + If _gl_object_name_ is `NULL`, it is ignored. -Calling {clSVMAlloc} does not itself provide consistency for the shared -memory region. -When the host cannot use the SVM atomic operations, it must rely on OpenCL's -guaranteed memory consistency at synchronization points. +// refError -For SVM to be used efficiently, the host and any devices sharing a buffer -containing virtual memory pointers should have the same endianness. -If the context passed to {clSVMAlloc} has devices with mixed endianness and -the OpenCL implementation is unable to implement SVM because of that mixed -endianness, {clSVMAlloc} will fail and return `NULL`. - -Although SVM is generally not supported for image objects, {clCreateImage} -and {clCreateImageWithProperties} -may create an image from a buffer (a 1D image from a buffer or a 2D image -from buffer) if the buffer specified in its image description parameter is a -SVM buffer. -Such images have a linear memory representation so their memory can be -shared using SVM.
-However, fine grained sharing and atomics are not supported for image reads -and writes in a kernel. - -// refError - -{clSVMAlloc} returns a valid non-`NULL` shared virtual memory address if the -SVM buffer is successfully allocated. -Otherwise, like *malloc*, it returns a `NULL` pointer value. -{clSVMAlloc} will fail if +{clGetGLObjectInfo} returns {CL_SUCCESS} if the call was executed +successfully. +Otherwise, it returns one of the following errors: - * _context_ is not a valid context, or no devices in _context_ support SVM. - * _flags_ does not contain {CL_MEM_SVM_FINE_GRAIN_BUFFER} but does contain - {CL_MEM_SVM_ATOMICS}. - * Values specified in _flags_ do not follow rules described for supported - values in the <> table. - * {CL_MEM_SVM_FINE_GRAIN_BUFFER} or {CL_MEM_SVM_ATOMICS} is specified in - _flags_ and these are not supported by at least one device in _context_. - * The values specified in _flags_ are not valid, i.e. don't match those - defined in the <> table. - * _size_ is 0 or > {CL_DEVICE_MAX_MEM_ALLOC_SIZE} value for any device in - _context_. - * _alignment_ is not a power of two or the OpenCL implementation cannot - support the specified alignment for at least one device in _context_. - * There was a failure to allocate resources. + * {CL_INVALID_MEM_OBJECT} if _memobj_ is not a valid OpenCL memory object. + * {CL_INVALID_GL_OBJECT} if there is no OpenGL object associated with + _memobj_. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources + required by the OpenCL implementation on the device. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. 
-- -[open,refpage='clSVMFree',desc='Frees a shared virtual memory buffer allocated using clSVMAlloc.',type='protos'] +[open,refpage='clGetGLTextureInfo',desc='Query additional information about the OpenGL texture object associated with an OpenCL memory object',type='protos'] -- -To free a shared virtual memory buffer allocated using {clSVMAlloc}, call -the function +To query additional information about the OpenGL texture object associated +with an OpenCL memory object, call the function -include::{generated}/api/protos/clSVMFree.txt[] -include::{generated}/api/version-notes/clSVMFree.asciidoc[] +include::{generated}/api/protos/clGetGLTextureInfo.txt[] +include::{generated}/api/version-notes/clGetGLTextureInfo.asciidoc[] - * _context_ is a valid OpenCL context used to create the SVM buffer. - If no devices in _context_ support SVM, no action occurs. - * _svm_pointer_ must be the value returned by a call to {clSVMAlloc}. - If a `NULL` pointer is passed in _svm_pointer_, no action occurs. + * _memobj_ is the memory object to query. + * _param_name_ specifies what additional information about the OpenGL + texture object associated with _memobj_ to query. + The list of supported _param_name_ types and the information returned in + _param_value_ by {clGetGLTextureInfo} is described in the table below. + * _param_value_ is a pointer to memory where the result being queried is + returned. + If _param_value_ is `NULL`, it is ignored. + * _param_value_size_ is used to specify the size in bytes of memory + pointed to by _param_value_. + This size must be >= size of return type as described in the table + below. + * _param_value_size_ret_ returns the actual size in bytes of data copied + to _param_value_. + If _param_value_size_ret_ is `NULL`, it is ignored. -Note that {clSVMFree} does not wait for previously enqueued commands that -may be using _svm_pointer_ to finish before freeing _svm_pointer_. 
-It is the responsibility of the application to make sure that enqueued -commands that use _svm_pointer_ have finished before freeing _svm_pointer_. -This can be done by enqueuing a blocking operation such as {clFinish}, -{clWaitForEvents}, {clEnqueueReadBuffer} or by registering a callback with -the events associated with enqueued commands and when the last enqueued -command has finished freeing _svm_pointer_. +[[gl-texture-info-queries-table]] +.OpenGL texture info that may be queried with {clGetGLTextureInfo} +[cols=",,",options="header",] +|==== +| {cl_gl_texture_info} | Return Type | Info. Returned in _param_value_ +| {CL_GL_TEXTURE_TARGET_anchor} + +include::{generated}/api/version-notes/CL_GL_TEXTURE_TARGET.asciidoc[] + | `GLenum` + | The _texture_target_ argument specified in {clCreateFromGLTexture}. +| {CL_GL_MIPMAP_LEVEL_anchor} + +include::{generated}/api/version-notes/CL_GL_MIPMAP_LEVEL.asciidoc[] + | `GLint` + | The _miplevel_ argument specified in {clCreateFromGLTexture}. +ifdef::cl_khr_gl_msaa_sharing[] +| {CL_GL_NUM_SAMPLES_anchor} + +include::{generated}/api/version-notes/CL_GL_NUM_SAMPLES.asciidoc[] + | `GLsizei` + | The _samples_ argument passed to `glTexImage2DMultisample` or + `glTexImage3DMultisample`. + + If _image_ is not a MSAA texture, 1 is returned. +endif::cl_khr_gl_msaa_sharing[] +|==== -The behavior of using _svm_pointer_ after it has been freed is undefined. -In addition, if a buffer object is created using {clCreateBuffer} or -{clCreateBufferWithProperties} with _svm_pointer_, the buffer object must -first be released before the _svm_pointer_ is freed. +// refError -The {clEnqueueSVMFree} API can also be used to enqueue a callback to free -the shared virtual memory buffer allocated using {clSVMAlloc} or a shared -system memory pointer. +{clGetGLTextureInfo} returns {CL_SUCCESS} if the function is executed +successfully. 
+Otherwise, it returns one of the following errors: + + * {CL_INVALID_MEM_OBJECT} if _memobj_ is not a valid OpenCL memory object. + * {CL_INVALID_GL_OBJECT} if there is no OpenGL texture object associated + with _memobj_. + * {CL_INVALID_VALUE} if _param_name_ is not valid, or if size in bytes + specified by _param_value_size_ is less than the size of the return type + as described in the table above and _param_value_ is not `NULL`, or if + _param_value_ and _param_value_size_ret_ are `NULL`. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources + required by the OpenCL implementation on the device. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. -- +endif::cl_khr_gl_sharing[] -[open,refpage='clEnqueueSVMFree',desc='Enqueues a command to free shared virtual memory allocated using clSVMAlloc or a shared system memory pointer.',type='protos'] + +ifdef::cl_khr_dx9_media_sharing[] +=== Sharing Memory Objects Created From Media Surfaces Between a Media Adapter and OpenCL + +[open,refpage='clEnqueueAcquireDX9MediaSurfacesKHR',desc='Acquire OpenCL memory objects created from a media surface',type='protos'] -- -To enqueue a command to free the shared virtual memory allocated using -{clSVMAlloc} or a shared system memory pointer, call the function +To acquire OpenCL memory objects that have been created from a media +surface, call the function -include::{generated}/api/protos/clEnqueueSVMFree.txt[] -include::{generated}/api/version-notes/clEnqueueSVMFree.asciidoc[] +include::{generated}/api/protos/clEnqueueAcquireDX9MediaSurfacesKHR.txt[] +include::{generated}/api/version-notes/clEnqueueAcquireDX9MediaSurfacesKHR.asciidoc[] - * _command_queue_ is a valid host command-queue. - * _svm_pointers_ and _num_svm_pointers_ specify shared virtual memory pointers - to be freed. 
- Each pointer in _svm_pointers_ that was allocated using {clSVMAlloc} must - have been allocated from the same context from which _command_queue_ was - created. - The memory associated with _svm_pointers_ can be reused or freed after the - function returns. - * _pfn_free_func_ specifies the callback function to be called to free the SVM - pointers. - This callback function may be called asynchronously by the OpenCL - implementation. - It is the application's responsibility to ensure that the callback function - is thread-safe. - _pfn_free_func_ takes four arguments: _queue_ which is the command-queue in - which {clEnqueueSVMFree} was enqueued, the count and list of SVM pointers to - free and _user_data_ which is a pointer to user specified data. - If _pfn_free_func_ is `NULL`, all pointers specified in _svm_pointers_ must - be allocated using {clSVMAlloc} and the OpenCL implementation will free - these SVM pointers. - _pfn_free_func_ must be a valid callback function if any SVM pointer to be - freed is a shared system memory pointer i.e. not allocated using - {clSVMAlloc}. - If _pfn_free_func_ is a valid callback function, the OpenCL implementation - will call _pfn_free_func_ to free all the SVM pointers specified in - _svm_pointers_. - * _user_data_ will be passed as the _user_data_ argument when _pfn_free_func_ - is called. - _user_data_ can be `NULL`. - * _event_wait_list_ and _num_events_in_wait_list_ specify events that need to - complete before {clEnqueueSVMFree} can be executed. - If _event_wait_list_ is `NULL`, then {clEnqueueSVMFree} does not wait on any - event to complete. + * _command_queue_ is a valid command-queue. + * _num_objects_ is the number of memory objects to be acquired in + _mem_objects_. + * _mem_objects_ is a pointer to a list of OpenCL memory objects that were + created from media surfaces. + * _event_wait_list_ and _num_events_in_wait_list_ specify events that need + to complete before this particular command can be executed. 
+ If _event_wait_list_ is `NULL`, then this particular command does not + wait on any event to complete. If _event_wait_list_ is `NULL`, _num_events_in_wait_list_ must be 0. If _event_wait_list_ is not `NULL`, the list of events pointed to by _event_wait_list_ must be valid and _num_events_in_wait_list_ must be greater than 0. The events specified in _event_wait_list_ act as synchronization points. - The context associated with events in _event_wait_list_ and _command_queue_ - must be the same. - The memory associated with _event_wait_list_ can be reused or freed after - the function returns. - * _event_ returns an event object that identifies this command and - can be used to query or queue a wait for this command to complete. + * _event_ returns an event object that identifies this command and can be + used to query or wait for this command to complete. If _event_ is `NULL` or the enqueue is unsuccessful, no event will be - created and therefore it will not be possible to query the status of this - command or to wait for this command to complete. + created and therefore it will not be possible to query the status of + this command or to wait for this command to complete. If _event_wait_list_ and _event_ are not `NULL`, _event_ must not refer to an element of the _event_wait_list_ array. -// refError +The media surfaces are acquired by the OpenCL context associated with +_command_queue_ and can therefore be used by all command-queues associated +with the OpenCL context. + +OpenCL memory objects created from media surfaces must be acquired before +they can be used by any OpenCL commands queued to a command-queue. +If an OpenCL memory object created from a media surface is used while it is +not currently acquired by OpenCL, the call attempting to use that OpenCL +memory object will return {CL_DX9_MEDIA_SURFACE_NOT_ACQUIRED_KHR}. 
+ +If {CL_CONTEXT_INTEROP_USER_SYNC} is not specified as {CL_TRUE} during +context creation, {clEnqueueAcquireDX9MediaSurfacesKHR} provides the +synchronization guarantee that any media adapter API calls involving the +interop device(s) used in the OpenCL context made before +{clEnqueueAcquireDX9MediaSurfacesKHR} is called will complete executing +before _event_ reports completion and before the execution of any subsequent +OpenCL work issued in _command_queue_ begins. +If the context was created with properties specifying +{CL_CONTEXT_INTEROP_USER_SYNC} as {CL_TRUE}, the user is responsible for +guaranteeing that any media adapter API calls involving the interop +device(s) used in the OpenCL context made before +{clEnqueueAcquireDX9MediaSurfacesKHR} is called have completed before +calling {clEnqueueAcquireDX9MediaSurfacesKHR}. -{clEnqueueSVMFree} returns {CL_SUCCESS} if the function is executed -successfully. -Otherwise, it returns one of the following errors: +// refError - * {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not a valid host +{clEnqueueAcquireDX9MediaSurfacesKHR} returns {CL_SUCCESS} if the function +is executed successfully. +If _num_objects_ is 0 and _mem_objects_ is `NULL` then the function does +nothing and returns {CL_SUCCESS}. +Otherwise it returns one of the following errors: + + * {CL_INVALID_VALUE} if _num_objects_ is zero and _mem_objects_ is not a + `NULL` value or if _num_objects_ > 0 and _mem_objects_ is `NULL`. + * {CL_INVALID_MEM_OBJECT} if memory objects in _mem_objects_ are not valid + OpenCL memory objects or if memory objects in _mem_objects_ have not + been created from media surfaces. + * {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not a valid command-queue. - * {CL_INVALID_OPERATION} if the device associated with _command_queue_ does not support SVM. - * {CL_INVALID_VALUE} if _num_svm_pointers_ is 0 and _svm_pointers_ is - non-`NULL`, _or_ if _svm_pointers_ is `NULL` and _num_svm_pointers_ is - not 0.
+ * {CL_INVALID_CONTEXT} if context associated with _command_queue_ was not + created from a device that can share the media surface referenced by + _mem_objects_. + * {CL_DX9_MEDIA_SURFACE_ALREADY_ACQUIRED_KHR} if memory objects in + _mem_objects_ have previously been acquired using + {clEnqueueAcquireDX9MediaSurfacesKHR} but have not been released using + {clEnqueueReleaseDX9MediaSurfacesKHR}. * {CL_INVALID_EVENT_WAIT_LIST} if _event_wait_list_ is `NULL` and _num_events_in_wait_list_ > 0, or _event_wait_list_ is not `NULL` and _num_events_in_wait_list_ is 0, or if event objects in _event_wait_list_ are not valid events. - * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required - by the OpenCL implementation on the device. * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources required by the OpenCL implementation on the host. -- -[open,refpage='clEnqueueSVMMemcpy',desc='Enqueues a command to do a memcpy operation.',type='protos'] +[open,refpage='clEnqueueReleaseDX9MediaSurfacesKHR',desc='Release OpenCL memory objects created from a media surface',type='protos'] -- -To enqueue a command to do a memcpy operation, call the function +To release OpenCL memory objects that have been created from media surfaces, +call the function -include::{generated}/api/protos/clEnqueueSVMMemcpy.txt[] -include::{generated}/api/version-notes/clEnqueueSVMMemcpy.asciidoc[] +include::{generated}/api/protos/clEnqueueReleaseDX9MediaSurfacesKHR.txt[] +include::{generated}/api/version-notes/clEnqueueReleaseDX9MediaSurfacesKHR.asciidoc[] - * _command_queue_ refers to the host command-queue in which the read / write - command will be queued. - If either _dst_ptr_ or _src_ptr_ is allocated using {clSVMAlloc} then the - OpenCL context allocated against must match that of _command_queue_. - * _blocking_copy_ indicates if the copy operation is _blocking_ or - _non-blocking_. - * If _blocking_copy_ is {CL_TRUE} i.e. 
the copy command is blocking, - {clEnqueueSVMMemcpy} does not return until the buffer data has been copied - into memory pointed to by _dst_ptr_. - * _size_ is the size in bytes of data being copied. - * _dst_ptr_ is the pointer to a host or SVM memory allocation where data is - copied to. - * _src_ptr_ is the pointer to a host or SVM memory allocation where data is - copied from. - * _event_wait_list_ and _num_events_in_wait_list_ specify events that need to - complete before this particular command can be executed. - If _event_wait_list_ is `NULL`, then this particular command does not wait - on any event to complete. + * _num_objects_ is the number of memory objects to be released in + _mem_objects_. + * _mem_objects_ is a pointer to a list of OpenCL memory objects that were + created from media surfaces. + * _event_wait_list_ and _num_events_in_wait_list_ specify events that need + to complete before this particular command can be executed. + If _event_wait_list_ is `NULL`, then this particular command does not + wait on any event to complete. If _event_wait_list_ is `NULL`, _num_events_in_wait_list_ must be 0. If _event_wait_list_ is not `NULL`, the list of events pointed to by _event_wait_list_ must be valid and _num_events_in_wait_list_ must be greater than 0. - The events specified in _event_wait_list_ act as synchronization points. - The context associated with events in _event_wait_list_ and _command_queue_ - must be the same. - The memory associated with _event_wait_list_ can be reused or freed after - the function returns. - * _event_ returns an event object that identifies this read / write command - and can be used to query or queue a wait for this command to complete. + * _event_ returns an event object that identifies this command and can be + used to query or wait for this command to complete. 
If _event_ is `NULL` or the enqueue is unsuccessful, no event will be - created and therefore it will not be possible to query the status of this - command or to wait for this command to complete. + created and therefore it will not be possible to query the status of + this command or to wait for this command to complete. If _event_wait_list_ and _event_ are not `NULL`, _event_ must not refer to an element of the _event_wait_list_ array. -If _blocking_copy_ is {CL_FALSE} i.e. the copy command is non-blocking, -{clEnqueueSVMMemcpy} queues a non-blocking copy command and returns. -The contents of the buffer that _dst_ptr_ points to cannot be used until the -copy command has completed. -The _event_ argument returns an event object which can be used to query the -execution status of the read command. -When the copy command has completed, the contents of the buffer that -_dst_ptr_ points to can be used by the application. - -If the memory allocation(s) containing _dst_ptr_ and/or _src_ptr_ are -allocated using {clSVMAlloc} and either is not allocated from the same -context from which _command_queue_ was created the behavior is undefined. +The media surfaces are released by the OpenCL context associated with +_command_queue_. + +OpenCL memory objects created from media surfaces which have been acquired +by OpenCL must be released by OpenCL before they may be accessed by the +media adapter API. +Accessing a media surface while its corresponding OpenCL memory object is +acquired is in error and will result in undefined behavior, including but +not limited to possible OpenCL errors, data corruption, and program +termination. 
+ +If {CL_CONTEXT_INTEROP_USER_SYNC} is not specified as {CL_TRUE} during +context creation, {clEnqueueReleaseDX9MediaSurfacesKHR} provides the +synchronization guarantee that any calls to media adapter APIs involving the +interop device(s) used in the OpenCL context made after the call to +{clEnqueueReleaseDX9MediaSurfacesKHR} will not start executing until after +all events in _event_wait_list_ are complete and all work already submitted +to _command_queue_ completes execution. +If the context was created with properties specifying +{CL_CONTEXT_INTEROP_USER_SYNC} as {CL_TRUE}, the user is responsible for +guaranteeing that any media adapter API calls involving the interop +device(s) used in the OpenCL context made after +{clEnqueueReleaseDX9MediaSurfacesKHR} will not start executing until after +the event returned by {clEnqueueReleaseDX9MediaSurfacesKHR} reports completion. // refError -{clEnqueueSVMMemcpy} returns {CL_SUCCESS} if the function is executed -successfully. -Otherwise, it returns one of the following errors: - - * {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not a valid host +{clEnqueueReleaseDX9MediaSurfacesKHR} returns {CL_SUCCESS} if the function +is executed successfully. +If _num_objects_ is 0 and _mem_objects_ is `NULL` the function does +nothing and returns {CL_SUCCESS}. +Otherwise it returns one of the following errors: + + * {CL_INVALID_VALUE} if _num_objects_ is zero and _mem_objects_ is not a + `NULL` value or if _num_objects_ > 0 and _mem_objects_ is `NULL`. + * {CL_INVALID_MEM_OBJECT} if memory objects in _mem_objects_ are not valid + OpenCL memory objects or if memory objects in _mem_objects_ have not + been created from valid media surfaces. + * {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not a valid command-queue. - * {CL_INVALID_OPERATION} if the device associated with _command_queue_ does not support SVM.
- * {CL_INVALID_CONTEXT} if the context associated with _command_queue_ and - events in _event_wait_list_ are not the same. + * {CL_INVALID_CONTEXT} if context associated with _command_queue_ was not + created from a media object. + * {CL_DX9_MEDIA_SURFACE_NOT_ACQUIRED_KHR} if memory objects in + _mem_objects_ have not previously been acquired using + {clEnqueueAcquireDX9MediaSurfacesKHR}, or have been released using + {clEnqueueReleaseDX9MediaSurfacesKHR} since the last time that they were + acquired. * {CL_INVALID_EVENT_WAIT_LIST} if _event_wait_list_ is `NULL` and _num_events_in_wait_list_ > 0, or _event_wait_list_ is not `NULL` and - _num_events_in_wait_list_ is 0, or if event objects in _event_wait_list_ - are not valid events. - * {CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST} if the copy operation is - blocking and the execution status of any of the events in - _event_wait_list_ is a negative integer value. - * {CL_INVALID_VALUE} if _dst_ptr_ or _src_ptr_ is `NULL`. - * {CL_MEM_COPY_OVERLAP} if the values specified for _dst_ptr_, _src_ptr_ and - _size_ result in an overlapping copy. - * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required - by the OpenCL implementation on the device. + _num_events_in_wait_list_ is 0, or if event objects in + _event_wait_list_ are not valid events. * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources required by the OpenCL implementation on the host.
-- +endif::cl_khr_dx9_media_sharing[] -[open,refpage='clEnqueueSVMMemFill',desc='Enqueues a command to fill a region in memory with a pattern of a given pattern size.',type='protos'] + +ifdef::cl_khr_d3d10_sharing[] +=== Sharing Memory Objects Created From Direct3D 10 Resources Between Direct3D 10 and OpenCL Contexts + +[open,refpage='clEnqueueAcquireD3D10ObjectsKHR',desc='Acquire OpenCL memory objects created from Direct3D 10 resources',type='protos'] -- -To enqueue a command to fill a region in memory with a pattern of a given -pattern size, call the function +To acquire OpenCL memory objects that have been created from Direct3D 10 +resources, call the function -include::{generated}/api/protos/clEnqueueSVMMemFill.txt[] -include::{generated}/api/version-notes/clEnqueueSVMMemFill.asciidoc[] +include::{generated}/api/protos/clEnqueueAcquireD3D10ObjectsKHR.txt[] +include::{generated}/api/version-notes/clEnqueueAcquireD3D10ObjectsKHR.asciidoc[] - * _command_queue_ refers to the host command-queue in which the fill command - will be queued. - The OpenCL context associated with _command_queue_ and SVM pointer referred - to by _svm_ptr_ must be the same. - * _svm_ptr_ is a pointer to a memory region that will be filled with - _pattern_. - It must be aligned to _pattern_size_ bytes. - If _svm_ptr_ is allocated using {clSVMAlloc} then it must be allocated from - the same context from which _command_queue_ was created. - Otherwise the behavior is undefined. - * _pattern_ is a pointer to the data pattern of size _pattern_size_ in bytes. - _pattern_ will be used to fill a region in _buffer_ starting at _svm_ptr_ - and is _size_ bytes in size. - The data pattern must be a scalar or vector integer or floating-point data - type supported by OpenCL as described in <> and <>. - For example, if region pointed to by _svm_ptr_ is to be filled with a - pattern of float4 values, then _pattern_ will be a pointer to a cl_float4 - value and _pattern_size_ will be `sizeof(cl_float4)`. 
- The maximum value of _pattern_size_ is the size of the largest integer or - floating-point vector data type supported by the OpenCL device. - The memory associated with _pattern_ can be reused or freed after the - function returns. - * _size_ is the size in bytes of region being filled starting with _svm_ptr_ - and must be a multiple of _pattern_size_. - * _event_wait_list_ and _num_events_in_wait_list_ specify events that need to - complete before this particular command can be executed. - If _event_wait_list_ is `NULL`, then this particular command does not wait - on any event to complete. + * _command_queue_ is a valid command-queue. + * _num_objects_ is the number of memory objects to be acquired in + _mem_objects_. + * _mem_objects_ is a pointer to a list of OpenCL memory objects that were + created from Direct3D 10 resources. + * _event_wait_list_ and _num_events_in_wait_list_ specify events that need + to complete before this particular command can be executed. + If _event_wait_list_ is `NULL`, then this particular command does not + wait on any event to complete. If _event_wait_list_ is `NULL`, _num_events_in_wait_list_ must be 0. If _event_wait_list_ is not `NULL`, the list of events pointed to by _event_wait_list_ must be valid and _num_events_in_wait_list_ must be greater than 0. The events specified in _event_wait_list_ act as synchronization points. - The context associated with events in _event_wait_list_ and _command_queue_ - must be the same. - The memory associated with _event_wait_list_ can be reused or freed after - the function returns. - * _event_ returns an event object that identifies this command and - can be used to query or queue a wait for this command to complete. + * _event_ returns an event object that identifies this command and can be + used to query or wait for this command to complete. 
If _event_ is `NULL` or the enqueue is unsuccessful, no event will be - created and therefore it will not be possible to query the status of this - command or to wait for this command to complete. + created and therefore it will not be possible to query the status of + this command or to wait for this command to complete. If _event_wait_list_ and _event_ are not `NULL`, _event_ must not refer to an element of the _event_wait_list_ array. -// refError +The Direct3D 10 objects are acquired by the OpenCL context associated with +_command_queue_ and can therefore be used by all command-queues associated +with the OpenCL context. + +OpenCL memory objects created from Direct3D 10 resources must be acquired +before they can be used by any OpenCL commands queued to a command-queue. +If an OpenCL memory object created from a Direct3D 10 resource is used while +it is not currently acquired by OpenCL, the behavior is undefined. +Implementations may fail the execution of commands attempting to use that +OpenCL memory object and set their associated event's execution status to +{CL_D3D10_RESOURCE_NOT_ACQUIRED_KHR}. + +If {CL_CONTEXT_INTEROP_USER_SYNC} is not specified as {CL_TRUE} during +context creation, {clEnqueueAcquireD3D10ObjectsKHR} provides the +synchronization guarantee that any Direct3D 10 calls involving the interop +device(s) used in the OpenCL context made before +{clEnqueueAcquireD3D10ObjectsKHR} is called will complete executing before +_event_ reports completion and before the execution of any subsequent OpenCL +work issued in _command_queue_ begins. +If the context was created with properties specifying +{CL_CONTEXT_INTEROP_USER_SYNC} as {CL_TRUE}, the user is responsible for +guaranteeing that any Direct3D 10 calls involving the interop device(s) used +in the OpenCL context made before {clEnqueueAcquireD3D10ObjectsKHR} is +called have completed before calling {clEnqueueAcquireD3D10ObjectsKHR}. 
-{clEnqueueSVMMemFill} returns {CL_SUCCESS} if the function is executed -successfully. -Otherwise, it returns one of the following errors: +// refError - * {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not a valid host +{clEnqueueAcquireD3D10ObjectsKHR} returns {CL_SUCCESS} if the function is +executed successfully. +If _num_objects_ is 0 and _mem_objects_ is `NULL` then the function does +nothing and returns {CL_SUCCESS}. +Otherwise it returns one of the following errors: + + * {CL_INVALID_VALUE} if _num_objects_ is zero and _mem_objects_ is not a + `NULL` value or if _num_objects_ > 0 and _mem_objects_ is `NULL`. + * {CL_INVALID_MEM_OBJECT} if memory objects in _mem_objects_ are not valid + OpenCL memory objects or if memory objects in _mem_objects_ have not + been created from Direct3D 10 resources. + * {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not a valid command-queue. - * {CL_INVALID_OPERATION} if the device associated with _command_queue_ does not support SVM. - * {CL_INVALID_CONTEXT} if the context associated with _command_queue_ and - events in _event_wait_list_ are not the same. - * {CL_INVALID_VALUE} if _svm_ptr_ is `NULL`. - * {CL_INVALID_VALUE} if _svm_ptr_ is not aligned to _pattern_size_ bytes. - * {CL_INVALID_VALUE} if _pattern_ is `NULL` or if _pattern_size_ is 0 or if - _pattern_size_ is not one of {1, 2, 4, 8, 16, 32, 64, 128}. - * {CL_INVALID_VALUE} if _size_ is not a multiple of _pattern_size_. + * {CL_INVALID_CONTEXT} if context associated with _command_queue_ was not + created from a Direct3D 10 context. + * {CL_D3D10_RESOURCE_ALREADY_ACQUIRED_KHR} if memory objects in + _mem_objects_ have previously been acquired using + {clEnqueueAcquireD3D10ObjectsKHR} but have not been released using + {clEnqueueReleaseD3D10ObjectsKHR}.
* {CL_INVALID_EVENT_WAIT_LIST} if _event_wait_list_ is `NULL` and _num_events_in_wait_list_ > 0, or _event_wait_list_ is not `NULL` and _num_events_in_wait_list_ is 0, or if event objects in _event_wait_list_ are not valid events. - * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required - by the OpenCL implementation on the device. * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources required by the OpenCL implementation on the host. -- -[open,refpage='clEnqueueSVMMap',desc='Enqueues a command that will allow the host to update a region of a SVM buffer',type='protos'] +[open,refpage='clEnqueueReleaseD3D10ObjectsKHR',desc='Release OpenCL memory objects created from Direct3D 10 resources',type='protos'] -- -To enqueue a command that will allow the host to update a region of a SVM -buffer, call the function +To release OpenCL memory objects that have been created from Direct3D 10 +resources, call the function -include::{generated}/api/protos/clEnqueueSVMMap.txt[] -include::{generated}/api/version-notes/clEnqueueSVMMap.asciidoc[] +include::{generated}/api/protos/clEnqueueReleaseD3D10ObjectsKHR.txt[] +include::{generated}/api/version-notes/clEnqueueReleaseD3D10ObjectsKHR.asciidoc[] - * _command_queue_ must be a valid host command-queue. - * _blocking_map_ indicates if the map operation is _blocking_ or - _non-blocking_. - * _map_flags_ is a bit-field and is described in the - <> table. - * _svm_ptr_ and _size_ are a pointer to a memory region and size in bytes that - will be updated by the host. - If _svm_ptr_ is allocated using {clSVMAlloc} then it must be allocated from - the same context from which _command_queue_ was created. - Otherwise the behavior is undefined. - * _event_wait_list_ and _num_events_in_wait_list_ specify events that need to - complete before this particular command can be executed. - If _event_wait_list_ is `NULL`, then this particular command does not wait - on any event to complete. 
+ * _num_objects_ is the number of memory objects to be released in + _mem_objects_. + * _mem_objects_ is a pointer to a list of OpenCL memory objects that were + created from Direct3D 10 resources. + * _event_wait_list_ and _num_events_in_wait_list_ specify events that need + to complete before this particular command can be executed. + If _event_wait_list_ is `NULL`, then this particular command does not + wait on any event to complete. If _event_wait_list_ is `NULL`, _num_events_in_wait_list_ must be 0. If _event_wait_list_ is not `NULL`, the list of events pointed to by _event_wait_list_ must be valid and _num_events_in_wait_list_ must be greater than 0. - The events specified in _event_wait_list_ act as synchronization points. - The context associated with events in _event_wait_list_ and _command_queue_ - must be the same. - The memory associated with _event_wait_list_ can be reused or freed after - the function returns. - * _event_ returns an event object that identifies this command and - can be used to query or queue a wait for this command to complete. + * _event_ returns an event object that identifies this command and can be + used to query or wait for this command to complete. If _event_ is `NULL` or the enqueue is unsuccessful, no event will be - created and therefore it will not be possible to query the status of this - command or to wait for this command to complete. + created and therefore it will not be possible to query the status of + this command or to wait for this command to complete. If _event_wait_list_ and _event_ are not `NULL`, _event_ must not refer to an element of the _event_wait_list_ array. -If _blocking_map_ is {CL_TRUE}, {clEnqueueSVMMap} does not return until the -application can access the contents of the SVM region specified by _svm_ptr_ -and _size_ on the host. - -If _blocking_map_ is {CL_FALSE} i.e. map operation is non-blocking, the region -specified by _svm_ptr_ and _size_ cannot be used until the map command has -completed. 
-The _event_ argument returns an event object which can be used to query the -execution status of the map command. -When the map command is completed, the application can access the contents -of the region specified by _svm_ptr_ and _size_. - -Note that since we are enqueuing a command with a SVM buffer, the region is -already mapped in the host address space. +The Direct3D 10 objects are released by the OpenCL context associated with +_command_queue_. + +OpenCL memory objects created from Direct3D 10 resources which have been +acquired by OpenCL must be released by OpenCL before they may be accessed by +Direct3D 10. +Accessing a Direct3D 10 resource while its corresponding OpenCL memory +object is acquired is in error and will result in undefined behavior, +including but not limited to possible OpenCL errors, data corruption, and +program termination. + +If {CL_CONTEXT_INTEROP_USER_SYNC} is not specified as {CL_TRUE} during +context creation, {clEnqueueReleaseD3D10ObjectsKHR} provides the +synchronization guarantee that any Direct3D 10 calls involving the +interop device(s) used in the OpenCL context made after the call to +{clEnqueueReleaseD3D10ObjectsKHR} will not start executing until after all +events in _event_wait_list_ are complete and all work already submitted to +_command_queue_ completes execution. +If the context was created with properties specifying +{CL_CONTEXT_INTEROP_USER_SYNC} as {CL_TRUE}, the user is responsible for +guaranteeing that any Direct3D 10 calls involving the interop device(s) used +in the OpenCL context made after {clEnqueueReleaseD3D10ObjectsKHR} will not +start executing until after the event returned by +{clEnqueueReleaseD3D10ObjectsKHR} reports completion. // refError -{clEnqueueSVMMap} returns {CL_SUCCESS} if the function is executed -successfully.
-Otherwise, it returns one of the following errors: - - * {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not a valid host +{clEnqueueReleaseD3D10ObjectsKHR} returns {CL_SUCCESS} if the function is +executed successfully. +If _num_objects_ is 0 and _mem_objects_ is `NULL` the function does nothing +and returns {CL_SUCCESS}. +Otherwise it returns one of the following errors: + + * {CL_INVALID_VALUE} if _num_objects_ is zero and _mem_objects_ is not a + `NULL` value or if _num_objects_ > 0 and _mem_objects_ is `NULL`. + * {CL_INVALID_MEM_OBJECT} if memory objects in _mem_objects_ are not valid + OpenCL memory objects or if memory objects in _mem_objects_ have not + been created from Direct3D 10 resources. + * {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not a valid command-queue. - * {CL_INVALID_OPERATION} if the device associated with _command_queue_ does not support SVM. - * {CL_INVALID_CONTEXT} if context associated with _command_queue_ and events - in _event_wait_list_ are not the same. - * {CL_INVALID_VALUE} if _svm_ptr_ is `NULL`. - * {CL_INVALID_VALUE} if _size_ is 0 or if values specified in _map_flags_ - are not valid. + * {CL_INVALID_CONTEXT} if context associated with _command_queue_ was not + created from a Direct3D 10 device. + * {CL_D3D10_RESOURCE_NOT_ACQUIRED_KHR} if memory objects in _mem_objects_ + have not previously been acquired using + {clEnqueueAcquireD3D10ObjectsKHR}, or have been released using + {clEnqueueReleaseD3D10ObjectsKHR} since the last time that they were + acquired. * {CL_INVALID_EVENT_WAIT_LIST} if _event_wait_list_ is `NULL` and _num_events_in_wait_list_ > 0, or _event_wait_list_ is not `NULL` and - _num_events_in_wait_list_ is 0, or if event objects in _event_wait_list_ - are not valid events. - * {CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST} if the map operation is - blocking and the execution status of any of the events in - _event_wait_list_ is a negative integer value. 
- * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required - by the OpenCL implementation on the device. + _num_events_in_wait_list_ is 0, or if event objects in + _event_wait_list_ are not valid events. * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources required by the OpenCL implementation on the host. -- +endif::cl_khr_d3d10_sharing[] -[open,refpage='clEnqueueSVMUnmap',desc='Enqueues a command to indicate that the host has completed updating the region given by an SVM pointer and which was specified in a previous call to clEnqueueSVMMap.',type='protos'] + +ifdef::cl_khr_d3d11_sharing[] +=== Sharing Memory Objects Created From Direct3D 11 Resources Between Direct3D 11 and OpenCL Contexts + +[open,refpage='clEnqueueAcquireD3D11ObjectsKHR',desc='Acquire OpenCL memory objects created from Direct3D 11 resources',type='protos'] -- -To enqueue a command to indicate that the host has completed updating the -region given by _svm_ptr_ and which was specified in a previous call to -{clEnqueueSVMMap}, call the function +To acquire OpenCL memory objects that have been created from Direct3D 11 +resources, call the function -include::{generated}/api/protos/clEnqueueSVMUnmap.txt[] -include::{generated}/api/version-notes/clEnqueueSVMUnmap.asciidoc[] +include::{generated}/api/protos/clEnqueueAcquireD3D11ObjectsKHR.txt[] +include::{generated}/api/version-notes/clEnqueueAcquireD3D11ObjectsKHR.asciidoc[] - * _command_queue_ must be a valid host command-queue. - * _svm_ptr_ is a pointer that was specified in a previous call to - {clEnqueueSVMMap}. - If _svm_ptr_ is allocated using {clSVMAlloc} then it must be allocated from - the same context from which _command_queue_ was created. - Otherwise the behavior is undefined. - * _event_wait_list_ and _num_events_in_wait_list_ specify events that need to - complete before {clEnqueueSVMUnmap} can be executed.
- If _event_wait_list_ is `NULL`, then {clEnqueueSVMUnmap} does not wait on any - event to complete. + * _command_queue_ is a valid command-queue. + * _num_objects_ is the number of memory objects to be acquired in + _mem_objects_. + * _mem_objects_ is a pointer to a list of OpenCL memory objects that were + created from Direct3D 11 resources. + * _event_wait_list_ and _num_events_in_wait_list_ specify events that need + to complete before this particular command can be executed. + If _event_wait_list_ is `NULL`, then this particular command does not + wait on any event to complete. If _event_wait_list_ is `NULL`, _num_events_in_wait_list_ must be 0. If _event_wait_list_ is not `NULL`, the list of events pointed to by _event_wait_list_ must be valid and _num_events_in_wait_list_ must be greater than 0. The events specified in _event_wait_list_ act as synchronization points. - The context associated with events in _event_wait_list_ and _command_queue_ - must be the same. - The memory associated with _event_wait_list_ can be reused or freed after - the function returns. - * _event_ returns an event object that identifies this command and - can be used to query or queue a wait for this command to complete. + * _event_ returns an event object that identifies this command and can be + used to query or wait for this command to complete. If _event_ is `NULL` or the enqueue is unsuccessful, no event will be - created and therefore it will not be possible to query the status of this - command or to wait for this command to complete. + created and therefore it will not be possible to query the status of + this command or to wait for this command to complete. If _event_wait_list_ and _event_ are not `NULL`, _event_ must not refer to an element of the _event_wait_list_ array. -{clEnqueueSVMMap} and {clEnqueueSVMUnmap} act as synchronization points for -the region of the SVM buffer specified in these calls. 
+The Direct3D 11 objects are acquired by the OpenCL context associated with +_command_queue_ and can therefore be used by all command-queues associated +with the OpenCL context. + +OpenCL memory objects created from Direct3D 11 resources must be acquired +before they can be used by any OpenCL commands queued to a command-queue. +If an OpenCL memory object created from a Direct3D 11 resource is used while +it is not currently acquired by OpenCL, the behavior is undefined. +Implementations may fail the execution of commands attempting to use that +OpenCL memory object and set their associated event's execution status to +{CL_D3D11_RESOURCE_NOT_ACQUIRED_KHR}. + +If {CL_CONTEXT_INTEROP_USER_SYNC} is not specified as {CL_TRUE} during +context creation, {clEnqueueAcquireD3D11ObjectsKHR} provides the +synchronization guarantee that any Direct3D 11 calls involving the interop +device(s) used in the OpenCL context made before +{clEnqueueAcquireD3D11ObjectsKHR} is called will complete executing before +_event_ reports completion and before the execution of any subsequent OpenCL +work issued in _command_queue_ begins. +If the context was created with properties specifying +{CL_CONTEXT_INTEROP_USER_SYNC} as {CL_TRUE}, the user is responsible for +guaranteeing that any Direct3D 11 calls involving the interop device(s) used +in the OpenCL context made before {clEnqueueAcquireD3D11ObjectsKHR} is +called have completed before calling {clEnqueueAcquireD3D11ObjectsKHR}. // refError -{clEnqueueSVMUnmap} returns {CL_SUCCESS} if the function is executed -successfully. -Otherwise, it returns one of the following errors: - - * {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not a valid host +{clEnqueueAcquireD3D11ObjectsKHR} returns {CL_SUCCESS} if the function is +executed successfully. +If _num_objects_ is 0 and _mem_objects_ is `NULL` then the function does +nothing and returns {CL_SUCCESS}. 
+Otherwise it returns one of the following errors: + + * {CL_INVALID_VALUE} if _num_objects_ is zero and _mem_objects_ is not a + `NULL` value or if _num_objects_ > 0 and _mem_objects_ is `NULL`. + * {CL_INVALID_MEM_OBJECT} if memory objects in _mem_objects_ are not valid + OpenCL memory objects or if memory objects in _mem_objects_ have not + been created from Direct3D 11 resources. + * {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not a valid command-queue. - * {CL_INVALID_OPERATION} if the device associated with _command_queue_ does not support SVM. - * {CL_INVALID_CONTEXT} if context associated with _command_queue_ and events - in _event_wait_list_ are not the same. - * {CL_INVALID_VALUE} if _svm_ptr_ is `NULL`. + * {CL_INVALID_CONTEXT} if context associated with _command_queue_ was not + created from a Direct3D 11 context. + * {CL_D3D11_RESOURCE_ALREADY_ACQUIRED_KHR} if memory objects in + _mem_objects_ have previously been acquired using + {clEnqueueAcquireD3D11ObjectsKHR} but have not been released using + {clEnqueueReleaseD3D11ObjectsKHR}. * {CL_INVALID_EVENT_WAIT_LIST} if _event_wait_list_ is `NULL` and - _num_events_in_wait_list_ > 0, or if _event_wait_list_ is not `NULL` and + _num_events_in_wait_list_ > 0, or _event_wait_list_ is not `NULL` and _num_events_in_wait_list_ is 0, or if event objects in _event_wait_list_ are not valid events. - * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required - by the OpenCL implementation on the device. * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources required by the OpenCL implementation on the host. +-- -[NOTE] -==== -If a coarse-grained SVM buffer is currently mapped for writing, the -application must ensure that the SVM buffer is unmapped before any enqueued -kernels or commands that read from or write to this SVM buffer or any of its -associated {cl_mem_TYPE} buffer objects begin execution; otherwise the behavior is -undefined.
+[open,refpage='clEnqueueReleaseD3D11ObjectsKHR',desc='Release OpenCL memory objects created from Direct3D 11 resources',type='protos'] +-- +To release OpenCL memory objects that have been created from Direct3D 11 +resources, call the function -If a coarse-grained SVM buffer is currently mapped for reading, the -application must ensure that the SVM buffer is unmapped before any enqueued -kernels or commands that write to this memory object or any of its -associated {cl_mem_TYPE} buffer objects begin execution; otherwise the behavior is -undefined. +include::{generated}/api/protos/clEnqueueReleaseD3D11ObjectsKHR.txt[] +include::{generated}/api/version-notes/clEnqueueReleaseD3D11ObjectsKHR.asciidoc[] -A SVM buffer is considered as mapped if there are one or more active -mappings for the SVM buffer irrespective of whether the mapped regions span -the entire SVM buffer. + * _num_objects_ is the number of memory objects to be released in + _mem_objects_. + * _mem_objects_ is a pointer to a list of OpenCL memory objects that were + created from Direct3D 11 resources. + * _event_wait_list_ and _num_events_in_wait_list_ specify events that need + to complete before this particular command can be executed. + If _event_wait_list_ is `NULL`, then this particular command does not + wait on any event to complete. + If _event_wait_list_ is `NULL`, _num_events_in_wait_list_ must be 0. + If _event_wait_list_ is not `NULL`, the list of events pointed to by + _event_wait_list_ must be valid and _num_events_in_wait_list_ must be + greater than 0. + * _event_ returns an event object that identifies this command and can be + used to query or wait for this command to complete. + If _event_ is `NULL` or the enqueue is unsuccessful, no event will be + created and therefore it will not be possible to query the status of + this command or to wait for this command to complete. 
+ If _event_wait_list_ and _event_ are not `NULL`, _event_ must not refer + to an element of the _event_wait_list_ array. -The above note does not apply to fine-grained SVM buffers (fine-grained -buffers allocated using {clSVMAlloc} or fine-grained system allocations). -==== +The Direct3D 11 objects are released by the OpenCL context associated with +_command_queue_. + +OpenCL memory objects created from Direct3D 11 resources which have been +acquired by OpenCL must be released by OpenCL before they may be accessed by +Direct3D 11. +Accessing a Direct3D 11 resource while its corresponding OpenCL memory +object is acquired is in error and will result in undefined behavior, +including but not limited to possible OpenCL errors, data corruption, and +program termination. + +If {CL_CONTEXT_INTEROP_USER_SYNC} is not specified as {CL_TRUE} during +context creation, {clEnqueueReleaseD3D11ObjectsKHR} provides the +synchronization guarantee that any Direct3D 11 calls involving the +interop device(s) used in the OpenCL context made after the call to +{clEnqueueReleaseD3D11ObjectsKHR} will not start executing until after all +events in _event_wait_list_ are complete and all work already submitted to +_command_queue_ completes execution. +If the context was created with properties specifying +{CL_CONTEXT_INTEROP_USER_SYNC} as {CL_TRUE}, the user is responsible for +guaranteeing that any Direct3D 11 calls involving the interop device(s) used +in the OpenCL context made after {clEnqueueReleaseD3D11ObjectsKHR} will not +start executing until after the event returned by +{clEnqueueReleaseD3D11ObjectsKHR} reports completion. + +// refError + +{clEnqueueReleaseD3D11ObjectsKHR} returns {CL_SUCCESS} if the function is +executed successfully. +If _num_objects_ is 0 and _mem_objects_ is `NULL` the function does nothing +and returns {CL_SUCCESS}.
+Otherwise it returns one of the following errors: + + * {CL_INVALID_VALUE} if _num_objects_ is zero and _mem_objects_ is not a + `NULL` value or if _num_objects_ > 0 and _mem_objects_ is `NULL`. + * {CL_INVALID_MEM_OBJECT} if memory objects in _mem_objects_ are not valid + OpenCL memory objects or if memory objects in _mem_objects_ have not + been created from Direct3D 11 resources. + * {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not a valid + command-queue. + * {CL_INVALID_CONTEXT} if context associated with _command_queue_ was not + created from a Direct3D 11 device. + * {CL_D3D11_RESOURCE_NOT_ACQUIRED_KHR} if memory objects in _mem_objects_ + have not previously been acquired using + {clEnqueueAcquireD3D11ObjectsKHR}, or have been released using + {clEnqueueReleaseD3D11ObjectsKHR} since the last time that they were + acquired. + * {CL_INVALID_EVENT_WAIT_LIST} if _event_wait_list_ is `NULL` and + _num_events_in_wait_list_ > 0, or _event_wait_list_ is not `NULL` and + _num_events_in_wait_list_ is 0, or if event objects in + _event_wait_list_ are not valid events. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host.
-- +endif::cl_khr_d3d11_sharing[] -[open,refpage='clEnqueueSVMMigrateMem',desc='Enqueues a command to indicate which device a set of ranges of SVM allocations should be associated with.',type='protos'] + +ifdef::cl_khr_egl_image[] +=== Sharing Memory Objects Created From EGL Resources Between EGL and OpenCL Contexts + +[open,refpage='clEnqueueAcquireEGLObjectsKHR',desc='Acquire OpenCL memory objects created from EGL resources',type='protos'] -- -To enqueue a command to indicate which device a set of ranges of SVM -allocations should be associated with, call the function +To acquire OpenCL memory objects that have been created from EGL resources, +call the function -include::{generated}/api/protos/clEnqueueSVMMigrateMem.txt[] -include::{generated}/api/version-notes/clEnqueueSVMMigrateMem.asciidoc[] +include::{generated}/api/protos/clEnqueueAcquireEGLObjectsKHR.txt[] +include::{generated}/api/version-notes/clEnqueueAcquireEGLObjectsKHR.asciidoc[] - * _command_queue_ is a valid host command-queue. - The specified set of allocation ranges will be migrated to the OpenCL device - associated with _command_queue_. - * _num_svm_pointers_ is the number of pointers in the specified _svm_pointers_ - array, and the number of sizes in the _sizes_ array, if _sizes_ is not - `NULL`. - * _svm_pointers_ is a pointer to an array of pointers. - Each pointer in this array must be within an allocation produced by a call - to {clSVMAlloc}. - * _sizes_ is an array of sizes. - The pair _svm_pointers_[i] and _sizes_[i] together define the starting - address and number of bytes in a range to be migrated. - _sizes_ may be `NULL` indicating that every allocation containing any - _svm_pointer_[i] is to be migrated. - Also, if _sizes_[i] is zero, then the entire allocation containing - _svm_pointer_[i] is migrated. - * _flags_ is a bit-field that is used to specify migration options. - The <> describes the possible - values for _flags_. 
- * _event_wait_list_ and _num_events_in_wait_list_ specify events that need to - complete before this particular command can be executed. - If _event_wait_list_ is `NULL`, then this particular command does not wait - on any event to complete. + * _command_queue_ is a valid command-queue. + * _num_objects_ is the number of memory objects to be acquired in + _mem_objects_. + * _mem_objects_ is a pointer to a list of OpenCL memory objects that were + created from EGL resources, within the context associated with + _command_queue_. + * _event_wait_list_ and _num_events_in_wait_list_ specify events that need + to complete before this particular command can be executed. + If _event_wait_list_ is `NULL`, then this particular command does not + wait on any event to complete. If _event_wait_list_ is `NULL`, _num_events_in_wait_list_ must be 0. If _event_wait_list_ is not `NULL`, the list of events pointed to by _event_wait_list_ must be valid and _num_events_in_wait_list_ must be greater than 0. The events specified in _event_wait_list_ act as synchronization points. - The context associated with events in _event_wait_list_ and _command_queue_ - must be the same. - The memory associated with _event_wait_list_ can be reused or freed after - the function returns. - * _event_ returns an event object that identifies this command and - can be used to query or queue a wait for this command to complete. + * _event_ returns an event object that identifies this command and can be + used to query or wait for this command to complete. If _event_ is `NULL` or the enqueue is unsuccessful, no event will be - created and therefore it will not be possible to query the status of this - command or to wait for this command to complete. + created and therefore it will not be possible to query the status of + this command or to wait for this command to complete. If _event_wait_list_ and _event_ are not `NULL`, _event_ must not refer to an element of the _event_wait_list_ array. 
-Once the event returned by {clEnqueueSVMMigrateMem} has become {CL_COMPLETE}, -the ranges specified by svm pointers and sizes have been successfully -migrated to the device associated with command-queue. +The EGL objects are acquired by the OpenCL context associated with +_command_queue_ and can therefore be used by all command-queues associated +with the OpenCL context. -The user is responsible for managing the event dependencies associated with -this command in order to avoid overlapping access to SVM allocations. -Improperly specified event dependencies passed to {clEnqueueSVMMigrateMem} -could result in undefined results. +OpenCL memory objects created from EGL resources must be acquired before +they can be used by any OpenCL commands queued to a command-queue. +If an OpenCL memory object created from an EGL resource is used while it is +not currently acquired by OpenCL, the behavior is undefined. +Implementations may fail the execution of commands attempting to use that +OpenCL memory object and set their associated event's execution status to +{CL_EGL_RESOURCE_NOT_ACQUIRED_KHR}. // refError -{clEnqueueSVMMigrateMem} returns {CL_SUCCESS} if the function is executed -successfully. -Otherwise, it returns one of the following errors: - - * {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not a valid host +{clEnqueueAcquireEGLObjectsKHR} returns {CL_SUCCESS} if the function is +executed successfully. +If _num_objects_ is 0 and _mem_objects_ is `NULL` then the function does +nothing and returns {CL_SUCCESS}. +Otherwise it returns one of the following errors: + + * {CL_INVALID_VALUE} if _num_objects_ is zero and _mem_objects_ is not a + `NULL` value or if _num_objects_ > 0 and _mem_objects_ is `NULL`. + * {CL_INVALID_MEM_OBJECT} if memory objects in _mem_objects_ are not valid + OpenCL memory objects in the context associated with _command_queue_. + * {CL_INVALID_EGL_OBJECT_KHR} if memory objects in _mem_objects_ have not + been created from EGL resources. 
+ * {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not a valid command-queue. - * {CL_INVALID_OPERATION} if the device associated with _command_queue_ does not support SVM. - * {CL_INVALID_CONTEXT} if context associated with _command_queue_ and events - in _event_wait_list_ are not the same. - * {CL_INVALID_VALUE} if _num_svm_pointers_ is zero or _svm_pointers_ is - `NULL`. - * {CL_INVALID_VALUE} if _sizes_[i] is non-zero range [_svm_pointers_[i], - _svm_pointers_[i]+_sizes_[i]) is not contained within an existing - {clSVMAlloc} allocation. * {CL_INVALID_EVENT_WAIT_LIST} if _event_wait_list_ is `NULL` and - _num_events_in_wait_list_ > 0, or if _event_wait_list_ is not `NULL` and + _num_events_in_wait_list_ > 0, or _event_wait_list_ is not `NULL` and _num_events_in_wait_list_ is 0, or if event objects in _event_wait_list_ are not valid events. - * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required - by the OpenCL implementation on the device. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources + required by the OpenCL implementation on the device. * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources required by the OpenCL implementation on the host. -- +[open,refpage='clEnqueueReleaseEGLObjectsKHR',desc='Release OpenCL memory objects created from EGL resources',type='protos'] +-- +To release OpenCL memory objects that have been created from EGL resources, +call the function -=== Memory consistency for SVM allocations +include::{generated}/api/protos/clEnqueueReleaseEGLObjectsKHR.txt[] +include::{generated}/api/version-notes/clEnqueueReleaseEGLObjectsKHR.asciidoc[] -To ensure memory consistency in SVM allocations, the program can rely on the -guaranteed memory consistency at synchronization points. 
-This consistency support already exists in OpenCL 1.x and can be used for -coarse-grained SVM allocations or for fine-grained buffer SVM allocations; -what SVM adds is the ability to share pointers between the host and all SVM -devices. + * _command_queue_ is a valid command-queue. + * _num_objects_ is the number of memory objects to be released in + _mem_objects_. + * _mem_objects_ is a pointer to a list of OpenCL memory objects that were + created from EGL resources, within the context associated with + _command_queue_. + * _event_wait_list_ and _num_events_in_wait_list_ specify events that need + to complete before this particular command can be executed. + If _event_wait_list_ is `NULL`, then this particular command does not + wait on any event to complete. + If _event_wait_list_ is `NULL`, _num_events_in_wait_list_ must be 0. + If _event_wait_list_ is not `NULL`, the list of events pointed to by + _event_wait_list_ must be valid and _num_events_in_wait_list_ must be + greater than 0. + The events specified in _event_wait_list_ act as synchronization points. + * _event_ returns an event object that identifies this command and can be + used to query or wait for this command to complete. + If _event_ is `NULL` or the enqueue is unsuccessful, no event will be + created and therefore it will not be possible to query the status of + this command or to wait for this command to complete. + If _event_wait_list_ and _event_ are not `NULL`, _event_ must not refer + to an element of the _event_wait_list_ array. -In addition, sub-buffers can also be used to ensure that each device gets a -consistent view of a SVM buffers memory when it is shared by multiple -devices. -For example, assume that two devices share a SVM pointer. 
-The host can create a {cl_mem_TYPE} buffer object using {clCreateBuffer} or -{clCreateBufferWithProperties} with {CL_MEM_USE_HOST_PTR} and _host_ptr_ set -to the SVM pointer and then create two disjoint sub-buffers with starting -virtual addresses _sb1_ptr_ and _sb2_ptr_. -These pointers (_sb1_ptr_ and _sb2_ptr_) can be passed to kernels executing -on the two devices. -{clEnqueueMapBuffer} and {clEnqueueUnmapMemObject} and the existing -<> ensure -consistency for buffer regions (_sb1_ptr_ and _sb2_ptr_) read and written by -these kernels. +The EGL objects are released by the OpenCL context associated with +_command_queue_. -When the host and devices are able to use SVM atomic operations (i.e. -{CL_DEVICE_SVM_ATOMICS} is set in {CL_DEVICE_SVM_CAPABILITIES}), these atomic -operations can be used to provide memory consistency at a fine grain in a -shared memory region. -The effect of these operations is visible to the host and all devices with -which that memory is shared. +OpenCL memory objects created from EGL resources which have been acquired by +OpenCL must be released by OpenCL before they may be accessed by EGL or by +EGL client APIs. +Accessing an EGL resource while its corresponding OpenCL memory object is +acquired is in error and will result in undefined behavior, including but +not limited to possible OpenCL errors, data corruption, and program +termination. +// refError -== Sampler Objects - -A sampler object describes how to sample an image when the image is read in -the kernel. -The built-in functions to read from an image in a kernel take a sampler as -an argument. -The sampler arguments to the image read function can be sampler objects -created using OpenCL functions and passed as argument values to the kernel -or can be samplers declared inside a kernel. -In this section we discuss how sampler objects are created using OpenCL -functions. +{clEnqueueReleaseEGLObjectsKHR} returns {CL_SUCCESS} if the function is +executed successfully. 
+If _num_objects_ is 0 and _mem_objects_ is `NULL` then the function does +nothing and returns {CL_SUCCESS}. +Otherwise it returns one of the following errors: + + * {CL_INVALID_VALUE} if _num_objects_ is zero and _mem_objects_ is not a + `NULL` value or if _num_objects_ > 0 and _mem_objects_ is `NULL`. + * {CL_INVALID_MEM_OBJECT} if memory objects in _mem_objects_ are not valid + OpenCL memory objects in the context associated with _command_queue_. + * {CL_INVALID_EGL_OBJECT_KHR} if memory objects in _mem_objects_ have not + been created from EGL resources. + * {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not a valid + command-queue. + * {CL_INVALID_EVENT_WAIT_LIST} if _event_wait_list_ is `NULL` and + _num_events_in_wait_list_ > 0, or _event_wait_list_ is not `NULL` and + _num_events_in_wait_list_ is 0, or if event objects in _event_wait_list_ + are not valid events. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources + required by the OpenCL implementation on the device. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. 
+-- +endif::cl_khr_egl_image[] -=== Creating Sampler Objects +ifdef::cl_khr_gl_sharing[] +[[acquiring-shared-opencl-opengl-memory-objects]] +=== Acquiring, Releasing, and Synchronizing Access to Shared OpenCL/OpenGL Memory Objects -[open,refpage='clCreateSamplerWithProperties',desc='Creates a sampler object.',type='protos'] +[open,refpage='clEnqueueAcquireGLObjects',desc='Acquire OpenCL memory objects created from OpenGL objects',type='protos'] -- -To create a sampler object, call the function +To acquire OpenCL memory objects that have been created from OpenGL objects, +call the function -include::{generated}/api/protos/clCreateSamplerWithProperties.txt[] -include::{generated}/api/version-notes/clCreateSamplerWithProperties.asciidoc[] +include::{generated}/api/protos/clEnqueueAcquireGLObjects.txt[] +include::{generated}/api/version-notes/clEnqueueAcquireGLObjects.asciidoc[] - * _context_ must be a valid OpenCL context. - * _sampler_properties_ specifies a list of sampler property names and their - corresponding values. - Each sampler property name is immediately followed by the corresponding - desired value. - The list is terminated with 0. - The list of supported properties is described in the - <> table. - If a supported property and its value is not specified in - _sampler_properties_, its default value will be used. - _sampler_properties_ can be `NULL` in which case the default values for - supported sampler properties will be used. + * _command_queue_ is a valid command-queue. + All devices used to create the OpenCL context associated with + _command_queue_ must support acquiring shared OpenCL/OpenGL objects. + This constraint is enforced at context creation time. + * _num_objects_ is the number of memory objects to be acquired in + _mem_objects_. + * _mem_objects_ is a pointer to a list of OpenCL memory objects that + correspond to OpenGL objects. 
+ * _event_wait_list_ and _num_events_in_wait_list_ specify events that need + to complete before this particular command can be executed. + If _event_wait_list_ is `NULL`, then this particular command does not + wait on any event to complete. + If _event_wait_list_ is `NULL`, _num_events_in_wait_list_ must be 0. + If _event_wait_list_ is not `NULL`, the list of events pointed to by + _event_wait_list_ must be valid and _num_events_in_wait_list_ must be + greater than 0. + The events specified in _event_wait_list_ act as synchronization points. + * _event_ returns an event object that identifies this command and can be + used to query or wait for this command to complete. + If _event_ is `NULL` or the enqueue is unsuccessful, no event will be + created and therefore it will not be possible to query the status of + this command or to wait for this command to complete. + If _event_wait_list_ and _event_ are not `NULL`, _event_ must not refer + to an element of the _event_wait_list_ array. -[[sampler-properties-table]] -.List of supported sampler creation properties by {clCreateSamplerWithProperties} -[width="100%",cols="<33%,<17%,<50%",options="header"] -|==== -| Sampler Property | Property Value | Description -| {CL_SAMPLER_NORMALIZED_COORDS_anchor} +ifdef::cl_khr_gl_event[] +If an OpenGL context is bound to the current thread, then any OpenGL +commands which -include::{generated}/api/version-notes/CL_SAMPLER_NORMALIZED_COORDS.asciidoc[] - | {cl_bool_TYPE} - | A boolean value that specifies whether the image coordinates - specified are normalized or not. + . affect or access the contents of a memory object listed in the + _mem_objects_ list, and + . were issued on that OpenGL context prior to the call to + {clEnqueueAcquireGLObjects} - The default value (i.e. the value used if this property is not - specified in sampler_properties) is {CL_TRUE}. 
-| {CL_SAMPLER_ADDRESSING_MODE_anchor} +will complete before execution of any OpenCL commands following the +{clEnqueueAcquireGLObjects} which affect or access any of those memory +objects. +If a non-`NULL` _event_ object is returned, it will report completion only +after completion of such OpenGL commands. +endif::cl_khr_gl_event[] -include::{generated}/api/version-notes/CL_SAMPLER_ADDRESSING_MODE.asciidoc[] - | {cl_addressing_mode_TYPE} - | Specifies how out-of-range image coordinates are handled when - reading from an image. - Valid values are: +These objects need to be acquired before they can be used by any OpenCL +commands queued to a command-queue or the behaviour is undefined. +The OpenGL objects are acquired by the OpenCL context associated with +_command_queue_ and can therefore be used by all command-queues associated +with the OpenCL context. - {CL_ADDRESS_NONE_anchor} - Behavior is undefined for out-of-range - image coordinates. +// refError - {CL_ADDRESS_CLAMP_TO_EDGE_anchor} - Out-of-range image coordinates - are clamped to the edge of the image. +{clEnqueueAcquireGLObjects} returns {CL_SUCCESS} if the function is executed +successfully. +If _num_objects_ is 0 and _mem_objects_ is `NULL` the function does nothing +and returns {CL_SUCCESS}. +Otherwise, it returns one of the following errors: - {CL_ADDRESS_CLAMP_anchor} - Out-of-range image coordinates are - assigned a border color value. + * {CL_INVALID_VALUE} if _num_objects_ is zero and _mem_objects_ is not a + `NULL` value or if _num_objects_ > 0 and _mem_objects_ is `NULL`. + * {CL_INVALID_MEM_OBJECT} if memory objects in _mem_objects_ are not valid + OpenCL memory objects. + * {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not a valid + command-queue. + * {CL_INVALID_CONTEXT} if context associated with _command_queue_ was not + created from an OpenGL context + * {CL_INVALID_GL_OBJECT} if memory objects in _mem_objects_ have not been + created from an OpenGL object(s). 
+ * {CL_INVALID_EVENT_WAIT_LIST} if _event_wait_list_ is `NULL` and + _num_events_in_wait_list_ > 0, or _event_wait_list_ is not `NULL` and + _num_events_in_wait_list_ is 0, or if event objects in _event_wait_list_ + are not valid events. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources + required by the OpenCL implementation on the device. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. +-- - {CL_ADDRESS_REPEAT_anchor} - Out-of-range image coordinates read - from the image as if the image data were replicated in all dimensions. +[open,refpage='clEnqueueReleaseGLObjects',desc='Release OpenCL memory objects created from OpenGL objects',type='protos'] +-- +To release OpenCL memory objects that have been created from OpenGL objects, +call the function - {CL_ADDRESS_MIRRORED_REPEAT_anchor} - Out-of-range image coordinates - read from the image as if the image data were replicated in all - dimensions, mirroring the image contents at the edge of each - replication. +include::{generated}/api/protos/clEnqueueReleaseGLObjects.txt[] +include::{generated}/api/version-notes/clEnqueueReleaseGLObjects.asciidoc[] - The default is {CL_ADDRESS_CLAMP}. -| {CL_SAMPLER_FILTER_MODE_anchor} + * _num_objects_ is the number of memory objects to be released in + _mem_objects_. + * _mem_objects_ is a pointer to a list of OpenCL memory objects that + correspond to OpenGL objects. + * _event_wait_list_ and _num_events_in_wait_list_ specify events that need + to complete before this command can be executed. + If _event_wait_list_ is `NULL`, then this particular command does not + wait on any event to complete. + If _event_wait_list_ is `NULL`, _num_events_in_wait_list_ must be 0. + If _event_wait_list_ is not `NULL`, the list of events pointed to by + _event_wait_list_ must be valid and _num_events_in_wait_list_ must be + greater than 0. 
+ The events specified in _event_wait_list_ act as synchronization points. + * _event_ returns an event object that identifies this command and can be + used to query or wait for this command to complete. + If _event_ is `NULL` or the enqueue is unsuccessful, no event will be + created and therefore it will not be possible to query the status of + this command or to wait for this command to complete. + If _event_wait_list_ and _event_ are not `NULL`, _event_ must not refer + to an element of the _event_wait_list_ array. -include::{generated}/api/version-notes/CL_SAMPLER_FILTER_MODE.asciidoc[] - | {cl_filter_mode_TYPE} - | Specifies the type of filter that is applied when reading an - image. - Valid values are: +ifdef::cl_khr_gl_event[] +If an OpenGL context is bound to the current thread, then any OpenGL +commands which - {CL_FILTER_NEAREST_anchor} - Returns the image element nearest - to the image coordinate. + . affect or access the contents of the memory objects listed in the + _mem_objects_ list, and + . are issued on that context after the call to {clEnqueueReleaseGLObjects} - {CL_FILTER_LINEAR_anchor} - Returns a weighted average of the - four image elements nearest to the image coordinate. +will not execute until after execution of any OpenCL commands preceding the - The default value is {CL_FILTER_NEAREST}. -|==== - * _errcode_ret_ will return an appropriate error code. - If _errcode_ret_ is `NULL`, no error code is returned. +{clEnqueueReleaseGLObjects} which affect or access any of those memory +objects. +If a non-`NULL` _event_ object is returned, it will report completion before +execution of such OpenGL commands. +endif::cl_khr_gl_event[] + +These objects need to be released before they can be used by OpenGL. +The OpenGL objects are released by the OpenCL context associated with +_command_queue_. 
// refError -{clCreateSamplerWithProperties} returns a valid non-zero sampler object and -_errcode_ret_ is set to {CL_SUCCESS} if the sampler object is created +{clEnqueueReleaseGLObjects} returns {CL_SUCCESS} if the function is executed successfully. -Otherwise, it returns a `NULL` value with one of the following error values -returned in _errcode_ret_: +If _num_objects_ is 0 and _mem_objects_ is `NULL` the function does nothing +and returns {CL_SUCCESS}. +Otherwise, it returns one of the following errors: - * {CL_INVALID_CONTEXT} if _context_ is not a valid context. - * {CL_INVALID_VALUE} if the property name in _sampler_properties_ is not a - supported property name, if the value specified for a supported property - name is not valid, or if the same property name is specified more than - once. - * {CL_INVALID_OPERATION} if images are not supported by any device - associated with _context_ (i.e. {CL_DEVICE_IMAGE_SUPPORT} specified in the - <> table is {CL_FALSE}). - * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required - by the OpenCL implementation on the device. + * {CL_INVALID_VALUE} if _num_objects_ is zero and _mem_objects_ is not a + `NULL` value or if _num_objects_ > 0 and _mem_objects_ is `NULL`. + * {CL_INVALID_MEM_OBJECT} if memory objects in _mem_objects_ are not valid + OpenCL memory objects. + * {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not a valid + command-queue. + * {CL_INVALID_CONTEXT} if context associated with _command_queue_ was not + created from an OpenGL context + * {CL_INVALID_GL_OBJECT} if memory objects in _mem_objects_ have not been + created from an OpenGL object(s). + * {CL_INVALID_EVENT_WAIT_LIST} if _event_wait_list_ is `NULL` and + _num_events_in_wait_list_ > 0, or _event_wait_list_ is not `NULL` and + _num_events_in_wait_list_ is 0, or if event objects in _event_wait_list_ + are not valid events. 
+ * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources + required by the OpenCL implementation on the device. * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources required by the OpenCL implementation on the host. -- -[open,refpage='clCreateSampler',desc='Creates a sampler object.',type='protos'] --- -To create a sampler object, call the function -include::{generated}/api/protos/clCreateSampler.txt[] -include::{generated}/api/version-notes/clCreateSampler.asciidoc[] +// The following section is quite ugly and duplicative, and potentially +// could be simplified. +// The problem is that there are a large number of scenarios being +// described: +// * Using either EGL or another OpenGL binding API via the egl_image or +// gl_sharing extensions +// * Using either OpenGL or OpenGL ES, or potentially another EGL client API +// * Attempting to synchronize via either EGL or OpenGL/OpenGL ES fence +// sync objects, via the egl_event or gl_event extensions - * _context_ must be a valid OpenCL context. - * _normalized_coords_ has the same interpretation as - {CL_SAMPLER_NORMALIZED_COORDS} in the <>. - * _addressing_mode_ has the same interpretation as - {CL_SAMPLER_ADDRESSING_MODE} in the <>. - * _filter_mode_ has the same interpretation as - {CL_SAMPLER_FILTER_MODE} in the <>. - * _errcode_ret_ will return an appropriate error code. - If _errcode_ret_ is `NULL`, no error code is returned. +ifdef::cl_khr_egl_image,cl_khr_gl_sharing[] +==== Synchronizing Access to Memory Objects Shared With EGL or OpenGL + +When sharing objects such as EGL images (if the `<>` +extension is supported) or OpenGL buffers, textures, and renderbuffers (if +the `<>` extension is supported), in order to ensure data +integrity, the application is responsible for synchronizing access to shared +memory objects through the other API with which such objects are shared. 
-// refError +Failure to provide such synchronization may result in race conditions and +other undefined behavior including non-portability between implementations. -{clCreateSampler} returns a valid non-zero sampler object and _errcode_ret_ is -set to {CL_SUCCESS} if the sampler object is created successfully. -Otherwise, it returns a `NULL` value with one of the following error values -returned in _errcode_ret_: +Prior to acquiring objects shared with the other API via an appropriate +{clEnqueueAcquire}*** call, the application must ensure that any pending +operations in that API which access the objects specified in _mem_objects_ +have completed. - * {CL_INVALID_CONTEXT} if _context_ is not a valid context. - * {CL_INVALID_VALUE} if _addressing_mode_, _filter_mode_, _normalized_coords_ - or a combination of these arguements are not valid. - * {CL_INVALID_OPERATION} if images are not supported by any device - associated with _context_ (i.e. {CL_DEVICE_IMAGE_SUPPORT} specified in the - <> table is {CL_FALSE}). - * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required - by the OpenCL implementation on the device. - * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources - required by the OpenCL implementation on the host. --- +Depending on the application and the implementation, there are two +extensions which may be used to synchronize with other APIs: -[open,refpage='clRetainSampler',desc='Increments the sampler reference count.',type='protos'] --- -To retain a sampler object, call the function -include::{generated}/api/protos/clRetainSampler.txt[] -include::{generated}/api/version-notes/clRetainSampler.asciidoc[] +ifdef::cl_khr_egl_image[] +===== Synchronization With EGL and EGL Client APIs - * _sampler_ specifies the sampler to be released. 
+When sharing with an EGL context via the `<>` extension, +if the `<>` extension is supported, and the EGL context in +question supports fence sync objects, _explicit synchronization_ with EGL or +EGL client APIs can be achieved as described in the +<> section. -The _sampler_ reference count is incremented. -{clCreateSamplerWithProperties} and {clCreateSampler} perform an implicit -retain. +If the `<>` extension is not supported, completion of EGL +client API commands may be determined by issuing and waiting for completion +of commands such as `glFinish` or `vgFinish` on all client API contexts with +pending references to these objects. +endif::cl_khr_egl_image[] -// refError -{clRetainSampler} returns {CL_SUCCESS} if the function is executed -successfully. -Otherwise, it returns one of the following errors: +ifdef::cl_khr_gl_sharing[] +===== Synchronization With OpenGL - * {CL_INVALID_SAMPLER} if _sampler_ is not a valid sampler object. - * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required - by the OpenCL implementation on the device. - * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources - required by the OpenCL implementation on the host. --- +When sharing with an OpenGL context via the `<>` +extension, the OpenCL implementation will ensure that any such pending +OpenGL operations are complete for an OpenGL context bound to the same +thread as the OpenCL context. This is referred to as _implicit +synchronization_. -[open,refpage='clReleaseSampler',desc='Decrements the sampler reference count.',type='protos'] --- -To release a sampler object, call the function +If the `<>` extension is supported, and the OpenGL context +in question supports fence sync objects, _explicit synchronization_ with +OpenGL can be achieved as described in the <> section. 
-include::{generated}/api/protos/clReleaseSampler.txt[] -include::{generated}/api/version-notes/clReleaseSampler.asciidoc[] +If the `<>` extension is not supported, completion of +OpenGL commands may be determined by issuing and waiting for completion of a +`glFinish` command on all OpenGL contexts with pending references to these +objects. +endif::cl_khr_gl_sharing[] - * _sampler_ specifies the sampler to be released. -The _sampler_ reference count is decremented. -The sampler object is deleted after the reference count becomes zero and -commands queued for execution on a command-queue(s) that use _sampler_ have -finished. +===== General Considerations for Synchronization With Other APIs -// refError +Some implementations may offer other efficient synchronization methods. If +such methods exist they will be described in platform-specific +documentation. -{clReleaseSampler} returns {CL_SUCCESS} if the function is executed -successfully. -Otherwise, it returns one of the following errors: +Note that no synchronization method other than `glFinish` is portable +between all OpenGL implementations and all OpenCL implementations. +While this is the only way to ensure completion that is portable to all +platforms, `glFinish` is an expensive operation and its use should be +avoided if the `<>` or `<>` extensions +are supported on a platform. - * {CL_INVALID_SAMPLER} if _sampler_ is not a valid sampler object. - * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required - by the OpenCL implementation on the device. - * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources - required by the OpenCL implementation on the host. -Using this function to release a reference that was not obtained by creating -the object or by calling {clRetainSampler} causes undefined behavior. 
--- +===== Synchronizing OpenCL Operations With Other APIs +After releasing a shared memory object via an appropriate +{clEnqueueRelease}*** call, the application is responsible for ensuring that +any pending OpenCL operations which access the objects specified in +_mem_objects_ have completed prior to executing subsequent commands in the +other API which reference these objects. -=== Sampler Object Queries +This may be accomplished portably by calling {clWaitForEvents} with the +event object returned by {clEnqueueReleaseGLObjects}, or by calling +{clFinish}. +As above, some implementations may offer more efficient methods. -[open,refpage='clGetSamplerInfo',desc='Returns information about the sampler object.',type='protos'] --- -To return information about a sampler object, call the function +The application is responsible for maintaining the proper order of +operations if the OpenCL context and the other API context are in separate +threads. + +If an OpenGL context is bound to a thread other than the one in which +{clEnqueueReleaseGLObjects} is called, changes to any of the objects in +_mem_objects_ may not be visible to that context without additional steps +being taken by the application. +For an OpenGL 3.1 (or later) context, the requirements are described in +Appendix D ("`Shared Objects and Multiple Contexts`") of the OpenGL 3.1 +Specification. +For prior versions of OpenGL, the requirements are implementation-dependent. -include::{generated}/api/protos/clGetSamplerInfo.txt[] -include::{generated}/api/version-notes/clGetSamplerInfo.asciidoc[] +Attempting to access the data store of an OpenGL object after it has been +acquired by OpenCL and before it has been released will result in undefined +behavior. +Similarly, attempting to access a shared OpenCL/OpenGL object from OpenCL +before it has been acquired by the OpenCL command-queue, or after it has +been released, will result in undefined behavior. - * _sampler_ specifies the sampler being queried. 
- * _param_name_ specifies the information to query. - The list of supported _param_name_ types and the information returned in - _param_value_ by {clGetSamplerInfo} is described in the - <> table. - * _param_value_ is a pointer to memory where the appropriate result being - queried is returned. - If _param_value_ is `NULL`, it is ignored. - * _param_value_size_ is used to specify the size in bytes of memory pointed to - by _param_value_. - This size must be {geq} size of return type as described in the - <> table. - * _param_value_size_ret_ returns the actual size in bytes of data being - queried by _param_name_. - If _param_value_size_ret_ is `NULL`, it is ignored. +endif::cl_khr_egl_image,cl_khr_gl_sharing[] +endif::cl_khr_gl_sharing[] -[[sampler-info-table]] -.List of supported param_names by {clGetSamplerInfo} -[width="100%",cols="<33%,<17%,<50%",options="header"] -|==== -| Sampler Info | Return Type | Description -| {CL_SAMPLER_REFERENCE_COUNT_anchor} footnote:[{fn-reference-count-usage}] -include::{generated}/api/version-notes/CL_SAMPLER_REFERENCE_COUNT.asciidoc[] - | {cl_uint_TYPE} - | Return the _sampler_ reference count. -| {CL_SAMPLER_CONTEXT_anchor} +== Shared Virtual Memory -include::{generated}/api/version-notes/CL_SAMPLER_CONTEXT.asciidoc[] - | {cl_context_TYPE} - | Return the context specified when the sampler is created. -// Note: This enum is used for two purposes: as a property and for a query. -// We use the property as the anchor. -| {CL_SAMPLER_NORMALIZED_COORDS} +NOTE: Shared virtual memory is <<unified-spec, missing before>> version 2.0. -include::{generated}/api/version-notes/CL_SAMPLER_NORMALIZED_COORDS.asciidoc[] - | {cl_bool_TYPE} - | Return the normalized coords value associated with _sampler_. -// Note: This enum is used for two purposes: as a property and for a query. -// We use the property as the anchor.
-| {CL_SAMPLER_ADDRESSING_MODE} - -include::{generated}/api/version-notes/CL_SAMPLER_ADDRESSING_MODE.asciidoc[] - | {cl_addressing_mode_TYPE} - | Return the addressing mode value associated with _sampler_. -// Note: This enum is used for two purposes: as a property and for a query. -// We use the property as the anchor. -| {CL_SAMPLER_FILTER_MODE} - -include::{generated}/api/version-notes/CL_SAMPLER_FILTER_MODE.asciidoc[] - | {cl_filter_mode_TYPE} - | Return the filter mode value associated with _sampler_. +Shared virtual memory (a.k.a. SVM) allows the host and kernels executing on +devices to directly share complex, pointer-containing data structures such as +trees and linked lists. +It also eliminates the need to marshal data between the host and devices. +As a result, SVM substantially simplifies OpenCL programming and may improve +performance. -| {CL_SAMPLER_PROPERTIES_anchor} -include::{generated}/api/version-notes/CL_SAMPLER_PROPERTIES.asciidoc[] - | {cl_sampler_properties_TYPE}[] - | Return the properties argument specified in - {clCreateSamplerWithProperties}. +=== SVM Sharing Granularity: Coarse- and Fine- Grained Sharing - If the _properties_ argument specified in {clCreateSamplerWithProperties} - used to create _sampler_ was not `NULL`, the implementation must return - the values specified in the properties argument in the same order and - without including additional properties. +OpenCL maintains memory consistency in a coarse-grained fashion in regions +of buffers. +We call this coarse-grained sharing. +Many platforms such as those with integrated CPU-GPU processors and ones +using the SVM-related PCI-SIG IOMMU services can do better, and can support +sharing at a granularity smaller than a buffer. +We call this fine-grained sharing. 
- If _sampler_ was created using {clCreateSampler}, or if the _properties_ - argument specified in {clCreateSamplerWithProperties} was `NULL`, the - implementation must return _param_value_size_ret_ equal to 0, - indicating that there are no properties to be returned. -|==== + * Coarse-grained sharing: Coarse-grain sharing may be used for memory and + virtual pointer sharing between multiple devices as well as between the + host and one or more devices. + The shared memory region is a memory buffer allocated using + {clSVMAlloc}. + Memory consistency is guaranteed at synchronization points and the host + can use calls to {clEnqueueSVMMap} and {clEnqueueSVMUnmap} or create a + {cl_mem_TYPE} buffer object using the SVM pointer and use OpenCL's existing host + API functions {clEnqueueMapBuffer} and {clEnqueueUnmapMemObject} to + update regions of the buffer. + What coarse-grain buffer SVM adds to OpenCL's earlier buffer support are + the ability to share virtual memory pointers and a guarantee that + concurrent access to the same memory allocation from multiple kernels on + a single device is valid. + The coarse-grain buffer SVM provides a memory consistency model similar + to the global memory consistency model described in _sections 3.3.1_ and + _3.4.3_ of the OpenCL 1.2 specification. + This memory consistency applies to the regions of buffers being shared + in a coarse-grained fashion. + It is enforced at the synchronization points between commands enqueued + to command-queues in a single context with the additional consideration + that multiple kernels concurrently running on the same device may safely + share the data. + * Fine-grained sharing: Shared virtual memory where memory consistency is + maintained at a granularity smaller than a buffer. + How fine-grained SVM is used depends on whether the device supports SVM + atomic operations. 
+ ** If SVM atomic operations are supported, they provide memory consistency + for loads and stores by the host and kernels executing on devices + supporting SVM. + This means that the host and devices can concurrently read and update + the same memory. + The consistency provided by SVM atomics is in addition to the + consistency provided at synchronization points. + There is no need for explicit calls to {clEnqueueSVMMap} and + {clEnqueueSVMUnmap} or {clEnqueueMapBuffer} and + {clEnqueueUnmapMemObject} on a {cl_mem_TYPE} buffer object created using the + SVM pointer. + ** If SVM atomic operations are not supported, the host and devices can + concurrently read the same memory locations and can concurrently update + non-overlapping memory regions, but attempts to update the same memory + locations are undefined. + Memory consistency is guaranteed at synchronization points without the + need for explicit calls to {clEnqueueSVMMap} and {clEnqueueSVMUnmap} + or {clEnqueueMapBuffer} and {clEnqueueUnmapMemObject} on a {cl_mem_TYPE} + buffer object created using the SVM pointer. + * There are two kinds of fine-grain sharing support. + Devices may support either fine-grain buffer sharing or fine-grain + system sharing. + ** Fine-grain buffer sharing provides fine-grain SVM only within buffers + and is an extension of coarse-grain sharing. + To support fine-grain buffer sharing in an OpenCL context, all devices + in the context must support {CL_DEVICE_SVM_FINE_GRAIN_BUFFER}. + ** Fine-grain system sharing enables fine-grain sharing of the host's + entire virtual memory, including memory regions allocated by the system + *malloc* API. + OpenCL buffer objects are unnecessary and programmers can pass pointers + allocated using *malloc* to OpenCL kernels. -// refError +As an illustration of fine-grain SVM using SVM atomic operations to maintain +memory consistency, consider the following example. 
+The host and a set of devices can simultaneously access and update a shared +work-queue data structure holding work-items to be done. +The host can use atomic operations to insert new work-items into the queue +at the same time as the devices using similar atomic operations to remove +work-items for processing. -{clGetSamplerInfo} returns {CL_SUCCESS} if the function is executed -successfully. -Otherwise, it returns one of the following errors: +It is the programmer's responsibility to ensure that no host code or +executing kernels attempt to access a shared memory region after that memory +is freed. +We require the SVM implementation to work with either 32- or 64- bit host +applications subject to the following requirement: the address space size +must be the same for the host and all OpenCL devices in the context. - * {CL_INVALID_VALUE} if _param_name_ is not valid, or if size in bytes - specified by _param_value_size_ is < size of return type as described in - the <> table and - _param_value_ is not `NULL`. - * {CL_INVALID_SAMPLER} if _sampler_ is a not a valid sampler object. - * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required - by the OpenCL implementation on the device. - * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources - required by the OpenCL implementation on the host. +[open,refpage='clSVMAlloc',desc='Allocates a shared virtual memory (SVM) buffer that can be shared by the host and all devices in an OpenCL context that support shared virtual memory.',type='protos'] -- +To allocate a shared virtual memory buffer (referred to as a SVM buffer) +that can be shared by the host and all devices in an OpenCL context that +support shared virtual memory, call the function + +include::{generated}/api/protos/clSVMAlloc.txt[] +include::{generated}/api/version-notes/clSVMAlloc.asciidoc[] + * _context_ is a valid OpenCL context used to create the SVM buffer. 
+ * _flags_ is a bit-field that is used to specify allocation and usage + information. + The <<svm-flags-table,SVM Memory Flags>> table describes the possible values + for _flags_. + * _size_ is the size in bytes of the SVM buffer to be allocated. + * _alignment_ is the minimum alignment in bytes that is required for the newly + created buffer's memory region. + It must be a power of two up to the largest data type supported by the + OpenCL device. + For the full profile, the largest data type is long16. + For the embedded profile, it is long16 if the device supports 64-bit + integers; otherwise it is int16. + If alignment is 0, a default alignment will be used that is equal to the + size of the largest data type supported by the OpenCL implementation. -== Program Objects +[[svm-flags-table]] +.List of supported SVM memory flag values +[width="100%",cols="<50%,<50%",options="header"] +|==== +| SVM Memory Flags | Description +| {CL_MEM_READ_WRITE} + | This flag specifies that the SVM buffer will be read and written by a + kernel. + This is the default. +| {CL_MEM_WRITE_ONLY} + | This flag specifies that the SVM buffer will be written but not read by + a kernel. -An OpenCL program consists of a set of kernels that are identified as -functions declared with the `+__kernel+` qualifier in the program source. -OpenCL programs may also contain auxiliary functions and constant data that -can be used by kernel functions. -The program executable can be generated _online_ or _offline_ by the OpenCL -compiler for the appropriate target device(s). + Reading from a SVM buffer created with {CL_MEM_WRITE_ONLY} inside a kernel + is undefined. -A program object encapsulates the following information: + {CL_MEM_READ_WRITE} and {CL_MEM_WRITE_ONLY} are mutually exclusive. +| {CL_MEM_READ_ONLY} + | This flag specifies that the SVM buffer object is a read-only memory + object when used inside a kernel. - * An associated context. - * A program source or binary.
- * The latest successfully built program executable, library or compiled - binary, the list of devices for which the program executable, library or - compiled binary is built, the build options used and a build log. - * The number of kernel objects currently attached. + Writing to a SVM buffer created with {CL_MEM_READ_ONLY} inside a kernel is + undefined. + {CL_MEM_READ_WRITE} or {CL_MEM_WRITE_ONLY} and {CL_MEM_READ_ONLY} are mutually + exclusive. +| {CL_MEM_SVM_FINE_GRAIN_BUFFER_anchor} -=== Creating Program Objects +include::{generated}/api/version-notes/CL_MEM_SVM_FINE_GRAIN_BUFFER.asciidoc[] + | This specifies that the application wants the OpenCL implementation to + do a fine-grained allocation. +| {CL_MEM_SVM_ATOMICS_anchor} -[open,refpage='clCreateProgramWithSource',desc='Creates a program object for a context, and loads source code specified by text strings into the program object.',type='protos'] --- -To creates a program object for a context and load source code into that -object, call the function +include::{generated}/api/version-notes/CL_MEM_SVM_ATOMICS.asciidoc[] + | This flag is valid only if {CL_MEM_SVM_FINE_GRAIN_BUFFER} is specified in + flags. + It is used to indicate that SVM atomic operations can control visibility + of memory accesses in this SVM buffer. +|==== -include::{generated}/api/protos/clCreateProgramWithSource.txt[] -include::{generated}/api/version-notes/clCreateProgramWithSource.asciidoc[] +If {CL_MEM_SVM_FINE_GRAIN_BUFFER} is not specified, the buffer can be created +as a coarse grained SVM allocation. +Similarly, if {CL_MEM_SVM_ATOMICS} is not specified, the buffer can be created +without support for SVM atomic operations (refer to an OpenCL kernel +language specifications). - * _context_ must be a valid OpenCL context. - * _strings_ is an array of _count_ pointers to optionally null-terminated - character strings that make up the source code. 
- * _lengths_ argument is an array with the number of chars in each string - (the string length). - If an element in _lengths_ is zero, its accompanying string is - null-terminated. - If _lengths_ is `NULL`, all strings in the _strings_ argument are considered - null-terminated. - Any length value passed in that is greater than zero excludes the null - terminator in its count. - * _errcode_ret_ will return an appropriate error code. - If _errcode_ret_ is `NULL`, no error code is returned. +Calling {clSVMAlloc} does not itself provide consistency for the shared +memory region. +When the host cannot use the SVM atomic operations, it must rely on OpenCL's +guaranteed memory consistency at synchronization points. -The source code specified by _strings_ will be loaded into the program -object. +For SVM to be used efficiently, the host and any devices sharing a buffer +containing virtual memory pointers should have the same endianness. +If the context passed to {clSVMAlloc} has devices with mixed endianness and +the OpenCL implementation is unable to implement SVM because of that mixed +endianness, {clSVMAlloc} will fail and return `NULL`. -The devices associated with the program object are the devices associated -with _context_. -The source code specified by _strings_ is either an OpenCL C program source, -header or implementation-defined source for custom devices that support an -online compiler. -OpenCL {cpp} is not supported as an online-compiled kernel language through -this interface. +Although SVM is generally not supported for image objects, {clCreateImage} +and {clCreateImageWithProperties} +may create an image from a buffer (a 1D image from a buffer or a 2D image +from buffer) if the buffer specified in its image description parameter is a +SVM buffer. +Such images have a linear memory representation so their memory can be +shared using SVM. +However, fine grained sharing and atomics are not supported for image reads +and writes in a kernel. 
// refError -{clCreateProgramWithSource} returns a valid non-zero program object and -_errcode_ret_ is set to {CL_SUCCESS} if the program object is created -successfully. -Otherwise, it returns a `NULL` value with one of the following error values -returned in _errcode_ret_: +{clSVMAlloc} returns a valid non-`NULL` shared virtual memory address if the +SVM buffer is successfully allocated. +Otherwise, like *malloc*, it returns a `NULL` pointer value. +{clSVMAlloc} will fail if - * {CL_INVALID_CONTEXT} if _context_ is not a valid context. - * {CL_INVALID_VALUE} if _count_ is zero or if _strings_ or any entry in - _strings_ is `NULL`. - * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required - by the OpenCL implementation on the device. - * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources - required by the OpenCL implementation on the host. + * _context_ is not a valid context, or no devices in _context_ support SVM. + * _flags_ does not contain {CL_MEM_SVM_FINE_GRAIN_BUFFER} but does contain + {CL_MEM_SVM_ATOMICS}. + * Values specified in _flags_ do not follow rules described for supported + values in the <<svm-flags-table,SVM Memory Flags>> table. + * {CL_MEM_SVM_FINE_GRAIN_BUFFER} or {CL_MEM_SVM_ATOMICS} is specified in + _flags_ and these are not supported by at least one device in _context_. + * The values specified in _flags_ are not valid, i.e. do not match those + defined in the <<svm-flags-table,SVM Memory Flags>> table. + * _size_ is 0 or > {CL_DEVICE_MAX_MEM_ALLOC_SIZE} value for any device in + _context_. + * _alignment_ is not a power of two or the OpenCL implementation cannot + support the specified alignment for at least one device in _context_. + * There was a failure to allocate resources.
-- -[open,refpage='clCreateProgramWithIL',desc='Creates a program object for a context, and loads the IL into the program object.',type='protos'] +[open,refpage='clSVMFree',desc='Frees a shared virtual memory buffer allocated using clSVMAlloc.',type='protos'] -- -To create a program object for a context and load code in an intermediate -language into that object, call the function - -include::{generated}/api/protos/clCreateProgramWithIL.txt[] -include::{generated}/api/version-notes/clCreateProgramWithIL.asciidoc[] -Also see extension *cl_khr_il_program*. +To free a shared virtual memory buffer allocated using {clSVMAlloc}, call +the function - * _context_ must be a valid OpenCL context. - * _il_ is a pointer to a _length_-byte block of memory containing SPIR-V or an - implementation-defined intermediate language. - * _errcode_ret_ will return an appropriate error code. - If _errcode_ret_ is `NULL`, no error code is returned. +include::{generated}/api/protos/clSVMFree.txt[] +include::{generated}/api/version-notes/clSVMFree.asciidoc[] -The intermediate language pointed to by _il_ and with length in bytes _length_ -will be loaded into the program object. -The devices associated with the program object are the devices associated -with _context_. + * _context_ is a valid OpenCL context used to create the SVM buffer. + If no devices in _context_ support SVM, no action occurs. + * _svm_pointer_ must be the value returned by a call to {clSVMAlloc}. + If a `NULL` pointer is passed in _svm_pointer_, no action occurs. -// refError +Note that {clSVMFree} does not wait for previously enqueued commands that +may be using _svm_pointer_ to finish before freeing _svm_pointer_. +It is the responsibility of the application to make sure that enqueued +commands that use _svm_pointer_ have finished before freeing _svm_pointer_. 
+This can be done by enqueuing a blocking operation such as {clFinish}, +{clWaitForEvents}, {clEnqueueReadBuffer} or by registering a callback with +the events associated with enqueued commands and when the last enqueued +command has finished freeing _svm_pointer_. -{clCreateProgramWithIL} returns a valid non-zero program object and -_errcode_ret_ is set to {CL_SUCCESS} if the program object is created -successfully. -Otherwise, it returns a `NULL` value with one of the following error values -returned in _errcode_ret_: +The behavior of using _svm_pointer_ after it has been freed is undefined. +In addition, if a buffer object is created using {clCreateBuffer} or +{clCreateBufferWithProperties} with _svm_pointer_, the buffer object must +first be released before the _svm_pointer_ is freed. - * {CL_INVALID_CONTEXT} if _context_ is not a valid context. - * {CL_INVALID_OPERATION} if no devices in _context_ support intermediate - language programs. - * {CL_INVALID_VALUE} if _il_ is `NULL` or if _length_ is zero. - * {CL_INVALID_VALUE} if the _length_-byte memory pointed to by _il_ does not - contain well-formed intermediate language input that can be consumed by - the OpenCL runtime. - * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required - by the OpenCL implementation on the device. - * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources - required by the OpenCL implementation on the host. +The {clEnqueueSVMFree} API can also be used to enqueue a callback to free +the shared virtual memory buffer allocated using {clSVMAlloc} or a shared +system memory pointer. 
-- -[open,refpage='clCreateProgramWithBinary',desc='Creates a program object for a context, and loads binary bits into the program object.',type='protos'] +[open,refpage='clEnqueueSVMFree',desc='Enqueues a command to free shared virtual memory allocated using clSVMAlloc or a shared system memory pointer.',type='protos'] -- -To create a program object for a context and load binary bits into that -object, call the function - -include::{generated}/api/protos/clCreateProgramWithBinary.txt[] -include::{generated}/api/version-notes/clCreateProgramWithBinary.asciidoc[] - - * _context_ must be a valid OpenCL context. - * _device_list_ is a pointer to a list of devices that are in _context_. - _device_list_ must be a non-`NULL` value. - The binaries are loaded for devices specified in this list. - * _num_devices_ is the number of devices listed in _device_list_. - * _lengths_ is an array of the size in bytes of the program binaries to be - loaded for devices specified by _device_list_. - * _binaries_ is an array of pointers to program binaries to be loaded for - devices specified by _device_list_. - For each device given by _device_list_[i], the pointer to the program binary - for that device is given by _binaries_[i] and the length of this - corresponding binary is given by _lengths_[i]. - _lengths_[i] cannot be zero and _binaries_[i] cannot be a `NULL` pointer. - -The devices associated with the program object will be the list of devices -specified by _device_list_. -The list of devices specified by _device_list_ must be devices associated -with _context_. - -The program binaries specified by _binaries_ will be loaded into the program -object. -They contain bits that describe one of the following: - - * a program executable to be run on the device(s) associated with - _context_, - * a compiled program for device(s) associated with _context_, or - * a library of compiled programs for device(s) associated with _context_. 
- -The program binary can consist of either or both: - - * Device-specific code and/or, - * Implementation-specific intermediate representation (IR) which will be - converted to the device-specific code. +To enqueue a command to free the shared virtual memory allocated using +{clSVMAlloc} or a shared system memory pointer, call the function - * _binary_status_ returns whether the program binary for each device specified - in _device_list_ was loaded successfully or not. - It is an array of _num_devices_ entries and returns {CL_SUCCESS} in - _binary_status_[i] if binary was successfully loaded for device specified by - _device_list_[i]; otherwise returns {CL_INVALID_VALUE} if _lengths_[i] is zero - or if _binaries_[i] is a `NULL` value or {CL_INVALID_BINARY} in - _binary_status_[i] if program binary is not a valid binary for the specified - device. - If _binary_status_ is `NULL`, it is ignored. - * _errcode_ret_ will return an appropriate error code. - If _errcode_ret_ is `NULL`, no error code is returned. +include::{generated}/api/protos/clEnqueueSVMFree.txt[] +include::{generated}/api/version-notes/clEnqueueSVMFree.asciidoc[] -OpenCL allows applications to create a program object using the program -source or binary and build appropriate program executables. -This can be very useful as it allows applications to load program source and -then compile and link to generate a program executable online on its first -instance for appropriate OpenCL devices in the system. -These executables can now be queried and cached by the application. -The cached executables can be read and loaded by the application, which can -help significantly reduce the application initialization time. + * _command_queue_ is a valid host command-queue. + * _svm_pointers_ and _num_svm_pointers_ specify shared virtual memory pointers + to be freed. 
+ Each pointer in _svm_pointers_ that was allocated using {clSVMAlloc} must + have been allocated from the same context from which _command_queue_ was + created. + The memory associated with _svm_pointers_ can be reused or freed after the + function returns. + * _pfn_free_func_ specifies the callback function to be called to free the SVM + pointers. + This callback function may be called asynchronously by the OpenCL + implementation. + It is the application's responsibility to ensure that the callback function + is thread-safe. + _pfn_free_func_ takes four arguments: _queue_ which is the command-queue in + which {clEnqueueSVMFree} was enqueued, the count and list of SVM pointers to + free and _user_data_ which is a pointer to user specified data. + If _pfn_free_func_ is `NULL`, all pointers specified in _svm_pointers_ must + be allocated using {clSVMAlloc} and the OpenCL implementation will free + these SVM pointers. + _pfn_free_func_ must be a valid callback function if any SVM pointer to be + freed is a shared system memory pointer i.e. not allocated using + {clSVMAlloc}. + If _pfn_free_func_ is a valid callback function, the OpenCL implementation + will call _pfn_free_func_ to free all the SVM pointers specified in + _svm_pointers_. + * _user_data_ will be passed as the _user_data_ argument when _pfn_free_func_ + is called. + _user_data_ can be `NULL`. + * _event_wait_list_ and _num_events_in_wait_list_ specify events that need to + complete before {clEnqueueSVMFree} can be executed. + If _event_wait_list_ is `NULL`, then {clEnqueueSVMFree} does not wait on any + event to complete. + If _event_wait_list_ is `NULL`, _num_events_in_wait_list_ must be 0. + If _event_wait_list_ is not `NULL`, the list of events pointed to by + _event_wait_list_ must be valid and _num_events_in_wait_list_ must be + greater than 0. + The events specified in _event_wait_list_ act as synchronization points. 
+ The context associated with events in _event_wait_list_ and _command_queue_ + must be the same. + The memory associated with _event_wait_list_ can be reused or freed after + the function returns. + * _event_ returns an event object that identifies this command and + can be used to query or queue a wait for this command to complete. + If _event_ is `NULL` or the enqueue is unsuccessful, no event will be + created and therefore it will not be possible to query the status of this + command or to wait for this command to complete. + If _event_wait_list_ and _event_ are not `NULL`, _event_ must not refer + to an element of the _event_wait_list_ array. // refError -{clCreateProgramWithBinary} returns a valid non-zero program object and -_errcode_ret_ is set to {CL_SUCCESS} if the program object is created +{clEnqueueSVMFree} returns {CL_SUCCESS} if the function is executed successfully. -Otherwise, it returns a `NULL` value with one of the following error values -returned in _errcode_ret_: +Otherwise, it returns one of the following errors: - * {CL_INVALID_CONTEXT} if _context_ is not a valid context. - * {CL_INVALID_VALUE} if _device_list_ is `NULL` or _num_devices_ is zero. - * {CL_INVALID_DEVICE} if any device in _device_list_ is not in - the list of devices associated with _context_. - * {CL_INVALID_VALUE} if _lengths_ or _binaries_ is `NULL` or if any entry - in _lengths_[i] is zero or _binaries_[i] is `NULL`. - * {CL_INVALID_BINARY} if an invalid program binary was encountered for any - device. - _binary_status_ will return specific status for each device. + * {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not a valid host + command-queue. + * {CL_INVALID_OPERATION} if the device associated with _command_queue_ does not support SVM. + * {CL_INVALID_VALUE} if _num_svm_pointers_ is 0 and _svm_pointers_ is + non-`NULL`, _or_ if _svm_pointers_ is `NULL` and _num_svm_pointers_ is + not 0. 
+ * {CL_INVALID_EVENT_WAIT_LIST} if _event_wait_list_ is `NULL` and + _num_events_in_wait_list_ > 0, or _event_wait_list_ is not `NULL` and + _num_events_in_wait_list_ is 0, or if event objects in _event_wait_list_ + are not valid events. * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required by the OpenCL implementation on the device. * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources required by the OpenCL implementation on the host. -- -[open,refpage='clCreateProgramWithBuiltInKernels',desc='Creates a program object for a context, and loads the information related to the built-in kernels into a program object.',type='protos'] +[open,refpage='clEnqueueSVMMemcpy',desc='Enqueues a command to do a memcpy operation.',type='protos'] -- -To create a program object for a context and loads the information related -to the built-in kernels into that object, call the function +To enqueue a command to do a memcpy operation, call the function -include::{generated}/api/protos/clCreateProgramWithBuiltInKernels.txt[] -include::{generated}/api/version-notes/clCreateProgramWithBuiltInKernels.asciidoc[] +include::{generated}/api/protos/clEnqueueSVMMemcpy.txt[] +include::{generated}/api/version-notes/clEnqueueSVMMemcpy.asciidoc[] - * _context_ must be a valid OpenCL context. - * _num_devices_ is the number of devices listed in _device_list_. - * _device_list_ is a pointer to a list of devices that are in _context_. - _device_list_ must be a non-`NULL` value. - The built-in kernels are loaded for devices specified in this list. - * _kernel_names_ is a semi-colon separated list of built-in kernel names. + * _command_queue_ refers to the host command-queue in which the read / write + command will be queued. + If either _dst_ptr_ or _src_ptr_ is allocated using {clSVMAlloc} then the + OpenCL context allocated against must match that of _command_queue_. + * _blocking_copy_ indicates if the copy operation is _blocking_ or + _non-blocking_. 
+ * If _blocking_copy_ is {CL_TRUE} i.e. the copy command is blocking, + {clEnqueueSVMMemcpy} does not return until the buffer data has been copied + into memory pointed to by _dst_ptr_. + * _size_ is the size in bytes of data being copied. + * _dst_ptr_ is the pointer to a host or SVM memory allocation where data is + copied to. + * _src_ptr_ is the pointer to a host or SVM memory allocation where data is + copied from. + * _event_wait_list_ and _num_events_in_wait_list_ specify events that need to + complete before this particular command can be executed. + If _event_wait_list_ is `NULL`, then this particular command does not wait + on any event to complete. + If _event_wait_list_ is `NULL`, _num_events_in_wait_list_ must be 0. + If _event_wait_list_ is not `NULL`, the list of events pointed to by + _event_wait_list_ must be valid and _num_events_in_wait_list_ must be + greater than 0. + The events specified in _event_wait_list_ act as synchronization points. + The context associated with events in _event_wait_list_ and _command_queue_ + must be the same. + The memory associated with _event_wait_list_ can be reused or freed after + the function returns. + * _event_ returns an event object that identifies this read / write command + and can be used to query or queue a wait for this command to complete. + If _event_ is `NULL` or the enqueue is unsuccessful, no event will be + created and therefore it will not be possible to query the status of this + command or to wait for this command to complete. + If _event_wait_list_ and _event_ are not `NULL`, _event_ must not refer + to an element of the _event_wait_list_ array. -The devices associated with the program object will be the list of devices -specified by _device_list_. -The list of devices specified by _device_list_ must be devices associated -with _context_. +If _blocking_copy_ is {CL_FALSE} i.e. the copy command is non-blocking, +{clEnqueueSVMMemcpy} queues a non-blocking copy command and returns. 
+The contents of the buffer that _dst_ptr_ points to cannot be used until the +copy command has completed. +The _event_ argument returns an event object which can be used to query the +execution status of the copy command. +When the copy command has completed, the contents of the buffer that +_dst_ptr_ points to can be used by the application. + +If the memory allocation(s) containing _dst_ptr_ and/or _src_ptr_ are +allocated using {clSVMAlloc} and either is not allocated from the same +context from which _command_queue_ was created, the behavior is undefined. // refError -{clCreateProgramWithBuiltInKernels} returns a valid non-zero program object -and _errcode_ret_ is set to {CL_SUCCESS} if the program object is created +{clEnqueueSVMMemcpy} returns {CL_SUCCESS} if the function is executed successfully. -Otherwise, it returns a `NULL` value with one of the following error values -returned in _errcode_ret_: +Otherwise, it returns one of the following errors: - * {CL_INVALID_CONTEXT} if _context_ is not a valid context. - * {CL_INVALID_VALUE} if _device_list_ is `NULL` or _num_devices_ is zero. - * {CL_INVALID_VALUE} if _kernel_names_ is `NULL` or _kernel_names_ contains - a kernel name that is not supported by any of the devices in - _device_list_. - * {CL_INVALID_DEVICE} if any device in _device_list_ is not in the list - of devices associated with _context_. + * {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not a valid host + command-queue. + * {CL_INVALID_OPERATION} if the device associated with _command_queue_ does not support SVM. + * {CL_INVALID_CONTEXT} if the context associated with _command_queue_ and + events in _event_wait_list_ are not the same. + * {CL_INVALID_EVENT_WAIT_LIST} if _event_wait_list_ is `NULL` and + _num_events_in_wait_list_ > 0, or _event_wait_list_ is not `NULL` and + _num_events_in_wait_list_ is 0, or if event objects in _event_wait_list_ + are not valid events.
+ * {CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST} if the copy operation is + blocking and the execution status of any of the events in + _event_wait_list_ is a negative integer value. + * {CL_INVALID_VALUE} if _dst_ptr_ or _src_ptr_ is `NULL`. + * {CL_MEM_COPY_OVERLAP} if the values specified for _dst_ptr_, _src_ptr_ and + _size_ result in an overlapping copy. * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required by the OpenCL implementation on the device. * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources required by the OpenCL implementation on the host. -- - -=== Retaining and Releasing Program Objects - -[open,refpage='clRetainProgram',desc='Increments the program reference count.',type='protos'] +[open,refpage='clEnqueueSVMMemFill',desc='Enqueues a command to fill a region in memory with a pattern of a given pattern size.',type='protos'] -- -To retain a program object, call the function - -include::{generated}/api/protos/clRetainProgram.txt[] -include::{generated}/api/version-notes/clRetainProgram.asciidoc[] +To enqueue a command to fill a region in memory with a pattern of a given +pattern size, call the function - * _program_ is the program object to be retained. +include::{generated}/api/protos/clEnqueueSVMMemFill.txt[] +include::{generated}/api/version-notes/clEnqueueSVMMemFill.asciidoc[] -The _program_ reference count is incremented. -All APIs that create a program do an implicit retain. + * _command_queue_ refers to the host command-queue in which the fill command + will be queued. + The OpenCL context associated with _command_queue_ and SVM pointer referred + to by _svm_ptr_ must be the same. + * _svm_ptr_ is a pointer to a memory region that will be filled with + _pattern_. + It must be aligned to _pattern_size_ bytes. + If _svm_ptr_ is allocated using {clSVMAlloc} then it must be allocated from + the same context from which _command_queue_ was created. + Otherwise the behavior is undefined. 
+ * _pattern_ is a pointer to the data pattern of size _pattern_size_ in bytes. + _pattern_ will be used to fill the region of memory starting at _svm_ptr_ + that is _size_ bytes in size. + The data pattern must be a scalar or vector integer or floating-point data + type supported by OpenCL as described in <<scalar-data-types, Shared Application Scalar Data Types>> and <<vector-data-types, Supported Application Vector Data Types>>. + For example, if the region pointed to by _svm_ptr_ is to be filled with a + pattern of float4 values, then _pattern_ will be a pointer to a cl_float4 + value and _pattern_size_ will be `sizeof(cl_float4)`. + The maximum value of _pattern_size_ is the size of the largest integer or + floating-point vector data type supported by the OpenCL device. + The memory associated with _pattern_ can be reused or freed after the + function returns. + * _size_ is the size in bytes of the region being filled starting with _svm_ptr_ + and must be a multiple of _pattern_size_. + * _event_wait_list_ and _num_events_in_wait_list_ specify events that need to + complete before this particular command can be executed. + If _event_wait_list_ is `NULL`, then this particular command does not wait + on any event to complete. + If _event_wait_list_ is `NULL`, _num_events_in_wait_list_ must be 0. + If _event_wait_list_ is not `NULL`, the list of events pointed to by + _event_wait_list_ must be valid and _num_events_in_wait_list_ must be + greater than 0. + The events specified in _event_wait_list_ act as synchronization points. + The context associated with events in _event_wait_list_ and _command_queue_ + must be the same. + The memory associated with _event_wait_list_ can be reused or freed after + the function returns. + * _event_ returns an event object that identifies this command and + can be used to query or queue a wait for this command to complete. + If _event_ is `NULL` or the enqueue is unsuccessful, no event will be + created and therefore it will not be possible to query the status of this + command or to wait for this command to complete.
+ If _event_wait_list_ and _event_ are not `NULL`, _event_ must not refer + to an element of the _event_wait_list_ array. // refError -{clRetainProgram} returns {CL_SUCCESS} if the function is executed +{clEnqueueSVMMemFill} returns {CL_SUCCESS} if the function is executed successfully. Otherwise, it returns one of the following errors: - * {CL_INVALID_PROGRAM} if _program_ is not a valid program object. + * {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not a valid host + command-queue. + * {CL_INVALID_OPERATION} if the device associated with _command_queue_ does not support SVM. + * {CL_INVALID_CONTEXT} if the context associated with _command_queue_ and + events in _event_wait_list_ are not the same. + * {CL_INVALID_VALUE} if _svm_ptr_ is `NULL`. + * {CL_INVALID_VALUE} if _svm_ptr_ is not aligned to _pattern_size_ bytes. + * {CL_INVALID_VALUE} if _pattern_ is `NULL` or if _pattern_size_ is 0 or if + _pattern_size_ is not one of {1, 2, 4, 8, 16, 32, 64, 128}. + * {CL_INVALID_VALUE} if _size_ is not a multiple of _pattern_size_. + * {CL_INVALID_EVENT_WAIT_LIST} if _event_wait_list_ is `NULL` and + _num_events_in_wait_list_ > 0, or _event_wait_list_ is not `NULL` and + _num_events_in_wait_list_ is 0, or if event objects in _event_wait_list_ + are not valid events. * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required by the OpenCL implementation on the device. * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources required by the OpenCL implementation on the host. 
-- -[open,refpage='clReleaseProgram',desc='Decrements the program reference count.',type='protos'] +[open,refpage='clEnqueueSVMMap',desc='Enqueues a command that will allow the host to update a region of a SVM buffer',type='protos'] -- -To release a program object, call the function +To enqueue a command that will allow the host to update a region of a SVM +buffer, call the function -include::{generated}/api/protos/clReleaseProgram.txt[] -include::{generated}/api/version-notes/clReleaseProgram.asciidoc[] +include::{generated}/api/protos/clEnqueueSVMMap.txt[] +include::{generated}/api/version-notes/clEnqueueSVMMap.asciidoc[] - * _program_ is the program object to be released. + * _command_queue_ must be a valid host command-queue. + * _blocking_map_ indicates if the map operation is _blocking_ or + _non-blocking_. + * _map_flags_ is a bit-field and is described in the + <> table. + * _svm_ptr_ and _size_ are a pointer to a memory region and size in bytes that + will be updated by the host. + If _svm_ptr_ is allocated using {clSVMAlloc} then it must be allocated from + the same context from which _command_queue_ was created. + Otherwise the behavior is undefined. + * _event_wait_list_ and _num_events_in_wait_list_ specify events that need to + complete before this particular command can be executed. + If _event_wait_list_ is `NULL`, then this particular command does not wait + on any event to complete. + If _event_wait_list_ is `NULL`, _num_events_in_wait_list_ must be 0. + If _event_wait_list_ is not `NULL`, the list of events pointed to by + _event_wait_list_ must be valid and _num_events_in_wait_list_ must be + greater than 0. + The events specified in _event_wait_list_ act as synchronization points. + The context associated with events in _event_wait_list_ and _command_queue_ + must be the same. + The memory associated with _event_wait_list_ can be reused or freed after + the function returns. 
+ * _event_ returns an event object that identifies this command and + can be used to query or queue a wait for this command to complete. + If _event_ is `NULL` or the enqueue is unsuccessful, no event will be + created and therefore it will not be possible to query the status of this + command or to wait for this command to complete. + If _event_wait_list_ and _event_ are not `NULL`, _event_ must not refer + to an element of the _event_wait_list_ array. -The _program_ reference count is decremented. -The program object is deleted after all kernel objects associated with -_program_ have been deleted and the _program_ reference count becomes zero. +If _blocking_map_ is {CL_TRUE}, {clEnqueueSVMMap} does not return until the +application can access the contents of the SVM region specified by _svm_ptr_ +and _size_ on the host. + +If _blocking_map_ is {CL_FALSE} i.e. map operation is non-blocking, the region +specified by _svm_ptr_ and _size_ cannot be used until the map command has +completed. +The _event_ argument returns an event object which can be used to query the +execution status of the map command. +When the map command is completed, the application can access the contents +of the region specified by _svm_ptr_ and _size_. + +Note that since we are enqueuing a command with a SVM buffer, the region is +already mapped in the host address space. // refError -{clReleaseProgram} returns {CL_SUCCESS} if the function is executed +{clEnqueueSVMMap} returns {CL_SUCCESS} if the function is executed successfully. Otherwise, it returns one of the following errors: - * {CL_INVALID_PROGRAM} if _program_ is not a valid program object. + * {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not a valid host + command-queue. + * {CL_INVALID_OPERATION} if the device associated with _command_queue_ does not support SVM. + * {CL_INVALID_CONTEXT} if context associated with _command_queue_ and events + in _event_wait_list_ are not the same. + * {CL_INVALID_VALUE} if _svm_ptr_ is `NULL`. 
+ * {CL_INVALID_VALUE} if _size_ is 0 or if values specified in _map_flags_ + are not valid. + * {CL_INVALID_EVENT_WAIT_LIST} if _event_wait_list_ is `NULL` and + _num_events_in_wait_list_ > 0, or _event_wait_list_ is not `NULL` and + _num_events_in_wait_list_ is 0, or if event objects in _event_wait_list_ + are not valid events. + * {CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST} if the map operation is + blocking and the execution status of any of the events in + _event_wait_list_ is a negative integer value. * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required by the OpenCL implementation on the device. * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources required by the OpenCL implementation on the host. - -Using this function to release a reference that was not obtained by creating -the object or by calling {clRetainProgram} causes undefined behavior. -- -[open,refpage='clSetProgramReleaseCallback',desc='Registers a destructor callback function with a program object.',type='protos'] +[open,refpage='clEnqueueSVMUnmap',desc='Enqueues a command to indicate that the host has completed updating the region given by an SVM pointer and which was specified in a previous call to clEnqueueSVMMap.',type='protos'] -- -To register a callback function with a program object that is called when -the program object is destroyed, call the function +To enqueue a command to indicate that the host has completed updating the +region given by _svm_ptr_ and which was specified in a previous call to +{clEnqueueSVMMap}, call the function -include::{generated}/api/protos/clSetProgramReleaseCallback.txt[] -include::{generated}/api/version-notes/clSetProgramReleaseCallback.asciidoc[] +include::{generated}/api/protos/clEnqueueSVMUnmap.txt[] +include::{generated}/api/version-notes/clEnqueueSVMUnmap.asciidoc[] - * _program_ specifies the memory object to register the callback to. - * _pfn_notify_ is the callback function to register. 
- This callback function may be called asynchronously by the OpenCL - implementation. - It is the application's responsibility to ensure that the callback function - is thread-safe. - The parameters to this callback function are: - ** _program_ is the program being deleted. - When the callback function is called by the implementation, this program - object is not longer valid. - _program_ is only provided for reference purposes. - ** _user_data_ is a pointer to user supplied data. - * _user_data_ will be passed as the _user_data_ argument when _pfn_notify_ is - called. - _user_data_ can be `NULL`. + * _command_queue_ must be a valid host command-queue. + * _svm_ptr_ is a pointer that was specified in a previous call to + {clEnqueueSVMMap}. + If _svm_ptr_ is allocated using {clSVMAlloc} then it must be allocated from + the same context from which _command_queue_ was created. + Otherwise the behavior is undefined. + * _event_wait_list_ and _num_events_in_wait_list_ specify events that need to + complete before {clEnqueueSVMUnmap} can be executed. + If _event_wait_list_ is `NULL`, then {clEnqueueSVMUnmap} does not wait on any + event to complete. + If _event_wait_list_ is `NULL`, _num_events_in_wait_list_ must be 0. + If _event_wait_list_ is not `NULL`, the list of events pointed to by + _event_wait_list_ must be valid and _num_events_in_wait_list_ must be + greater than 0. + The events specified in _event_wait_list_ act as synchronization points. + The context associated with events in _event_wait_list_ and _command_queue_ + must be the same. + The memory associated with _event_wait_list_ can be reused or freed after + the function returns. + * _event_ returns an event object that identifies this command and + can be used to query or queue a wait for this command to complete. 
+ If _event_ is `NULL` or the enqueue is unsuccessful, no event will be + created and therefore it will not be possible to query the status of this + command or to wait for this command to complete. + If _event_wait_list_ and _event_ are not `NULL`, _event_ must not refer + to an element of the _event_wait_list_ array. -Each call to {clSetProgramReleaseCallback} registers the specified -callback function on a callback stack associated with _program_. -The registered callback functions are called in the reverse order in -which they were registered. -The registered callback functions are called after destructors (if any) for -program scope global variables (if any) are called and before the program -object is deleted. -This provides a mechanism for an application to be notified when destructors -for program scope global variables are complete. +{clEnqueueSVMMap} and {clEnqueueSVMUnmap} act as synchronization points for +the region of the SVM buffer specified in these calls. // refError -{clSetProgramReleaseCallback} may unconditionally return an error if no -devices in the context associated with _program_ support destructors for -program scope global variables. -Support for constructors and destructors for program scope global variables -is required only for OpenCL 2.2 devices. - -{clSetProgramReleaseCallback} returns {CL_SUCCESS} if the function is executed +{clEnqueueSVMUnmap} returns {CL_SUCCESS} if the function is executed successfully. Otherwise, it returns one of the following errors: - * {CL_INVALID_PROGRAM} if _program_ is not a valid program object. - * {CL_INVALID_OPERATION} if no devices in the context associated with - _program_ support destructors for program scope global variables. - * {CL_INVALID_VALUE} if _pfn_notify_ is `NULL`. + * {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not a valid host + command-queue. + * {CL_INVALID_OPERATION} if the device associated with _command_queue_ does not support SVM. 
+ * {CL_INVALID_CONTEXT} if context associated with _command_queue_ and events + in _event_wait_list_ are not the same. + * {CL_INVALID_VALUE} if _svm_ptr_ is `NULL`. + * {CL_INVALID_EVENT_WAIT_LIST} if _event_wait_list_ is `NULL` and + _num_events_in_wait_list_ > 0, or if _event_wait_list_ is not `NULL` and + _num_events_in_wait_list_ is 0, or if event objects in _event_wait_list_ + are not valid events. * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required by the OpenCL implementation on the device. * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources required by the OpenCL implementation on the host. --- +[NOTE] +==== +If a coarse-grained SVM buffer is currently mapped for writing, the +application must ensure that the SVM buffer is unmapped before any enqueued +kernels or commands that read from or write to this SVM buffer or any of its +associated {cl_mem_TYPE} buffer objects begin execution; otherwise the behavior is +undefined. -=== Setting SPIR-V specialization constants +If a coarse-grained SVM buffer is currently mapped for reading, the +application must ensure that the SVM buffer is unmapped before any enqueued +kernels or commands that write to this memory object or any of its +associated {cl_mem_TYPE} buffer objects begin execution; otherwise the behavior is +undefined. -NOTE: Specialization constants are <> version -2.2. +A SVM buffer is considered as mapped if there are one or more active +mappings for the SVM buffer irrespective of whether the mapped regions span +the entire SVM buffer. -[open,refpage='clSetProgramSpecializationConstant',desc='',type='protos'] +The above note does not apply to fine-grained SVM buffers (fine-grained +buffers allocated using {clSVMAlloc} or fine-grained system allocations). 
+==== -- -To set the value of a specialization constant, call the function -include::{generated}/api/protos/clSetProgramSpecializationConstant.txt[] -include::{generated}/api/version-notes/clSetProgramSpecializationConstant.asciidoc[] +[open,refpage='clEnqueueSVMMigrateMem',desc='Enqueues a command to indicate which device a set of ranges of SVM allocations should be associated with.',type='protos'] +-- +To enqueue a command to indicate which device a set of ranges of SVM +allocations should be associated with, call the function - * _program_ must be a valid OpenCL program created from an intermediate - language (e.g. SPIR-V). - * _spec_id_ identifies the specialization constant whose value will be - set. - * _spec_size_ specifies the size in bytes of the data pointed to by - _spec_value_. - This should be 1 for boolean constants. - For all other constant types this should match the size of the - specialization constant in the module. - * _spec_value_ is a pointer to the memory location that contains the value of - the specialization constant. - The data pointed to by _spec_value_ are copied and can be safely reused by - the application after {clSetProgramSpecializationConstant} returns. - This specialization value will be used by subsequent calls to - {clBuildProgram} until another call to {clSetProgramSpecializationConstant} - changes it. - If a specialization constant is a boolean constant, _spec_value_ should be a - pointer to a {cl_uchar_TYPE} value. - A value of zero will set the specialization constant to false; any other - value will set it to true. +include::{generated}/api/protos/clEnqueueSVMMigrateMem.txt[] +include::{generated}/api/version-notes/clEnqueueSVMMigrateMem.asciidoc[] -Calling this function multiple times for the same specialization constant -shall cause the last provided value to override any previously specified -value. -The values are used by a subsequent {clBuildProgram} call for the _program_. 
+ * _command_queue_ is a valid host command-queue. + The specified set of allocation ranges will be migrated to the OpenCL device + associated with _command_queue_. + * _num_svm_pointers_ is the number of pointers in the specified _svm_pointers_ + array, and the number of sizes in the _sizes_ array, if _sizes_ is not + `NULL`. + * _svm_pointers_ is a pointer to an array of pointers. + Each pointer in this array must be within an allocation produced by a call + to {clSVMAlloc}. + * _sizes_ is an array of sizes. + The pair _svm_pointers_[i] and _sizes_[i] together define the starting + address and number of bytes in a range to be migrated. + _sizes_ may be `NULL` indicating that every allocation containing any + _svm_pointers_[i] is to be migrated. + Also, if _sizes_[i] is zero, then the entire allocation containing + _svm_pointers_[i] is migrated. + * _flags_ is a bit-field that is used to specify migration options. + The <<migration-flags-table,Memory Migration Flags>> table describes the possible + values for _flags_. + * _event_wait_list_ and _num_events_in_wait_list_ specify events that need to + complete before this particular command can be executed. + If _event_wait_list_ is `NULL`, then this particular command does not wait + on any event to complete. + If _event_wait_list_ is `NULL`, _num_events_in_wait_list_ must be 0. + If _event_wait_list_ is not `NULL`, the list of events pointed to by + _event_wait_list_ must be valid and _num_events_in_wait_list_ must be + greater than 0. + The events specified in _event_wait_list_ act as synchronization points. + The context associated with events in _event_wait_list_ and _command_queue_ + must be the same. + The memory associated with _event_wait_list_ can be reused or freed after + the function returns. + * _event_ returns an event object that identifies this command and + can be used to query or queue a wait for this command to complete.
+ If _event_ is `NULL` or the enqueue is unsuccessful, no event will be + created and therefore it will not be possible to query the status of this + command or to wait for this command to complete. + If _event_wait_list_ and _event_ are not `NULL`, _event_ must not refer + to an element of the _event_wait_list_ array. -Application is not required to provide values for every specialization -constant contained in the module. If the value is not set by this API -call, default values will be used during the build. +Once the event returned by {clEnqueueSVMMigrateMem} has become {CL_COMPLETE}, +the ranges specified by svm pointers and sizes have been successfully +migrated to the device associated with command-queue. -// refError +The user is responsible for managing the event dependencies associated with +this command in order to avoid overlapping access to SVM allocations. +Improperly specified event dependencies passed to {clEnqueueSVMMigrateMem} +could result in undefined results. -{clSetProgramSpecializationConstant} returns {CL_SUCCESS} if the function is -executed successfully. +// refError +{clEnqueueSVMMigrateMem} returns {CL_SUCCESS} if the function is executed +successfully. Otherwise, it returns one of the following errors: - * {CL_INVALID_PROGRAM} if _program_ is not a valid program object created - from an intermediate language (e.g. SPIR-V), or if the intermediate - language does not support specialization constants. - * {CL_INVALID_OPERATION} if no devices associated with _program_ support - intermediate language programs. - * {CL_COMPILER_NOT_AVAILABLE} if _program_ is created with - {clCreateProgramWithIL} and a compiler is not - available, i.e. {CL_DEVICE_COMPILER_AVAILABLE} specified in the - <> table is set to {CL_FALSE}. - * {CL_INVALID_SPEC_ID} if _spec_id_ is not a valid specialization constant - identifier. 
- * {CL_INVALID_VALUE} if _spec_size_ does not match the size of the - specialization constant in the module, or if _spec_value_ is + * {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not a valid host + command-queue. + * {CL_INVALID_OPERATION} if the device associated with _command_queue_ does not support SVM. + * {CL_INVALID_CONTEXT} if the context associated with _command_queue_ and events + in _event_wait_list_ are not the same. + * {CL_INVALID_VALUE} if _num_svm_pointers_ is zero or _svm_pointers_ is `NULL`. + * {CL_INVALID_VALUE} if _sizes_[i] is non-zero and the range [_svm_pointers_[i], + _svm_pointers_[i]+_sizes_[i]) is not contained within an existing + {clSVMAlloc} allocation. + * {CL_INVALID_EVENT_WAIT_LIST} if _event_wait_list_ is `NULL` and + _num_events_in_wait_list_ > 0, or if _event_wait_list_ is not `NULL` and + _num_events_in_wait_list_ is 0, or if event objects in _event_wait_list_ + are not valid events. * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required by the OpenCL implementation on the device. * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources @@ -5772,1319 +7985,1243 @@ Otherwise, it returns one of the following errors: -- -=== Building Program Executables +=== Memory Consistency for SVM Allocations -[open,refpage='clBuildProgram',desc='Builds (compiles and links) a program executable from the program source or binary.',type='protos'] --- -To build (compile & link) a program executable, call the function +To ensure memory consistency in SVM allocations, the program can rely on the +guaranteed memory consistency at synchronization points. +This consistency support already exists in OpenCL 1.x and can be used for +coarse-grained SVM allocations or for fine-grained buffer SVM allocations; +what SVM adds is the ability to share pointers between the host and all SVM +devices.
-include::{generated}/api/protos/clBuildProgram.txt[] -include::{generated}/api/version-notes/clBuildProgram.asciidoc[] +In addition, sub-buffers can also be used to ensure that each device gets a +consistent view of a SVM buffer's memory when it is shared by multiple +devices. +For example, assume that two devices share a SVM pointer. +The host can create a {cl_mem_TYPE} buffer object using {clCreateBuffer} or +{clCreateBufferWithProperties} with {CL_MEM_USE_HOST_PTR} and _host_ptr_ set +to the SVM pointer and then create two disjoint sub-buffers with starting +virtual addresses _sb1_ptr_ and _sb2_ptr_. +These pointers (_sb1_ptr_ and _sb2_ptr_) can be passed to kernels executing +on the two devices. +{clEnqueueMapBuffer} and {clEnqueueUnmapMemObject} and the existing +access rules for memory objects ensure +consistency for buffer regions (_sb1_ptr_ and _sb2_ptr_) read and written by +these kernels. - * _program_ is the program object. - * _device_list_ is a pointer to a list of devices associated with _program_. - If _device_list_ is a `NULL` value, the program executable is built for all - devices associated with _program_ for which a source or binary has been - loaded. - If _device_list_ is a non-`NULL` value, the program executable is built for - devices specified in this list for which a source or binary has been loaded. - * _num_devices_ is the number of devices listed in _device_list_. - * _options_ is a pointer to a null-terminated string of characters that - describes the build options to be used for building the program executable. - The list of supported options is described in <>. - If the program was created using {clCreateProgramWithBinary} and _options_ - is a `NULL` pointer, the program will be built as if _options_ were the same - as when the program binary was originally built.
- If the program was created using {clCreateProgramWithBinary} and _options_ - string contains anything other than the same options in the same order - (whitespace ignored) as when the program binary was originally built, then - the behavior is implementation-defined. - Otherwise, if _options_ is a `NULL` pointer then it will have the same - result as the empty string. - * _pfn_notify_ is a function pointer to a notification routine. - The notification routine is a callback function that an application can - register and which will be called when the program executable has been built - (successfully or unsuccessfully). - If _pfn_notify_ is not `NULL`, {clBuildProgram} does not need to wait for - the build to complete and can return immediately once the build operation - can begin. - Any state changes of the program object that result from calling - {clBuildProgram} (e.g. build status or log) will be observable from this - callback function. - The build operation can begin if the context, program whose sources are - being compiled and linked, list of devices and build options specified are - all valid and appropriate host and device resources needed to perform the - build are available. - If _pfn_notify_ is `NULL`, {clBuildProgram} does not return until the build - has completed. - This callback function may be called asynchronously by the OpenCL - implementation. - It is the application's responsibility to ensure that the callback function - is thread-safe. - * _user_data_ will be passed as an argument when _pfn_notify_ is called. - _user_data_ can be `NULL`. +When the host and devices are able to use SVM atomic operations (i.e. +{CL_DEVICE_SVM_ATOMICS} is set in {CL_DEVICE_SVM_CAPABILITIES}), these atomic +operations can be used to provide memory consistency at a fine grain in a +shared memory region. +The effect of these operations is visible to the host and all devices with +which that memory is shared. 
-The program executable is built from the program source or binary for all -the devices, or a specific device(s) in the OpenCL context associated with -_program_. -OpenCL allows program executables to be built using the source or the -binary. -{clBuildProgram} must be called for _program_ created using -{clCreateProgramWithSource}, {clCreateProgramWithIL} or -{clCreateProgramWithBinary} to build the program executable for one or more -devices associated with _program_. -If _program_ is created with {clCreateProgramWithBinary}, then the program -binary must be an executable binary (not a compiled binary or library). -The executable binary can be queried using {clGetProgramInfo}(_program_, -{CL_PROGRAM_BINARIES}, ...) and can be specified to -{clCreateProgramWithBinary} to create a new program object. +== Sampler Objects -// refError +A sampler object describes how to sample an image when the image is read in +the kernel. +The built-in functions to read from an image in a kernel take a sampler as +an argument. +The sampler arguments to the image read function can be sampler objects +created using OpenCL functions and passed as argument values to the kernel +or can be samplers declared inside a kernel. +In this section we discuss how sampler objects are created using OpenCL +functions. -{clBuildProgram} returns {CL_SUCCESS} if the function is executed -successfully. -Otherwise, it returns one of the following errors: - * {CL_INVALID_PROGRAM} if _program_ is not a valid program object. - * {CL_INVALID_VALUE} if _device_list_ is `NULL` and _num_devices_ is greater - than zero, or if _device_list_ is not `NULL` and _num_devices_ is zero. - * {CL_INVALID_VALUE} if _pfn_notify_ is `NULL` but _user_data_ is not - `NULL`. - * {CL_INVALID_DEVICE} if any device in _device_list_ is not in - the list of devices associated with _program_. 
- * {CL_INVALID_BINARY} if _program_ is created with - {clCreateProgramWithBinary} and devices listed in _device_list_ do not - have a valid program binary loaded. - * {CL_INVALID_BUILD_OPTIONS} if the build options specified by _options_ are - invalid. - * {CL_COMPILER_NOT_AVAILABLE} if _program_ is created with - {clCreateProgramWithSource} or {clCreateProgramWithIL} and a compiler is - not available, i.e. {CL_DEVICE_COMPILER_AVAILABLE} specified in the - <> table is set to {CL_FALSE}. - * {CL_BUILD_PROGRAM_FAILURE} if there is a failure to build the program - executable. - This error will be returned if {clBuildProgram} does not return until - the build has completed. - * {CL_INVALID_OPERATION} if the build of a program executable for any of the - devices listed in _device_list_ by a previous call to {clBuildProgram} - for _program_ has not completed. - * {CL_INVALID_OPERATION} if there are kernel objects attached to _program_. - * {CL_INVALID_OPERATION} if _program_ was not created with - {clCreateProgramWithSource}, {clCreateProgramWithIL} or - {clCreateProgramWithBinary}. - * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required - by the OpenCL implementation on the device. - * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources - required by the OpenCL implementation on the host. +=== Creating Sampler Objects + +[open,refpage='clCreateSamplerWithProperties',desc='Creates a sampler object.',type='protos'] -- +To create a sampler object, call the function +include::{generated}/api/protos/clCreateSamplerWithProperties.txt[] +include::{generated}/api/version-notes/clCreateSamplerWithProperties.asciidoc[] -=== Separate Compilation and Linking of Programs + * _context_ must be a valid OpenCL context. + * _sampler_properties_ specifies a list of sampler property names and their + corresponding values. + Each sampler property name is immediately followed by the corresponding + desired value. + The list is terminated with 0. 
+ The list of supported properties is described in the + <<sampler-properties-table>> table. + If a supported property and its value are not specified in + _sampler_properties_, its default value will be used. + _sampler_properties_ can be `NULL` in which case the default values for + supported sampler properties will be used. + * _errcode_ret_ will return an appropriate error code. + If _errcode_ret_ is `NULL`, no error code is returned. -NOTE: Separate compilation and linking are <> -version 1.2. +[[sampler-properties-table]] +.List of supported sampler creation properties by {clCreateSamplerWithProperties} +[width="100%",cols="<33%,<17%,<50%",options="header"] +|==== +| Sampler Property | Property Value | Description +| {CL_SAMPLER_NORMALIZED_COORDS_anchor} -OpenCL programs are compiled and linked to support the following: +include::{generated}/api/version-notes/CL_SAMPLER_NORMALIZED_COORDS.asciidoc[] + | {cl_bool_TYPE} + | A boolean value that specifies whether the image coordinates + specified are normalized or not. - * Separate compilation and link stages. - Program sources can be compiled to generate a compiled binary object and - linked in a separate stage with other compiled program objects to the - program executable. - * Embedded headers. - In OpenCL 1.0 and 1.1, the I build option could be used to specify the - list of directories to be searched for headers files that are included - by a program source(s). - OpenCL 1.2 extends this by allowing the header sources to come from - program objects instead of just header files. - * Libraries. - The linker can be used to link compiled objects and libraries into a - program executable or to create a library of compiled binaries. + The default value (i.e. the value used if this property is not + specified in _sampler_properties_) is {CL_TRUE}.
+| {CL_SAMPLER_ADDRESSING_MODE_anchor} -[open,refpage='clCompileProgram',desc='Compiles a program\'s source for all the devices or a specific device(s) in the OpenCL context associated with a program.',type='protos'] --- -To compile a program's source for all the devices or a specific device(s) in -the OpenCL context associated with the program, call the function +include::{generated}/api/version-notes/CL_SAMPLER_ADDRESSING_MODE.asciidoc[] + | {cl_addressing_mode_TYPE} + | Specifies how out-of-range image coordinates are handled when + reading from an image. + Valid values are: -include::{generated}/api/protos/clCompileProgram.txt[] -include::{generated}/api/version-notes/clCompileProgram.asciidoc[] + {CL_ADDRESS_NONE_anchor} - Behavior is undefined for out-of-range + image coordinates. - * _program_ is the program object that is the compilation target. - * _device_list_ is a pointer to a list of devices associated with _program_. - If _device_list_ is a `NULL` value, the compile is performed for all devices - associated with _program_. - If _device_list_ is a non-`NULL` value, the compile is performed for devices - specified in this list. - * _num_devices_ is the number of devices listed in _device_list_. - * _options_ is a pointer to a null-terminated string of characters that - describes the compilation options to be used for building the program - executable. - If _options_ is a `NULL` pointer then it will have the same result as the - empty string. - Certain options are ignored when program is created with IL. - The list of supported options is as described in <>. - * _num_input_headers_ specifies the number of programs that describe headers - in the array referenced by _input_headers_. - * _input_headers_ is an array of program embedded headers created with - {clCreateProgramWithSource}. - * _header_include_names_ is an array that has a one to one correspondence with - _input_headers_. 
- Each entry in _header_include_names_ specifies the include name used by - source in _program_ that comes from an embedded header. - The corresponding entry in _input_headers_ identifies the program object - which contains the header source to be used. - The embedded headers are first searched before the headers in the list of - directories specified by the `-I` compile option (as described in - <>). - If multiple entries in _header_include_names_ refer to the same header name, - the first one encountered will be used. - * _pfn_notify_ is a function pointer to a notification routine. - The notification routine is a callback function that an application can - register and which will be called when the program executable has been built - (successfully or unsuccessfully). - If _pfn_notify_ is not `NULL`, {clCompileProgram} does not need to wait for - the compiler to complete and can return immediately once the compilation can - begin. - Any state changes of the program object that result from calling - {clCompileProgram} (e.g. compile status or log) will be observable from this - callback function. - The compilation can begin if the context, program whose sources are being - compiled, list of devices, input headers, programs that describe input - headers and compiler options specified are all valid and appropriate host - and device resources needed to perform the compile are available. - If _pfn_notify_ is `NULL`, {clCompileProgram} does not return until the - compiler has completed. - This callback function may be called asynchronously by the OpenCL - implementation. - It is the application's responsibility to ensure that the callback function - is thread-safe. - * _user_data_ will be passed as an argument when _pfn_notify_ is called. - _user_data_ can be `NULL`. + {CL_ADDRESS_CLAMP_TO_EDGE_anchor} - Out-of-range image coordinates + are clamped to the edge of the image. -The pre-processor runs before the program sources are compiled. 
-The compiled binary is built for all devices associated with _program_ or -the list of devices specified. -The compiled binary can be queried using {clGetProgramInfo}(_program_, -{CL_PROGRAM_BINARIES}, ...) and can be passed to {clCreateProgramWithBinary} -to create a new program object. + {CL_ADDRESS_CLAMP_anchor} - Out-of-range image coordinates are + assigned a border color value. -If _program_ was created using {clCreateProgramWithIL}, then -_num_input_headers_, _input_headers_, and _header_include_names_ are -ignored. + {CL_ADDRESS_REPEAT_anchor} - Out-of-range image coordinates read + from the image as if the image data were replicated in all dimensions. -For example, consider the following program source: + {CL_ADDRESS_MIRRORED_REPEAT_anchor} - Out-of-range image coordinates + read from the image as if the image data were replicated in all + dimensions, mirroring the image contents at the edge of each + replication. -[source,opencl_c] ----- -#include -#include -__kernel void -image_filter (int n, int m, - __constant float *filter_weights, - __read_only image2d_t src_image, - __write_only image2d_t dst_image) -{ -... -} ----- + The default is {CL_ADDRESS_CLAMP}. +| {CL_SAMPLER_FILTER_MODE_anchor} -This kernel includes two headers foo.h and mydir/myinc.h. -The following describes how these headers can be passed as embedded headers -in program objects: +include::{generated}/api/version-notes/CL_SAMPLER_FILTER_MODE.asciidoc[] + | {cl_filter_mode_TYPE} + | Specifies the type of filter that is applied when reading an + image. + Valid values are: -[source,opencl] ----- -cl_program foo_pg = clCreateProgramWithSource(context, - 1, &foo_header_src, NULL, &err); -cl_program myinc_pg = clCreateProgramWithSource(context, - 1, &myinc_header_src, NULL, &err); + {CL_FILTER_NEAREST_anchor} - Returns the image element nearest + to the image coordinate. 
-// lets assume the program source described above is given -// by program_A and is loaded via clCreateProgramWithSource -cl_program input_headers[2] = { foo_pg, myinc_pg }; -char * input_header_names[2] = { foo.h, mydir/myinc.h }; -clCompileProgram(program_A, - 0, NULL, // num_devices & device_list - NULL, // compile_options - 2, // num_input_headers - input_headers, - input_header_names, - NULL, NULL); // pfn_notify & user_data ----- + {CL_FILTER_LINEAR_anchor} - Returns a weighted average of the + four image elements nearest to the image coordinate. + + The default value is {CL_FILTER_NEAREST}. +ifdef::cl_khr_mipmap_image[] +| {CL_SAMPLER_MIP_FILTER_MODE_KHR_anchor} + +include::{generated}/api/version-notes/CL_SAMPLER_MIP_FILTER_MODE_KHR.asciidoc[] + | {cl_filter_mode_TYPE} + | Specifies the mipmap filter used when sampling from a mipmapped + image. + The available filters are: + + {CL_FILTER_NEAREST} - Use the nearest mipmap level to the image + coordinate. + + {CL_FILTER_LINEAR} - Use a weighted average of the two mipmap levels + nearest to the image coordinate. + + The default is {CL_FILTER_NEAREST}. +| {CL_SAMPLER_LOD_MIN_KHR_anchor} + +include::{generated}/api/version-notes/CL_SAMPLER_LOD_MIN_KHR.asciidoc[] + | {cl_float_TYPE} + | Specifies the minimum value to which the computed level of detail + _lambda_ is clamped when sampling from a mipmapped image. + + The default is `0.0f`. +| {CL_SAMPLER_LOD_MAX_KHR_anchor} + +include::{generated}/api/version-notes/CL_SAMPLER_LOD_MAX_KHR.asciidoc[] + | {cl_float_TYPE} + | Specifies the maximum value to which the computed level of detail + _lambda_ is clamped when sampling from a mipmapped image. + + The default is `MAXFLOAT`.
+endif::cl_khr_mipmap_image[] +|==== + +ifdef::cl_khr_mipmap_image[] +NOTE: When the `<>` extension is supported, the sampler +properties {CL_SAMPLER_MIP_FILTER_MODE_KHR}, {CL_SAMPLER_LOD_MIN_KHR} and +{CL_SAMPLER_LOD_MAX_KHR} cannot be specified with any samplers initialized +in the OpenCL program source. +Only the default values for these properties will be used. +To create a sampler with specific values for these properties, a sampler +object must be created with {clCreateSamplerWithProperties} and passed as an +argument to a kernel. +endif::cl_khr_mipmap_image[] // refError -{clCompileProgram} returns {CL_SUCCESS} if the function is executed +{clCreateSamplerWithProperties} returns a valid non-zero sampler object and +_errcode_ret_ is set to {CL_SUCCESS} if the sampler object is created successfully. -Otherwise, it returns one of the following errors: +Otherwise, it returns a `NULL` value with one of the following error values +returned in _errcode_ret_: - * {CL_INVALID_PROGRAM} if _program_ is not a valid program object. - * {CL_INVALID_VALUE} if _device_list_ is `NULL` and _num_devices_ is greater - than zero, or if _device_list_ is not `NULL` and _num_devices_ is zero. - * {CL_INVALID_VALUE} if _num_input_headers_ is zero and - _header_include_names_ or _input_headers_ are not `NULL` or if - _num_input_headers_ is not zero and _header_include_names_ or - _input_headers_ are `NULL`. - * {CL_INVALID_VALUE} if _pfn_notify_ is `NULL` but _user_data_ is not - `NULL`. - * {CL_INVALID_DEVICE} if device in _device_list_ is not in - the list of devices associated with _program_. - * {CL_INVALID_COMPILER_OPTIONS} if the compiler options specified by - _options_ are invalid. - * {CL_INVALID_OPERATION} if the compilation or build of a program executable - for any of the devices listed in _device_list_ by a previous call to - {clCompileProgram} or {clBuildProgram} for _program_ has not completed. - * {CL_COMPILER_NOT_AVAILABLE} if a compiler is not available, i.e. 
- {CL_DEVICE_COMPILER_AVAILABLE} specified in the - <> table is set to {CL_FALSE}. - * {CL_COMPILE_PROGRAM_FAILURE} if there is a failure to compile the program - source. - This error will be returned if {clCompileProgram} does not return until - the compile has completed. - * {CL_INVALID_OPERATION} if there are kernel objects attached to _program_. - * {CL_INVALID_OPERATION} if _program_ has no source or IL available, i.e. it - has not been created with {clCreateProgramWithSource} or - {clCreateProgramWithIL}. + * {CL_INVALID_CONTEXT} if _context_ is not a valid context. + * {CL_INVALID_VALUE} if the property name in _sampler_properties_ is not a + supported property name, if the value specified for a supported property + name is not valid, or if the same property name is specified more than + once. + * {CL_INVALID_OPERATION} if images are not supported by any device + associated with _context_ (i.e. {CL_DEVICE_IMAGE_SUPPORT} specified in the + <> table is {CL_FALSE}). * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required by the OpenCL implementation on the device. * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources required by the OpenCL implementation on the host. 
-- -[open,refpage='clLinkProgram',desc='Links a set of compiled program objects and libraries for all the devices or a specific device(s) in the OpenCL context and creates a library or executable.',type='protos'] +[open,refpage='clCreateSampler',desc='Creates a sampler object.',type='protos'] -- -To link a set of compiled program objects and libraries for all the devices -or a specific device(s) in the OpenCL context and create a library or -executable, call the function +To create a sampler object, call the function -include::{generated}/api/protos/clLinkProgram.txt[] -include::{generated}/api/version-notes/clLinkProgram.asciidoc[] +include::{generated}/api/protos/clCreateSampler.txt[] +include::{generated}/api/version-notes/clCreateSampler.asciidoc[] * _context_ must be a valid OpenCL context. - * _device_list_ is a pointer to a list of devices that are in _context_. - If _device_list_ is a `NULL` value, the link is performed for all devices - associated with _context_ for which a compiled object is available. - If _device_list_ is a non-`NULL` value, the link is performed for devices - specified in this list for which a compiled object is available. - * _num_devices_ is the number of devices listed in _device_list_. - * _options_ is a pointer to a null-terminated string of characters that - describes the link options to be used for building the program executable. - The list of supported options is as described in <>. - If the program was created using {clCreateProgramWithBinary} and _options_ - is a `NULL` pointer, the program will be linked as if _options_ were the - same as when the program binary was originally built. - If the program was created using {clCreateProgramWithBinary} and _options_ - string contains anything other than the same options in the same order - (whitespace ignored) as when the program binary was originally built, then - the behavior is implementation-defined. 
- Otherwise, if _options_ is a `NULL` pointer then it will have the same - result as the empty string. - * _num_input_programs_ specifies the number of programs in array referenced by - _input_programs_. - * _input_programs_ is an array of program objects that are compiled binaries - or libraries that are to be linked to create the program executable. - For each device in _device_list_ or if _device_list_ is `NULL` the list of - devices associated with context, the following cases occur: - ** All programs specified by _input_programs_ contain a compiled binary or - library for the device. - In this case, a link is performed to generate a program executable for - this device. - ** None of the programs contain a compiled binary or library for that - device. - In this case, no link is performed and there will be no program - executable generated for this device. - ** All other cases will return a {CL_INVALID_OPERATION} error. - * _pfn_notify_ is a function pointer to a notification routine. - The notification routine is a callback function that an application can - register and which will be called when the program executable has been built - (successfully or unsuccessfully). - * _user_data_ will be passed as an argument when _pfn_notify_ is called. - _user_data_ can be `NULL`. + * _normalized_coords_ has the same interpretation as + {CL_SAMPLER_NORMALIZED_COORDS} in the <>. + * _addressing_mode_ has the same interpretation as + {CL_SAMPLER_ADDRESSING_MODE} in the <>. + * _filter_mode_ has the same interpretation as + {CL_SAMPLER_FILTER_MODE} in the <>. + * _errcode_ret_ will return an appropriate error code. + If _errcode_ret_ is `NULL`, no error code is returned. -If _pfn_notify_ is not `NULL`, {clLinkProgram} does not need to wait for the -linker to complete, and can return immediately once the linking operation can -begin. -Once the linker has completed, the _pfn_notify_ callback function is called -which returns the program object returned by {clLinkProgram}. 
-Any state changes of the program object that result from calling {clLinkProgram} -(e.g. link status or log) will be observable from this callback function. -This callback function may be called asynchronously by the OpenCL -implementation. -It is the application's responsibility to ensure that the callback function -is thread-safe. +// refError -If _pfn_notify_ is `NULL`, {clLinkProgram} does not return until the linker -has completed. +{clCreateSampler} returns a valid non-zero sampler object and _errcode_ret_ is +set to {CL_SUCCESS} if the sampler object is created successfully. +Otherwise, it returns a `NULL` value with one of the following error values +returned in _errcode_ret_: -{clLinkProgram} creates a new program object which contains the library or -executable. -The library or executable binary can be queried using -{clGetProgramInfo}(_program_, {CL_PROGRAM_BINARIES}, ...) and can be specified -to {clCreateProgramWithBinary} to create a new program object. + * {CL_INVALID_CONTEXT} if _context_ is not a valid context. + * {CL_INVALID_VALUE} if _addressing_mode_, _filter_mode_, _normalized_coords_ + or a combination of these arguments are not valid. + * {CL_INVALID_OPERATION} if images are not supported by any device + associated with _context_ (i.e. {CL_DEVICE_IMAGE_SUPPORT} specified in the + <> table is {CL_FALSE}). + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required + by the OpenCL implementation on the device. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. +-- -The devices associated with the returned program object will be the list of -devices specified by _device_list_ or if _device_list_ is `NULL` it will be -the list of devices associated with _context_.
+[open,refpage='clRetainSampler',desc='Increments the sampler reference count.',type='protos'] +-- +To retain a sampler object, call the function -The linking operation can begin if the context, list of devices, input -programs and linker options specified are all valid and appropriate host and -device resources needed to perform the link are available. -If the linking operation can begin, {clLinkProgram} returns a valid non-zero -program object. +include::{generated}/api/protos/clRetainSampler.txt[] +include::{generated}/api/version-notes/clRetainSampler.asciidoc[] -// refError + * _sampler_ specifies the sampler to be retained. -If _pfn_notify_ is `NULL`, the _errcode_ret_ will be set to {CL_SUCCESS} if -the link operation was successful and {CL_LINK_PROGRAM_FAILURE} if there is a -failure to link the compiled binaries and/or libraries. +The _sampler_ reference count is incremented. +{clCreateSamplerWithProperties} and {clCreateSampler} perform an implicit +retain. -If _pfn_notify_ is not `NULL`, {clLinkProgram} does not have to wait until -the linker to complete and can return {CL_SUCCESS} in _errcode_ret_ if the -linking operation can begin. -The _pfn_notify_ callback function will return a {CL_SUCCESS} or -{CL_LINK_PROGRAM_FAILURE} if the linking operation was successful or not. +// refError -Otherwise {clLinkProgram} returns a `NULL` program object with an -appropriate error in _errcode_ret_. -The application should query the linker status of this program object to -check if the link was successful or not. -The list of errors that can be returned are: +{clRetainSampler} returns {CL_SUCCESS} if the function is executed +successfully. +Otherwise, it returns one of the following errors: - * {CL_INVALID_CONTEXT} if _context_ is not a valid context. - * {CL_INVALID_VALUE} if _device_list_ is `NULL` and _num_devices_ is greater - than zero, or if _device_list_ is not `NULL` and _num_devices_ is zero.
- * {CL_INVALID_VALUE} if _num_input_programs_ is zero and _input_programs_ is - `NULL` or if _num_input_programs_ is zero and _input_programs_ is not - `NULL` or if _num_input_programs_ is not zero and _input_programs_ is - `NULL`. - * {CL_INVALID_PROGRAM} if programs specified in _input_programs_ are not - valid program objects. - * {CL_INVALID_VALUE} if _pfn_notify_ is `NULL` but _user_data_ is not - `NULL`. - * {CL_INVALID_DEVICE} if any device in _device_list_ is not in - the list of devices associated with _context_. - * {CL_INVALID_LINKER_OPTIONS} if the linker options specified by _options_ - are invalid. - * {CL_INVALID_OPERATION} if the compilation or build of a program executable - for any of the devices listed in _device_list_ by a previous call to - {clCompileProgram} or {clBuildProgram} for _program_ has not completed. - * {CL_INVALID_OPERATION} if the rules for devices containing compiled - binaries or libraries as described in _input_programs_ argument above - are not followed. - * {CL_LINKER_NOT_AVAILABLE} if a linker is not available, i.e. - {CL_DEVICE_LINKER_AVAILABLE} specified in the - <> table is set to {CL_FALSE}. - * {CL_LINK_PROGRAM_FAILURE} if there is a failure to link the compiled - binaries and/or libraries. + * {CL_INVALID_SAMPLER} if _sampler_ is not a valid sampler object. * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required by the OpenCL implementation on the device. * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources required by the OpenCL implementation on the host. -- +[open,refpage='clReleaseSampler',desc='Decrements the sampler reference count.',type='protos'] +-- +To release a sampler object, call the function -[[compiler-options]] -=== Compiler Options - -The compiler options are categorized as pre-processor options, options for -math intrinsics, options that control optimization and miscellaneous -options. 
-This specification defines a standard set of options that must be supported -by the compiler when building program executables online or offline from -OpenCL C/{cpp} or, where relevant, from an IL. -These may be extended by a set of vendor- or platform-specific options. +include::{generated}/api/protos/clReleaseSampler.txt[] +include::{generated}/api/version-notes/clReleaseSampler.asciidoc[] + * _sampler_ specifies the sampler to be released. -[[preprocessor-options]] -==== Preprocessor options +The _sampler_ reference count is decremented. +The sampler object is deleted after the reference count becomes zero and +commands queued for execution on a command-queue(s) that use _sampler_ have +finished. -These options control the OpenCL C/{cpp} preprocessor which is run on each -program source before actual compilation. -These options are ignored for programs created with IL. +// refError -`-D name` :: - Predefine _name_ as a macro, with definition 1. +{clReleaseSampler} returns {CL_SUCCESS} if the function is executed +successfully. +Otherwise, it returns one of the following errors: -`-D name=definition` :: - The contents of _definition_ are tokenized and processed as if they - appeared during translation phase three in a `#define` directive. - In particular, the definition will be truncated by embedded newline - characters. -+ --- -`-D` options are processed in the order they are given in the _options_ -argument to {clBuildProgram} or {clCompileProgram}. -Note that a space is required between the `-D` option and the symbol it -defines, otherwise behavior is implementation-defined. --- + * {CL_INVALID_SAMPLER} if _sampler_ is not a valid sampler object. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required + by the OpenCL implementation on the device. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. 
-`-I dir` :: - Add the directory _dir_ to the list of directories to be searched for - header files. - _dir_ can optionally be enclosed in double quotes. -+ --- -This option is not portable due to its dependency on host file system and -host operating system. -It is supported for backwards compatibility with previous OpenCL versions. -Developers are encouraged to create and use explicit header objects by means -of {clCompileProgram} followed by {clLinkProgram}. +Using this function to release a reference that was not obtained by creating +the object or by calling {clRetainSampler} causes undefined behavior. -- -[[math-intrinsics-options]] -==== Math Intrinsics Options +=== Sampler Object Queries -These options control compiler behavior regarding floating-point arithmetic. -These options trade off between speed and correctness. +[open,refpage='clGetSamplerInfo',desc='Returns information about the sampler object.',type='protos'] +-- +To return information about a sampler object, call the function -`-cl-single-precision-constant` :: - This option forces implicit conversions of double-precision floating-point - literals to single precision. - This option is ignored for programs created with IL. +include::{generated}/api/protos/clGetSamplerInfo.txt[] +include::{generated}/api/version-notes/clGetSamplerInfo.asciidoc[] -`-cl-denorms-are-zero` :: - This option controls how single precision and double precision - denormalized numbers are handled. - If specified as a build option, the single precision denormalized - numbers may be flushed to zero; double precision denormalized numbers - may also be flushed to zero if the optional extension for double - precision is supported. - This is intended to be a performance hint and the OpenCL compiler can - choose not to flush denorms to zero if the device supports single - precision (or double precision) denormalized numbers. 
-+ --- -This option is ignored for single precision numbers if the device does not -support single precision denormalized numbers i.e. {CL_FP_DENORM} bit is not -set in {CL_DEVICE_SINGLE_FP_CONFIG}. + * _sampler_ specifies the sampler being queried. + * _param_name_ specifies the information to query. + The list of supported _param_name_ types and the information returned in + _param_value_ by {clGetSamplerInfo} is described in the + <> table. + * _param_value_ is a pointer to memory where the appropriate result being + queried is returned. + If _param_value_ is `NULL`, it is ignored. + * _param_value_size_ is used to specify the size in bytes of memory pointed to + by _param_value_. + This size must be {geq} size of return type as described in the + <> table. + * _param_value_size_ret_ returns the actual size in bytes of data being + queried by _param_name_. + If _param_value_size_ret_ is `NULL`, it is ignored. -This option is ignored for double precision numbers if the device does not -support double precision or if it does support double precision but not -double precision denormalized numbers i.e. {CL_FP_DENORM} bit is not set in -{CL_DEVICE_DOUBLE_FP_CONFIG}. +[[sampler-info-table]] +.List of supported param_names by {clGetSamplerInfo} +[width="100%",cols="<33%,<17%,<50%",options="header"] +|==== +| Sampler Info | Return Type | Description +| {CL_SAMPLER_REFERENCE_COUNT_anchor} footnote:[{fn-reference-count-usage}] -This flag only applies for scalar and vector single precision floating-point -variables and computations on these floating-point variables inside a -program. -It does not apply to reading from or writing to image objects. --- +include::{generated}/api/version-notes/CL_SAMPLER_REFERENCE_COUNT.asciidoc[] + | {cl_uint_TYPE} + | Return the _sampler_ reference count. 
+| {CL_SAMPLER_CONTEXT_anchor} -`-cl-fp32-correctly-rounded-divide-sqrt` :: - The `-cl-fp32-correctly-rounded-divide-sqrt` build option to - {clBuildProgram} or {clCompileProgram} allows an application to specify - that single precision floating-point divide (x/y and 1/x) and sqrt used - in the program source are correctly rounded. - If this build option is not specified, the minimum numerical accuracy of - single precision floating-point divide and sqrt are as defined in the - OpenCL C or OpenCL SPIR-V Environment specifications. -+ --- -This build option can only be specified if the -{CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT} is set in {CL_DEVICE_SINGLE_FP_CONFIG} (as -defined in the <> table) for devices -that the program is being build. -{clBuildProgram} or {clCompileProgram} will fail to compile the program for -a device if the `-cl-fp32-correctly-rounded-divide-sqrt` option is specified -and {CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT} is not set for the device. +include::{generated}/api/version-notes/CL_SAMPLER_CONTEXT.asciidoc[] + | {cl_context_TYPE} + | Return the context specified when the sampler is created. +// Note: This enum is used for two purposes: as a property and for a query. +// We use the property as the anchor. +| {CL_SAMPLER_NORMALIZED_COORDS} -Note: This option is <> version 1.2. --- +include::{generated}/api/version-notes/CL_SAMPLER_NORMALIZED_COORDS.asciidoc[] + | {cl_bool_TYPE} + | Return the normalized coords value associated with _sampler_. +// Note: This enum is used for two purposes: as a property and for a query. +// We use the property as the anchor. +| {CL_SAMPLER_ADDRESSING_MODE} +include::{generated}/api/version-notes/CL_SAMPLER_ADDRESSING_MODE.asciidoc[] + | {cl_addressing_mode_TYPE} + | Return the addressing mode value associated with _sampler_. +// Note: This enum is used for two purposes: as a property and for a query. +// We use the property as the anchor. 
+| {CL_SAMPLER_FILTER_MODE} -[[optimization-options]] -==== Optimization Options +include::{generated}/api/version-notes/CL_SAMPLER_FILTER_MODE.asciidoc[] + | {cl_filter_mode_TYPE} + | Return the filter mode value associated with _sampler_. -These options control various sorts of optimizations. -Turning on optimization flags makes the compiler attempt to improve the -performance and/or code size at the expense of compilation time and possibly -the ability to debug the program. +| {CL_SAMPLER_PROPERTIES_anchor} -`-cl-opt-disable` :: - This option disables all optimizations. - The default is optimizations are enabled. +include::{generated}/api/version-notes/CL_SAMPLER_PROPERTIES.asciidoc[] + | {cl_sampler_properties_TYPE}[] + | Return the properties argument specified in + {clCreateSamplerWithProperties}. -`-cl-strict-aliasing` :: - This option allows the compiler to assume the strictest aliasing rules. -+ --- -Note: This option is <> version 1.1. --- + If the _properties_ argument specified in {clCreateSamplerWithProperties} + used to create _sampler_ was not `NULL`, the implementation must return + the values specified in the properties argument in the same order and + without including additional properties. -`-cl-uniform-work-group-size` :: - This requires that the global work-size be a multiple of the work-group - size specified to {clEnqueueNDRangeKernel}. - Allow optimizations that are made possible by this restriction. -+ --- -Note: This option is <> version 2.0. --- + If _sampler_ was created using {clCreateSampler}, or if the _properties_ + argument specified in {clCreateSamplerWithProperties} was `NULL`, the + implementation must return _param_value_size_ret_ equal to 0, + indicating that there are no properties to be returned. +|==== -`-cl-no-subgroup-ifp` :: - This indicates that kernels in this program do not require sub-groups to - make independent forward progress. - Allows optimizations that are made possible by this restriction. 
- This option has no effect for devices that do not support independent - forward progress for sub-groups. -+ --- -Note: This option is <> version 2.1. +// refError + +{clGetSamplerInfo} returns {CL_SUCCESS} if the function is executed +successfully. +Otherwise, it returns one of the following errors: + + * {CL_INVALID_VALUE} if _param_name_ is not valid, or if size in bytes + specified by _param_value_size_ is < size of return type as described in + the <<sampler-info-table>> table and + _param_value_ is not `NULL`. + * {CL_INVALID_SAMPLER} if _sampler_ is not a valid sampler object. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required + by the OpenCL implementation on the device. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. -- -The following options control compiler behavior regarding floating-point -arithmetic. -These options trade off between performance and correctness and must be -specifically enabled. -These options are not turned on by default since it can result in incorrect -output for programs which depend on an exact implementation of IEEE 754 -rules/specifications for math functions. -`-cl-mad-enable` :: - Allow `a * b + c` to be replaced by a *mad* instruction. - The *mad* instruction may compute `a * b + c` with reduced accuracy - in the embedded profile. - See the OpenCL C or OpenCL SPIR-V Environment specification for accuracy - details. - On some hardware the *mad* instruction may provide better performance - than the expanded computation. +== Program Objects -`-cl-no-signed-zeros` :: - Allow optimizations for floating-point arithmetic that ignore the - signedness of zero. - IEEE 754 arithmetic specifies the distinct behavior of `+0.0` and `-0.0` - values, which then prohibits simplification of expressions such as `x - {plus} 0.0` or `0.0 * x` (even with `-cl-finite-math-only`). - This option implies that the sign of a zero result isn't significant.
+An OpenCL program consists of a set of kernels that are identified as +functions declared with the `+__kernel+` qualifier in the program source. +OpenCL programs may also contain auxiliary functions and constant data that +can be used by kernel functions. +The program executable can be generated _online_ or _offline_ by the OpenCL +compiler for the appropriate target device(s). -`-cl-unsafe-math-optimizations` :: - Allow optimizations for floating-point arithmetic that (a) assume that - arguments and results are valid, (b) may violate the IEEE 754 standard, - (c) assume relaxed OpenCL numerical compliance requirements as defined - in the unsafe math optimization section of the OpenCL C or OpenCL SPIR-V - Environment specifications, and (d) may violate edge case behavior in the - OpenCL C or OpenCL SPIR-V Environment specifications. - This option includes the `-cl-no-signed-zeros`, `-cl-mad-enable`, and - `-cl-denorms-are-zero` footnote:[{fn-unsafe-denorms-are-zero}] options. +A program object encapsulates the following information: -`-cl-finite-math-only` :: - Allow optimizations for floating-point arithmetic that assume that - arguments and results are not NaNs, +Inf, -Inf. - This option may violate the OpenCL numerical compliance requirements for - single precision and double precision floating-point, as well as edge - case behavior. + * An associated context. + * A program source or binary. + * The latest successfully built program executable, library or compiled + binary, the list of devices for which the program executable, library or + compiled binary is built, the build options used and a build log. + * The number of kernel objects currently attached. -`-cl-fast-relaxed-math` :: - Sets the optimization options `-cl-finite-math-only` and - `-cl-unsafe-math-optimizations`. - This option causes the preprocessor macro `+__FAST_RELAXED_MATH__+` to - be defined in the OpenCL program. 
+=== Creating Program Objects -==== Options to Request or Suppress Warnings +[open,refpage='clCreateProgramWithSource',desc='Creates a program object for a context, and loads source code specified by text strings into the program object.',type='protos'] +-- +To create a program object for a context and load source code into that +object, call the function -Warnings are diagnostic messages that report constructions which are not -inherently erroneous but which are risky or suggest there may have been an -error. -The following language-independent options do not enable specific warnings -but control the kinds of diagnostics produced by the OpenCL compiler. -These options are ignored for programs created with IL. +include::{generated}/api/protos/clCreateProgramWithSource.txt[] +include::{generated}/api/version-notes/clCreateProgramWithSource.asciidoc[] -`-w` :: - Inhibit all warning messages. + * _context_ must be a valid OpenCL context. + * _strings_ is an array of _count_ pointers to optionally null-terminated + character strings that make up the source code. + * _lengths_ argument is an array with the number of chars in each string + (the string length). + If an element in _lengths_ is zero, its accompanying string is + null-terminated. + If _lengths_ is `NULL`, all strings in the _strings_ argument are considered + null-terminated. + Any length value passed in that is greater than zero excludes the null + terminator in its count. + * _errcode_ret_ will return an appropriate error code. + If _errcode_ret_ is `NULL`, no error code is returned. -`-Werror` :: - Make all warnings into errors. +The source code specified by _strings_ will be loaded into the program +object. +The devices associated with the program object are the devices associated +with _context_. +The source code specified by _strings_ is either an OpenCL C program source, +header or implementation-defined source for custom devices that support an +online compiler.
+OpenCL {cpp} is not supported as an online-compiled kernel language through +this interface. -[[opencl-c-version]] -==== Options Controlling the OpenCL C version +// refError -The following option controls the version of OpenCL C that the compiler -accepts. -These options are ignored for programs created with IL. +{clCreateProgramWithSource} returns a valid non-zero program object and +_errcode_ret_ is set to {CL_SUCCESS} if the program object is created +successfully. +Otherwise, it returns a `NULL` value with one of the following error values +returned in _errcode_ret_: -`-cl-std=` :: - Determine the OpenCL C language version to use. - A value for this option must be provided. - Valid values are: -+ --- - * `CL1.1`: Support OpenCL C 1.1 language features defined in _section 6_ of - the OpenCL 1.1 specification or in the unified OpenCL C specification. - * `CL1.2`: Support OpenCL C 1.2 language features defined in _section 6_ of - the OpenCL 1.2 specification or in the unified OpenCL C specification. - * `CL2.0`: Support OpenCL C 2.0 language features defined in the OpenCL C 2.0 - specification or in the unified OpenCL C specification. - * `CL3.0`: Support OpenCL C 3.0 language features defined in the unified - OpenCL C specification. + * {CL_INVALID_CONTEXT} if _context_ is not a valid context. + * {CL_INVALID_VALUE} if _count_ is zero or if _strings_ or any entry in + _strings_ is `NULL`. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required + by the OpenCL implementation on the device. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. -- -Calls to {clBuildProgram} or {clCompileProgram} with the `-cl-std=CL1.1` -option *will fail* to compile the program for any devices with -{CL_DEVICE_OPENCL_C_VERSION} equal to OpenCL C 1.0 and when -{CL_DEVICE_OPENCL_C_ALL_VERSIONS} does not include OpenCL C 1.1. 
+[open,refpage='clCreateProgramWithIL',desc='Creates a program object for a context, and loads the IL into the program object.',type='protos',alias='clCreateProgramWithILKHR'] +-- +To create a program object for a context and load code in an intermediate +language into that object, call the function -Calls to {clBuildProgram} or {clCompileProgram} with the `-cl-std=CL1.2` -option *will fail* to compile the program for any devices with -{CL_DEVICE_OPENCL_C_VERSION} equal to OpenCL C 1.1 or earlier -and when {CL_DEVICE_OPENCL_C_ALL_VERSIONS} does not include OpenCL C 1.2. +include::{generated}/api/protos/clCreateProgramWithIL.txt[] +include::{generated}/api/version-notes/clCreateProgramWithIL.asciidoc[] -Calls to {clBuildProgram} or {clCompileProgram} with the `-cl-std=CL2.0` -option *will fail* to compile the program for any devices with -{CL_DEVICE_OPENCL_C_VERSION} equal to OpenCL C 1.2 or earlier -and when {CL_DEVICE_OPENCL_C_ALL_VERSIONS} does not include OpenCL C 2.0. +ifdef::cl_khr_il_program[] +or the equivalent -Calls to {clBuildProgram} or {clCompileProgram} with the `-cl-std=CL3.0` -option *will fail* to compile the program for any devices with -{CL_DEVICE_OPENCL_C_VERSION} equal to OpenCL C 2.0 or earlier -and when {CL_DEVICE_OPENCL_C_ALL_VERSIONS} does not include OpenCL C 3.0. +include::{generated}/api/protos/clCreateProgramWithILKHR.txt[] +include::{generated}/api/version-notes/clCreateProgramWithILKHR.asciidoc[] +endif::cl_khr_il_program[] -If the `-cl-std` build option is not specified, the highest OpenCL C 1.x -language version supported by each device is used when compiling the program -for each device. -Applications are required to specify the `-cl-std=CL2.0` build option to -compile or build programs with OpenCL C 2.0 and the `-cl-std=CL3.0` -build option to compile or build programs with OpenCL C 3.0. + * _context_ must be a valid OpenCL context. 
+ * _il_ is a pointer to a block of memory containing SPIR-V or an + implementation-defined intermediate language. + * _length_ is the length of the block pointed to by _il_. + * _errcode_ret_ will return an appropriate error code. + If _errcode_ret_ is `NULL`, no error code is returned. +The intermediate language pointed to by _il_ and with length in bytes _length_ +will be loaded into the program object. +The devices associated with the program object are the devices associated +with _context_. -==== Options for Querying Kernel Argument Information +// refError -IMPORTANT: Querying for kernel argument information is <> version 1.2. +{clCreateProgramWithIL} returns a valid non-zero program object and +_errcode_ret_ is set to {CL_SUCCESS} if the program object is created +successfully. +Otherwise, it returns a `NULL` value with one of the following error values +returned in _errcode_ret_: -`-cl-kernel-arg-info` :: - This option allows the compiler to store information about the arguments - of a kernel(s) in the program executable. - The argument information stored includes the argument name, its type, - the address space and access qualifiers used. - Refer to description of {clGetKernelArgInfo} on how to query this - information. + * {CL_INVALID_CONTEXT} if _context_ is not a valid context. + * {CL_INVALID_OPERATION} if no devices in _context_ support intermediate + language programs. + * {CL_INVALID_VALUE} if _il_ is `NULL` or if _length_ is zero. + * {CL_INVALID_VALUE} if the _length_-byte block of memory pointed to by + _il_ does not contain well-formed intermediate language input that can + be consumed by the OpenCL runtime. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required + by the OpenCL implementation on the device. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. 
+-- +[open,refpage='clCreateProgramWithBinary',desc='Creates a program object for a context, and loads binary bits into the program object.',type='protos'] +-- +To create a program object for a context and load binary bits into that +object, call the function -==== Options for debugging your program +include::{generated}/api/protos/clCreateProgramWithBinary.txt[] +include::{generated}/api/version-notes/clCreateProgramWithBinary.asciidoc[] -IMPORTANT: Debugging options are <> version 2.0. + * _context_ must be a valid OpenCL context. + * _device_list_ is a pointer to a list of devices that are in _context_. + _device_list_ must be a non-`NULL` value. + The binaries are loaded for devices specified in this list. + * _num_devices_ is the number of devices listed in _device_list_. + * _lengths_ is an array of the size in bytes of the program binaries to be + loaded for devices specified by _device_list_. + * _binaries_ is an array of pointers to program binaries to be loaded for + devices specified by _device_list_. + For each device given by _device_list_[i], the pointer to the program binary + for that device is given by _binaries_[i] and the length of this + corresponding binary is given by _lengths_[i]. + _lengths_[i] cannot be zero and _binaries_[i] cannot be a `NULL` pointer. + * _binary_status_ returns whether the program binary for each device specified + in _device_list_ was loaded successfully or not. + It is an array of _num_devices_ entries and returns {CL_SUCCESS} in + _binary_status_[i] if binary was successfully loaded for device specified by + _device_list_[i]; otherwise returns {CL_INVALID_VALUE} if _lengths_[i] is zero + or if _binaries_[i] is a `NULL` value or {CL_INVALID_BINARY} in + _binary_status_[i] if program binary is not a valid binary for the specified + device. + If _binary_status_ is `NULL`, it is ignored. + * _errcode_ret_ will return an appropriate error code. + If _errcode_ret_ is `NULL`, no error code is returned. 
-`-g` :: - This option can currently be used to generate additional errors for the - built-in functions that allow you to enqueue commands on a device (refer - to OpenCL kernel languages specifications). +The devices associated with the program object will be the list of devices +specified by _device_list_. +The list of devices specified by _device_list_ must be devices associated +with _context_. +The program binaries specified by _binaries_ will be loaded into the program +object. +They contain bits that describe one of the following: -[[linker-options]] -=== Linker Options + * a program executable to be run on the device(s) associated with + _context_, + * a compiled program for device(s) associated with _context_, or + * a library of compiled programs for device(s) associated with _context_. -NOTE: Linker options are <> version 1.2. +The program binary can consist of either or both: -This specification defines a standard set of linker options that must be -supported by the OpenCL C compiler when linking compiled programs online or -offline. -These linker options are categorized as library linking options and program -linking options. -These may be extended by a set of vendor- or platform-specific options. + * Device-specific code and/or, + * Implementation-specific intermediate representation (IR) which will be + converted to the device-specific code. +OpenCL allows applications to create a program object using the program +source or binary and build appropriate program executables. +This can be very useful as it allows applications to load program source and +then compile and link to generate a program executable online on its first +instance for appropriate OpenCL devices in the system. +These executables can now be queried and cached by the application. +The cached executables can be read and loaded by the application, which can +help significantly reduce the application initialization time. 
-==== Library Linking Options +ifdef::cl_khr_spir[] +If the `<>` extension is supported, {clCreateProgramWithBinary} +can be used to load a SPIR binary. +Once a program object has been created from a SPIR binary, {clBuildProgram} +can be called to build a program executable or {clCompileProgram} can be +called to compile the SPIR binary. +endif::cl_khr_spir[] -IMPORTANT: Library linking options are <> version -1.2. +// refError -The following options can be specified when creating a library of compiled -binaries. +{clCreateProgramWithBinary} returns a valid non-zero program object and +_errcode_ret_ is set to {CL_SUCCESS} if the program object is created +successfully. +Otherwise, it returns a `NULL` value with one of the following error values +returned in _errcode_ret_: -`-create-library` :: - Create a library of compiled binaries specified in _input_programs_ - argument to {clLinkProgram}. + * {CL_INVALID_CONTEXT} if _context_ is not a valid context. + * {CL_INVALID_VALUE} if _device_list_ is `NULL` or _num_devices_ is zero. + * {CL_INVALID_DEVICE} if any device in _device_list_ is not in + the list of devices associated with _context_. + * {CL_INVALID_VALUE} if _lengths_ or _binaries_ is `NULL` or if any entry + in _lengths_[i] is zero or _binaries_[i] is `NULL`. + * {CL_INVALID_BINARY} if an invalid program binary was encountered for any + device. + _binary_status_ will return specific status for each device. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required + by the OpenCL implementation on the device. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. +-- -`-enable-link-options` :: - Allows the linker to modify the library behavior based on one or more - link options (described in <>) when this library is linked with a program executable. - This option must be specified with the create-library option. 
+[open,refpage='clCreateProgramWithBuiltInKernels',desc='Creates a program object for a context, and loads the information related to the built-in kernels into a program object.',type='protos'] +-- +To create a program object for a context and load the information related +to the built-in kernels into that object, call the function +include::{generated}/api/protos/clCreateProgramWithBuiltInKernels.txt[] +include::{generated}/api/version-notes/clCreateProgramWithBuiltInKernels.asciidoc[] -[[program-linking-options]] -==== Program Linking Options + * _context_ must be a valid OpenCL context. + * _num_devices_ is the number of devices listed in _device_list_. + * _device_list_ is a pointer to a list of devices that are in _context_. + _device_list_ must be a non-`NULL` value. + The built-in kernels are loaded for devices specified in this list. + * _kernel_names_ is a semi-colon separated list of built-in kernel names. -The following options can be specified when linking a program executable. +The devices associated with the program object will be the list of devices +specified by _device_list_. +The list of devices specified by _device_list_ must be devices associated +with _context_. -`-cl-denorms-are-zero` + -`-cl-no-signed-zeros` + -`-cl-unsafe-math-optimizations` + -`-cl-finite-math-only` + -`-cl-fast-relaxed-math` + -`-cl-no-subgroup-ifp` (<> version 2.1) +// refError -The options are described in <> and <>. -The linker may apply these options to all compiled program objects -specified to {clLinkProgram}. -The linker may apply these options only to libraries which were created -with the option `-enable-link-options`. +{clCreateProgramWithBuiltInKernels} returns a valid non-zero program object +and _errcode_ret_ is set to {CL_SUCCESS} if the program object is created +successfully. +Otherwise, it returns a `NULL` value with one of the following error values +returned in _errcode_ret_: + * {CL_INVALID_CONTEXT} if _context_ is not a valid context.
+ * {CL_INVALID_VALUE} if _device_list_ is `NULL` or _num_devices_ is zero. + * {CL_INVALID_VALUE} if _kernel_names_ is `NULL` or _kernel_names_ contains + a kernel name that is not supported by any of the devices in + _device_list_. + * {CL_INVALID_DEVICE} if any device in _device_list_ is not in the list + of devices associated with _context_. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required + by the OpenCL implementation on the device. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. +-- -=== Unloading the OpenCL Compiler -[open,refpage='clUnloadPlatformCompiler',desc='Allows the implementation to release the resources allocated by the OpenCL compiler for a platform.',type='protos'] +=== Retaining and Releasing Program Objects + +[open,refpage='clRetainProgram',desc='Increments the program reference count.',type='protos'] -- -To unload an OpenCL compiler for a platform, call the function +To retain a program object, call the function -include::{generated}/api/protos/clUnloadPlatformCompiler.txt[] -include::{generated}/api/version-notes/clUnloadPlatformCompiler.asciidoc[] +include::{generated}/api/protos/clRetainProgram.txt[] +include::{generated}/api/version-notes/clRetainProgram.asciidoc[] - * _platform_ is the platform to unload. + * _program_ is the program object to be retained. -This function allows the implementation to release the resources allocated -by the OpenCL compiler for _platform_. -This is a hint from the application and does not guarantee that the compiler -will not be used in the future or that the compiler will actually be -unloaded by the implementation. -Calls to {clBuildProgram}, {clCompileProgram} or {clLinkProgram} after -{clUnloadPlatformCompiler} will reload the compiler, if necessary, to build -the appropriate program executable. +The _program_ reference count is incremented. +All APIs that create a program do an implicit retain. 
// refError -{clUnloadPlatformCompiler} returns {CL_SUCCESS} if the function is executed +{clRetainProgram} returns {CL_SUCCESS} if the function is executed successfully. Otherwise, it returns one of the following errors: - * {CL_INVALID_PLATFORM} if _platform_ is not a valid platform. + * {CL_INVALID_PROGRAM} if _program_ is not a valid program object. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required + by the OpenCL implementation on the device. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. -- -[open,refpage='clUnloadCompiler',desc='Allows the implementation to release the resources allocated by the OpenCL compiler.',type='protos'] +[open,refpage='clReleaseProgram',desc='Decrements the program reference count.',type='protos'] -- -Alternatively, if you are not using OpenCL via the ICD loader, you may unload the OpenCL compiler with the function +To release a program object, call the function -include::{generated}/api/protos/clUnloadCompiler.txt[] -include::{generated}/api/version-notes/clUnloadCompiler.asciidoc[] +include::{generated}/api/protos/clReleaseProgram.txt[] +include::{generated}/api/version-notes/clReleaseProgram.asciidoc[] -This function allows the implementation to release the resources allocated -by the OpenCL compiler. -This is a hint from the application and does not guarantee that the compiler -will not be used in the future or that the compiler will actually be -unloaded by the implementation. -Calls to {clBuildProgram}, {clCompileProgram} or {clLinkProgram} after -{clUnloadCompiler} will reload the compiler, if necessary, to build -the appropriate program executable. + * _program_ is the program object to be released. -// refError +The _program_ reference count is decremented. +The program object is deleted after all kernel objects associated with +_program_ have been deleted and the _program_ reference count becomes zero. 
-{clUnloadCompiler} will always return {CL_SUCCESS}. --- +// refError +{clReleaseProgram} returns {CL_SUCCESS} if the function is executed +successfully. +Otherwise, it returns one of the following errors: -=== Program Object Queries + * {CL_INVALID_PROGRAM} if _program_ is not a valid program object. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required + by the OpenCL implementation on the device. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. -[open,refpage='clGetProgramInfo',desc='Returns information about the program object.',type='protos'] +Using this function to release a reference that was not obtained by creating +the object or by calling {clRetainProgram} causes undefined behavior. -- -To return information about a program object, call the function - -include::{generated}/api/protos/clGetProgramInfo.txt[] -include::{generated}/api/version-notes/clGetProgramInfo.asciidoc[] - * _program_ specifies the program object being queried. - * _param_name_ specifies the information to query. - The list of supported _param_name_ types and the information returned in - _param_value_ by {clGetProgramInfo} is described in the - <> table. - * _param_value_ is a pointer to memory where the appropriate result being - queried is returned. - If _param_value_ is `NULL`, it is ignored. - * _param_value_size_ is used to specify the size in bytes of memory pointed to - by _param_value_. - This size must be {geq} size of return type as described in the - <> table. - * _param_value_size_ret_ returns the actual size in bytes of data being - queried by _param_name_. - If _param_value_size_ret_ is `NULL`, it is ignored. 
+[open,refpage='clSetProgramReleaseCallback',desc='Registers a destructor callback function with a program object.',type='protos'] +-- +To register a callback function with a program object that is called when +the program object is destroyed, call the function -[[program-info-table]] -.List of supported param_names by {clGetProgramInfo} -[width="100%",cols="<33%,<17%,<50%",options="header"] -|==== -| Program Info | Return Type | Description -| {CL_PROGRAM_REFERENCE_COUNT_anchor} footnote:[{fn-reference-count-usage}] +include::{generated}/api/protos/clSetProgramReleaseCallback.txt[] +include::{generated}/api/version-notes/clSetProgramReleaseCallback.asciidoc[] -include::{generated}/api/version-notes/CL_PROGRAM_REFERENCE_COUNT.asciidoc[] - | {cl_uint_TYPE} - | Return the _program_ reference count. -| {CL_PROGRAM_CONTEXT_anchor} + * _program_ specifies the program object to register the callback to. + * _pfn_notify_ is the callback function to register. + This callback function may be called asynchronously by the OpenCL + implementation. + It is the application's responsibility to ensure that the callback function + is thread-safe. + The parameters to this callback function are: + ** _program_ is the program being deleted. + When the callback function is called by the implementation, this program + object is no longer valid. + _program_ is only provided for reference purposes. + ** _user_data_ is a pointer to user supplied data. + * _user_data_ will be passed as the _user_data_ argument when _pfn_notify_ is + called. + _user_data_ can be `NULL`. -include::{generated}/api/version-notes/CL_PROGRAM_CONTEXT.asciidoc[] - | {cl_context_TYPE} - | Return the context specified when the program object is created -| {CL_PROGRAM_NUM_DEVICES_anchor} +Each call to {clSetProgramReleaseCallback} registers the specified +callback function on a callback stack associated with _program_. +The registered callback functions are called in the reverse order in +which they were registered.
+The registered callback functions are called after destructors (if any) for +program scope global variables (if any) are called and before the program +object is deleted. +This provides a mechanism for an application to be notified when destructors +for program scope global variables are complete. -include::{generated}/api/version-notes/CL_PROGRAM_NUM_DEVICES.asciidoc[] - | {cl_uint_TYPE} - | Return the number of devices associated with _program_. -| {CL_PROGRAM_DEVICES_anchor} +// refError -include::{generated}/api/version-notes/CL_PROGRAM_DEVICES.asciidoc[] - | {cl_device_id_TYPE}[] - | Return the list of devices associated with the program object. - This can be the devices associated with context on which the program - object has been created or can be a subset of devices that are - specified when a program object is created using - {clCreateProgramWithBinary}. -| {CL_PROGRAM_SOURCE_anchor} +{clSetProgramReleaseCallback} may unconditionally return an error if no +devices in the context associated with _program_ support destructors for +program scope global variables. +Support for constructors and destructors for program scope global variables +is required only for OpenCL 2.2 devices. -include::{generated}/api/version-notes/CL_PROGRAM_SOURCE.asciidoc[] - | {char_TYPE}[] - | Return the program source code specified by - {clCreateProgramWithSource}. - The source string returned is a concatenation of all source strings - specified to {clCreateProgramWithSource} with a null terminator. - The concatenation strips any nulls in the original source strings. +{clSetProgramReleaseCallback} returns {CL_SUCCESS} if the function is executed +successfully. 
+Otherwise, it returns one of the following errors: - If _program_ is created using {clCreateProgramWithBinary}, - {clCreateProgramWithIL} or {clCreateProgramWithBuiltInKernels}, a - null string or the appropriate program source code is returned - depending on whether or not the program source code is stored in the - binary. + * {CL_INVALID_PROGRAM} if _program_ is not a valid program object. + * {CL_INVALID_OPERATION} if no devices in the context associated with + _program_ support destructors for program scope global variables. + * {CL_INVALID_VALUE} if _pfn_notify_ is `NULL`. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required + by the OpenCL implementation on the device. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. +-- - The actual number of characters that represents the program source - code including the null terminator is returned in - _param_value_size_ret_. -| {CL_PROGRAM_IL_anchor} -include::{generated}/api/version-notes/CL_PROGRAM_IL.asciidoc[] -Also see extension *cl_khr_il_program*. - | {char_TYPE}[] - | Returns the program IL for programs created with - {clCreateProgramWithIL}. +=== Setting SPIR-V Specialization Constants - If _program_ is created with {clCreateProgramWithSource}, - {clCreateProgramWithBinary} or {clCreateProgramWithBuiltInKernels} - the memory pointed to by param_value will be unchanged and - param_value_size_retwill be set to 0. -| {CL_PROGRAM_BINARY_SIZES_anchor} +NOTE: Specialization constants are <> version +2.2. -include::{generated}/api/version-notes/CL_PROGRAM_BINARY_SIZES.asciidoc[] - | {size_t_TYPE}[] - | Returns an array that contains the size in bytes of the program - binary (could be an executable binary, compiled binary or library - binary) for each device associated with program. - The size of the array is the number of devices associated with - program. 
- If a binary is not available for a device(s), a size of zero is - returned. +[open,refpage='clSetProgramSpecializationConstant',desc='',type='protos'] +-- +To set the value of a specialization constant, call the function - If _program_ is created using {clCreateProgramWithBuiltInKernels}, - the implementation may return zero in any entries of the returned - array. -| {CL_PROGRAM_BINARIES_anchor} +include::{generated}/api/protos/clSetProgramSpecializationConstant.txt[] +include::{generated}/api/version-notes/clSetProgramSpecializationConstant.asciidoc[] -include::{generated}/api/version-notes/CL_PROGRAM_BINARIES.asciidoc[] - | {unsigned_char_TYPE}*[] - | Return the program binaries (could be an executable binary, compiled - binary or library binary) for all devices associated with program. - For each device in program, the binary returned can be the binary - specified for the device when program is created with - {clCreateProgramWithBinary} or it can be the executable binary - generated by {clBuildProgram} or {clLinkProgram}. - If _program_ is created with {clCreateProgramWithSource} or - {clCreateProgramWithIL}, the binary returned is the binary generated - by {clBuildProgram}, {clCompileProgram} or {clLinkProgram}. - The bits returned can be an implementation-specific intermediate - representation (a.k.a. IR) or device specific executable bits or - both. - The decision on which information is returned in the binary is up to - the OpenCL implementation. + * _program_ must be a valid OpenCL program created from an intermediate + language (e.g. SPIR-V). + * _spec_id_ identifies the specialization constant whose value will be + set. + * _spec_size_ specifies the size in bytes of the data pointed to by + _spec_value_. + This should be 1 for boolean constants. + For all other constant types this should match the size of the + specialization constant in the module. 
+ * _spec_value_ is a pointer to the memory location that contains the value of + the specialization constant. + The data pointed to by _spec_value_ are copied and can be safely reused by + the application after {clSetProgramSpecializationConstant} returns. + This specialization value will be used by subsequent calls to + {clBuildProgram} until another call to {clSetProgramSpecializationConstant} + changes it. + If a specialization constant is a boolean constant, _spec_value_ should be a + pointer to a {cl_uchar_TYPE} value. + A value of zero will set the specialization constant to false; any other + value will set it to true. - param_value points to an array of `n` pointers allocated by the - caller, where `n` is the number of devices associated with program. - The buffer sizes needed to allocate the memory that these `n` - pointers refer to can be queried using the {CL_PROGRAM_BINARY_SIZES} - query as described in this table. +Calling this function multiple times for the same specialization constant +shall cause the last provided value to override any previously specified +value. +The values are used by a subsequent {clBuildProgram} call for the _program_. - Each entry in this array is used by the implementation as the - location in memory where to copy the program binary for a specific - device, if there is a binary available. - To find out which device the program binary in the array refers to, - use the {CL_PROGRAM_DEVICES} query to get the list of devices. - There is a one-to-one correspondence between the array of n pointers - returned by {CL_PROGRAM_BINARIES} and array of devices returned by - {CL_PROGRAM_DEVICES}. -| {CL_PROGRAM_NUM_KERNELS_anchor} +Application is not required to provide values for every specialization +constant contained in the module. If the value is not set by this API +call, default values will be used during the build. 
-include::{generated}/api/version-notes/CL_PROGRAM_NUM_KERNELS.asciidoc[] - | {size_t_TYPE} - | Returns the number of kernels declared in _program_ that can be - created with {clCreateKernel}. - This information is only available after a successful program - executable has been built for at least one device in the list of - devices associated with _program_. -| {CL_PROGRAM_KERNEL_NAMES_anchor} +// refError -include::{generated}/api/version-notes/CL_PROGRAM_KERNEL_NAMES.asciidoc[] - | {char_TYPE}[] - | Returns a semi-colon separated list of kernel names in _program_ - that can be created with {clCreateKernel}. - This information is only available after a successful program - executable has been built for at least one device in the list of - devices associated with _program_. -| {CL_PROGRAM_SCOPE_GLOBAL_CTORS_PRESENT_anchor} +{clSetProgramSpecializationConstant} returns {CL_SUCCESS} if the function is +executed successfully. -include::{generated}/api/version-notes/CL_PROGRAM_SCOPE_GLOBAL_CTORS_PRESENT.asciidoc[] - | {cl_bool_TYPE} - | This indicates that the _program_ object contains non-trivial - constructor(s) that will be executed by runtime before any kernel - from the program is executed. - This information is only available after a successful program - executable has been built for at least one device in the list of - devices associated with _program_. +Otherwise, it returns one of the following errors: - Querying {CL_PROGRAM_SCOPE_GLOBAL_CTORS_PRESENT} may unconditionally - return {CL_FALSE} if no devices associated with _program_ support - constructors for program scope global variables. - Support for constructors and destructors for program scope global - variables is required only for OpenCL 2.2 devices. 
-| {CL_PROGRAM_SCOPE_GLOBAL_DTORS_PRESENT_anchor} - -include::{generated}/api/version-notes/CL_PROGRAM_SCOPE_GLOBAL_DTORS_PRESENT.asciidoc[] - | {cl_bool_TYPE} - | This indicates that the program object contains non-trivial - destructor(s) that will be executed by runtime when _program_ is - destroyed. - This information is only available after a successful program - executable has been built for at least one device in the list of - devices associated with _program_. - - Querying {CL_PROGRAM_SCOPE_GLOBAL_CTORS_PRESENT} may unconditionally - return {CL_FALSE} if no devices associated with _program_ support - destructors for program scope global variables. - Support for constructors and destructors for program scope global - variables is required only for OpenCL 2.2 devices. -|==== - -// refError - -{clGetProgramInfo} returns {CL_SUCCESS} if the function is executed -successfully. -Otherwise, it returns one of the following errors: - - * {CL_INVALID_VALUE} if _param_name_ is not valid, or if size in bytes - specified by _param_value_size_ is < size of return type as described in - the <> table and - _param_value_ is not `NULL`. - * {CL_INVALID_PROGRAM} if _program_ is a not a valid program object. - * {CL_INVALID_PROGRAM_EXECUTABLE} if _param_name_ is - {CL_PROGRAM_NUM_KERNELS}, {CL_PROGRAM_KERNEL_NAMES}, - {CL_PROGRAM_SCOPE_GLOBAL_CTORS_PRESENT}, or - {CL_PROGRAM_SCOPE_GLOBAL_DTORS_PRESENT} and a successful program executable - has not been built for at least one device in the list of devices - associated with _program_. + * {CL_INVALID_PROGRAM} if _program_ is not a valid program object created + from an intermediate language (e.g. SPIR-V), or if the intermediate + language does not support specialization constants. + * {CL_INVALID_OPERATION} if no devices associated with _program_ support + intermediate language programs. + * {CL_COMPILER_NOT_AVAILABLE} if _program_ is created with + {clCreateProgramWithIL} and a compiler is not + available, i.e. 
{CL_DEVICE_COMPILER_AVAILABLE} specified in the + <> table is set to {CL_FALSE}. + * {CL_INVALID_SPEC_ID} if _spec_id_ is not a valid specialization constant + identifier. + * {CL_INVALID_VALUE} if _spec_size_ does not match the size of the + specialization constant in the module, or if _spec_value_ is + `NULL`. * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required by the OpenCL implementation on the device. * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources required by the OpenCL implementation on the host. -- -[open,refpage='clGetProgramBuildInfo',desc='Returns build information for each device in the program object.',type='protos'] --- -To return build information for each device in the program object, call the -function - -include::{generated}/api/protos/clGetProgramBuildInfo.txt[] -include::{generated}/api/version-notes/clGetProgramBuildInfo.asciidoc[] - - * _program_ specifies the program object being queried. - * _device_ specifies the device for which build information is being queried. - _device_ must be a valid device associated with _program_. - * _param_name_ specifies the information to query. - The list of supported _param_name_ types and the information returned in - _param_value_ by {clGetProgramBuildInfo} is described in the - <> table. - * _param_value_ is a pointer to memory where the appropriate result being - queried is returned. - If _param_value_ is `NULL`, it is ignored. - * _param_value_size_ is used to specify the size in bytes of memory pointed to - by _param_value_. - This size must be {geq} size of return type as described in the - <> table. - * _param_value_size_ret_ returns the actual size in bytes of data being - queried by _param_name_. - If _param_value_size_ret_ is `NULL`, it is ignored. 
- -[[program-build-info-table]] -.List of supported param_names by {clGetProgramBuildInfo} -[width="100%",cols="<33%,<17%,<50%",options="header"] -|==== -| Program Build Info | Return Type | Description -| {CL_PROGRAM_BUILD_STATUS_anchor} - -include::{generated}/api/version-notes/CL_PROGRAM_BUILD_STATUS.asciidoc[] - | {cl_build_status_TYPE} - | Returns the build, compile or link status, whichever was performed - last on the specified _program_ object for _device_. - - This can be one of the following: - - {CL_BUILD_NONE_anchor} - The build status returned if no {clBuildProgram}, - {clCompileProgram} or {clLinkProgram} has been performed on the - specified _program_ object for _device_). - - {CL_BUILD_ERROR_anchor} - The build status returned if {clBuildProgram}, - {clCompileProgram} or {clLinkProgram} - whichever was performed last - on the specified _program_ object for _device_ - generated an error. - - {CL_BUILD_SUCCESS_anchor} - The build status returned if {clBuildProgram}, - {clCompileProgram} or {clLinkProgram} - whichever was performed last - on the specified _program_ object for _device_ - was successful. - - {CL_BUILD_IN_PROGRESS_anchor} - The build status returned if - {clBuildProgram}, {clCompileProgram} or {clLinkProgram} - whichever - was performed last on the specified _program_ object for _device_ - has - not finished. -| {CL_PROGRAM_BUILD_OPTIONS_anchor} - -include::{generated}/api/version-notes/CL_PROGRAM_BUILD_OPTIONS.asciidoc[] - | {char_TYPE}[] - | Return the build, compile or link options specified by the options - argument in {clBuildProgram}, {clCompileProgram} or {clLinkProgram}, - whichever was performed last on the specified _program_ object for - _device_. - If build status of the specified _program_ for _device_ is - {CL_BUILD_NONE}, an empty string is returned. 
-| {CL_PROGRAM_BUILD_LOG_anchor} - -include::{generated}/api/version-notes/CL_PROGRAM_BUILD_LOG.asciidoc[] - | {char_TYPE}[] - | Return the build, compile or link log for {clBuildProgram}, - {clCompileProgram} or {clLinkProgram}, whichever was performed last - on program for device. - - If build status of the specified _program_ for _device_ is - {CL_BUILD_NONE}, an empty string is returned. -| {CL_PROGRAM_BINARY_TYPE_anchor} - -include::{generated}/api/version-notes/CL_PROGRAM_BINARY_TYPE.asciidoc[] - | {cl_program_binary_type_TYPE} - | Return the program binary type for device. - This can be one of the following values: +=== Building Program Executables - {CL_PROGRAM_BINARY_TYPE_NONE_anchor} - There is no binary associated - with the specified _program_ object for _device_. +[open,refpage='clBuildProgram',desc='Builds (compiles and links) a program executable from the program source or binary.',type='protos'] +-- +To build (compile & link) a program executable, call the function - {CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT_anchor} - A compiled binary is - associated with _device_. - This is the case when the specified _program_ object was created using - {clCreateProgramWithSource} and compiled using {clCompileProgram}, or - when a compiled binary was loaded using {clCreateProgramWithBinary}. +include::{generated}/api/protos/clBuildProgram.txt[] +include::{generated}/api/version-notes/clBuildProgram.asciidoc[] - {CL_PROGRAM_BINARY_TYPE_LIBRARY_anchor} - A library binary is - associated with _device_. - This is the case when the specified _program_ object was linked by - {clLinkProgram} using the `-create-library` link option, or when a - compiled library binary was loaded using {clCreateProgramWithBinary}. + * _program_ is the program object. + * _device_list_ is a pointer to a list of devices associated with _program_. 
+ If _device_list_ is a `NULL` value, the program executable is built for all + devices associated with _program_ for which a source or binary has been + loaded. + If _device_list_ is a non-`NULL` value, the program executable is built for + devices specified in this list for which a source or binary has been loaded. + * _num_devices_ is the number of devices listed in _device_list_. + * _options_ is a pointer to a null-terminated string of characters that + describes the build options to be used for building the program executable. + The list of supported options is described in <<compiler-options,Compiler Options>>. + If the program was created using {clCreateProgramWithBinary} and _options_ + is a `NULL` pointer, the program will be built as if _options_ were the same + as when the program binary was originally built. + If the program was created using {clCreateProgramWithBinary} and _options_ + string contains anything other than the same options in the same order + (whitespace ignored) as when the program binary was originally built, then + the behavior is implementation-defined. + Otherwise, if _options_ is a `NULL` pointer then it will have the same + result as the empty string. + * _pfn_notify_ is a function pointer to a notification routine. + The notification routine is a callback function that an application can + register and which will be called when the program executable has been built + (successfully or unsuccessfully). + If _pfn_notify_ is not `NULL`, {clBuildProgram} does not need to wait for + the build to complete and can return immediately once the build operation + can begin. + Any state changes of the program object that result from calling + {clBuildProgram} (e.g. build status or log) will be observable from this + callback function.
+ The build operation can begin if the context, program whose sources are + being compiled and linked, list of devices and build options specified are + all valid and appropriate host and device resources needed to perform the + build are available. + If _pfn_notify_ is `NULL`, {clBuildProgram} does not return until the build + has completed. + This callback function may be called asynchronously by the OpenCL + implementation. + It is the application's responsibility to ensure that the callback function + is thread-safe. + * _user_data_ will be passed as an argument when _pfn_notify_ is called. + _user_data_ can be `NULL`. - {CL_PROGRAM_BINARY_TYPE_EXECUTABLE_anchor} - An executable binary is - associated with _device_. - This is the case when the specified _program_ object was linked by - {clLinkProgram} without the `-create-library` link option, or when an - executable binary was built using {clBuildProgram}. -| {CL_PROGRAM_BUILD_GLOBAL_VARIABLE_TOTAL_SIZE_anchor} +The program executable is built from the program source or binary for all +the devices, or a specific device(s) in the OpenCL context associated with +_program_. +OpenCL allows program executables to be built using the source or the +binary. +{clBuildProgram} must be called for _program_ created using +{clCreateProgramWithSource}, {clCreateProgramWithIL} or +{clCreateProgramWithBinary} to build the program executable for one or more +devices associated with _program_. +If _program_ is created with {clCreateProgramWithBinary}, then the program +binary must be an executable binary (not a compiled binary or library). -include::{generated}/api/version-notes/CL_PROGRAM_BUILD_GLOBAL_VARIABLE_TOTAL_SIZE.asciidoc[] - | {size_t_TYPE} - | The total amount of storage, in bytes, used by program variables in - the global address space. -|==== +The executable binary can be queried using {clGetProgramInfo}(_program_, +{CL_PROGRAM_BINARIES}, ...) 
and can be specified to +{clCreateProgramWithBinary} to create a new program object. // refError -{clGetProgramBuildInfo} returns {CL_SUCCESS} if the function is executed +{clBuildProgram} returns {CL_SUCCESS} if the function is executed successfully. Otherwise, it returns one of the following errors: - * {CL_INVALID_DEVICE} if _device_ is not in the list of devices associated - with _program_. - * {CL_INVALID_VALUE} if _param_name_ is not valid, or if size in bytes - specified by _param_value_size_ is < size of return type as described in - the <> table and - _param_value_ is not `NULL`. - * {CL_INVALID_PROGRAM} if _program_ is a not a valid program object. + * {CL_INVALID_PROGRAM} if _program_ is not a valid program object. + * {CL_INVALID_VALUE} if _device_list_ is `NULL` and _num_devices_ is greater + than zero, or if _device_list_ is not `NULL` and _num_devices_ is zero. + * {CL_INVALID_VALUE} if _pfn_notify_ is `NULL` but _user_data_ is not + `NULL`. + * {CL_INVALID_DEVICE} if any device in _device_list_ is not in + the list of devices associated with _program_. + * {CL_INVALID_BINARY} if _program_ is created with + {clCreateProgramWithBinary} and devices listed in _device_list_ do not + have a valid program binary loaded. + * {CL_INVALID_BUILD_OPTIONS} if the build options specified by _options_ are + invalid. + * {CL_COMPILER_NOT_AVAILABLE} if _program_ is created with +ifdef::cl_khr_il_program[{clCreateProgramWithILKHR},] + {clCreateProgramWithSource} or {clCreateProgramWithIL} and a compiler is + not available, i.e. {CL_DEVICE_COMPILER_AVAILABLE} specified in the + <> table is set to {CL_FALSE}. + * {CL_BUILD_PROGRAM_FAILURE} if there is a failure to build the program + executable. + This error will be returned if {clBuildProgram} does not return until + the build has completed. 
+ * {CL_INVALID_OPERATION} if the build of a program executable for any of the + devices listed in _device_list_ by a previous call to {clBuildProgram} + for _program_ has not completed. + * {CL_INVALID_OPERATION} if there are kernel objects attached to _program_. + * {CL_INVALID_OPERATION} if _program_ was not created with + {clCreateProgramWithSource}, {clCreateProgramWithIL} or + {clCreateProgramWithBinary}. * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required by the OpenCL implementation on the device. * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources required by the OpenCL implementation on the host. +-- -[NOTE] -==== -A program binary (compiled binary, library binary or executable binary) -built for a parent device can be used by all its sub-devices. -If a program binary has not been built for a sub-device, the program binary -associated with the parent device will be used. - -A program binary for a device specified with {clCreateProgramWithBinary} or -queried using {clGetProgramInfo} can be used as the binary for the -associated root device, and all sub-devices created from the root-level -device or sub-devices thereof. -==== --- +=== Separate Compilation and Linking of Programs -== Kernel Objects - -A kernel is a function declared in a program. -A kernel is identified by the `+__kernel+` qualifier applied to any function -in a program. -A kernel object encapsulates the specific `+__kernel+` function declared in -a program and the argument values to be used when executing this -`+__kernel+` function. +NOTE: Separate compilation and linking are <> +version 1.2. +OpenCL programs are compiled and linked to support the following: -=== Creating Kernel Objects + * Separate compilation and link stages. + Program sources can be compiled to generate a compiled binary object and + linked in a separate stage with other compiled program objects to the + program executable. + * Embedded headers. 
+ In OpenCL 1.0 and 1.1, the `-I` build option could be used to specify the + list of directories to be searched for header files that are included + by a program source(s). + OpenCL 1.2 extends this by allowing the header sources to come from + program objects instead of just header files. + * Libraries. + The linker can be used to link compiled objects and libraries into a + program executable or to create a library of compiled binaries. -[open,refpage='clCreateKernel',desc='Creates a kernel object.',type='protos'] +[open,refpage='clCompileProgram',desc='Compiles a program\'s source for all the devices or a specific device(s) in the OpenCL context associated with a program.',type='protos'] -- -To create a kernel object, use the function - -include::{generated}/api/protos/clCreateKernel.txt[] -include::{generated}/api/version-notes/clCreateKernel.asciidoc[] +To compile a program's source for all the devices or a specific device(s) in +the OpenCL context associated with the program, call the function - * _program_ is a program object with a successfully built executable. - * _kernel_name_ is a function name in the program declared with the - `+__kernel+` qualifier. - * _errcode_ret_ will return an appropriate error code. - If _errcode_ret_ is `NULL`, no error code is returned. +include::{generated}/api/protos/clCompileProgram.txt[] +include::{generated}/api/version-notes/clCompileProgram.asciidoc[] -// refError + * _program_ is the program object that is the compilation target. + * _device_list_ is a pointer to a list of devices associated with _program_. + If _device_list_ is a `NULL` value, the compile is performed for all devices + associated with _program_. + If _device_list_ is a non-`NULL` value, the compile is performed for devices + specified in this list. + * _num_devices_ is the number of devices listed in _device_list_.
+ * _options_ is a pointer to a null-terminated string of characters that + describes the compilation options to be used for building the program + executable. + If _options_ is a `NULL` pointer then it will have the same result as the + empty string. + Certain options are ignored when _program_ is created with IL. + The list of supported options is as described in <>. + * _num_input_headers_ specifies the number of programs that describe headers + in the array referenced by _input_headers_. + * _input_headers_ is an array of program embedded headers created with + {clCreateProgramWithSource}. + * _header_include_names_ is an array that has a one to one correspondence with + _input_headers_. + Each entry in _header_include_names_ specifies the include name used by + source in _program_ that comes from an embedded header. + The corresponding entry in _input_headers_ identifies the program object + which contains the header source to be used. + The embedded headers are first searched before the headers in the list of + directories specified by the `-I` compile option (as described in + <>). + If multiple entries in _header_include_names_ refer to the same header name, + the first one encountered will be used. + * _pfn_notify_ is a function pointer to a notification routine. + The notification routine is a callback function that an application can + register and which will be called when the program executable has been built + (successfully or unsuccessfully). + If _pfn_notify_ is not `NULL`, {clCompileProgram} does not need to wait for + the compiler to complete and can return immediately once the compilation can + begin. + Any state changes of the program object that result from calling + {clCompileProgram} (e.g. compile status or log) will be observable from this + callback function. 
+ The compilation can begin if the context, program whose sources are being + compiled, list of devices, input headers, programs that describe input + headers and compiler options specified are all valid and appropriate host + and device resources needed to perform the compile are available. + If _pfn_notify_ is `NULL`, {clCompileProgram} does not return until the + compiler has completed. + This callback function may be called asynchronously by the OpenCL + implementation. + It is the application's responsibility to ensure that the callback function + is thread-safe. + * _user_data_ will be passed as an argument when _pfn_notify_ is called. + _user_data_ can be `NULL`. -{clCreateKernel} returns a valid non-zero kernel object and _errcode_ret_ is -set to {CL_SUCCESS} if the kernel object is created successfully. -Otherwise, it returns a `NULL` value with one of the following error values -returned in _errcode_ret_: +The pre-processor runs before the program sources are compiled. +The compiled binary is built for all devices associated with _program_ or +the list of devices specified. +The compiled binary can be queried using {clGetProgramInfo}(_program_, +{CL_PROGRAM_BINARIES}, ...) and can be passed to {clCreateProgramWithBinary} +to create a new program object. - * {CL_INVALID_PROGRAM} if _program_ is not a valid program object. - * {CL_INVALID_PROGRAM_EXECUTABLE} if there is no successfully built - executable for _program_. - * {CL_INVALID_KERNEL_NAME} if _kernel_name_ is not found in _program_. - * {CL_INVALID_KERNEL_DEFINITION} if the function definition for `+__kernel+` - function given by _kernel_name_ such as the number of arguments, the - argument types are not the same for all devices for which the _program_ - executable has been built. - * {CL_INVALID_VALUE} if _kernel_name_ is `NULL`. - * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required - by the OpenCL implementation on the device. 
- * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources - required by the OpenCL implementation on the host. --- +If _program_ was created using {clCreateProgramWithIL}, then +_num_input_headers_, _input_headers_, and _header_include_names_ are +ignored. -[open,refpage='clCreateKernelsInProgram',desc='Creates kernel objects for all kernel functions in a program object.',type='protos'] --- -To create kernel objects for all kernel functions in a program, -call the function +For example, consider the following program source: -include::{generated}/api/protos/clCreateKernelsInProgram.txt[] -include::{generated}/api/version-notes/clCreateKernelsInProgram.asciidoc[] +[source,opencl_c] +---- +#include <foo.h> +#include <mydir/myinc.h> +__kernel void +image_filter (int n, int m, + __constant float *filter_weights, + __read_only image2d_t src_image, + __write_only image2d_t dst_image) +{ +... +} +---- - * _program_ is a program object with a successfully built executable. - * _num_kernels_ is the size of memory pointed to by _kernels_ specified as the - number of {cl_kernel_TYPE} entries. - * _kernels_ is the buffer where the kernel objects for kernels in _program_ - will be returned. - If _kernels_ is `NULL`, it is ignored. - If _kernels_ is not `NULL`, _num_kernels_ must be greater than or equal to - the number of kernels in _program_. - * _num_kernels_ret_ is the number of kernels in _program_. - If _num_kernels_ret_ is `NULL`, it is ignored. +This kernel includes two headers foo.h and mydir/myinc.h. +The following describes how these headers can be passed as embedded headers +in program objects: -Kernel objects are not created for any `+__kernel+` functions in _program_ -that do not have the same function definition across all devices for which a -program executable has been successfully built.
+[source,opencl] +---- +cl_program foo_pg = clCreateProgramWithSource(context, + 1, &foo_header_src, NULL, &err); +cl_program myinc_pg = clCreateProgramWithSource(context, + 1, &myinc_header_src, NULL, &err); -Kernel objects can only be created once you have a program object with a -valid program source or binary loaded into the program object and the -program executable has been successfully built for one or more devices -associated with program. -No changes to the program executable are allowed while there are kernel -objects associated with a program object. -This means that calls to {clBuildProgram} and {clCompileProgram} return -{CL_INVALID_OPERATION} if there are kernel objects attached to a program -object. -The OpenCL context associated with _program_ will be the context associated -with _kernel_. -The list of devices associated with _program_ are the devices associated -with _kernel_. -Devices associated with a program object for which a valid program -executable has been built can be used to execute kernels declared in the -program object. +// let's assume the program source described above is given +// by program_A and is loaded via clCreateProgramWithSource +cl_program input_headers[2] = { foo_pg, myinc_pg }; +char * input_header_names[2] = { "foo.h", "mydir/myinc.h" }; +clCompileProgram(program_A, + 0, NULL, // num_devices & device_list + NULL, // compile_options + 2, // num_input_headers + input_headers, + input_header_names, + NULL, NULL); // pfn_notify & user_data +---- // refError -{clCreateKernelsInProgram} will return {CL_SUCCESS} if the kernel objects were -successfully allocated. +{clCompileProgram} returns {CL_SUCCESS} if the function is executed +successfully. Otherwise, it returns one of the following errors: * {CL_INVALID_PROGRAM} if _program_ is not a valid program object. - * {CL_INVALID_PROGRAM_EXECUTABLE} if there is no successfully built - executable for any device in _program_.
- * {CL_INVALID_VALUE} if _kernels_ is not `NULL` and _num_kernels_ is less - than the number of kernels in _program_. + * {CL_INVALID_VALUE} if _device_list_ is `NULL` and _num_devices_ is greater + than zero, or if _device_list_ is not `NULL` and _num_devices_ is zero. + * {CL_INVALID_VALUE} if _num_input_headers_ is zero and + _header_include_names_ or _input_headers_ are not `NULL` or if + _num_input_headers_ is not zero and _header_include_names_ or + _input_headers_ are `NULL`. + * {CL_INVALID_VALUE} if _pfn_notify_ is `NULL` but _user_data_ is not + `NULL`. + * {CL_INVALID_DEVICE} if any device in _device_list_ is not in + the list of devices associated with _program_. + * {CL_INVALID_COMPILER_OPTIONS} if the compiler options specified by + _options_ are invalid. + * {CL_INVALID_OPERATION} if the compilation or build of a program executable + for any of the devices listed in _device_list_ by a previous call to + {clCompileProgram} or {clBuildProgram} for _program_ has not completed. + * {CL_COMPILER_NOT_AVAILABLE} if a compiler is not available, i.e. + {CL_DEVICE_COMPILER_AVAILABLE} specified in the + <> table is set to {CL_FALSE}. + * {CL_COMPILE_PROGRAM_FAILURE} if there is a failure to compile the program + source. + This error will be returned if {clCompileProgram} does not return until + the compile has completed. + * {CL_INVALID_OPERATION} if there are kernel objects attached to _program_. + * {CL_INVALID_OPERATION} if _program_ has no source or IL available, i.e. it + has not been created with one of + ** {clCreateProgramWithIL} +ifdef::cl_khr_il_program[or {clCreateProgramWithILKHR}] +ifdef::cl_khr_spir[] + ** {clCreateProgramWithBinary} where `-x spir` is present in _options_, + if the `<>` extension is supported. +endif::cl_khr_spir[] + ** {clCreateProgramWithSource} * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required by the OpenCL implementation on the device.
* {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources required by the OpenCL implementation on the host. - -- -[open,refpage='clRetainKernel',desc='Increments the kernel object reference count.',type='protos'] +[open,refpage='clLinkProgram',desc='Links a set of compiled program objects and libraries for all the devices or a specific device(s) in the OpenCL context and creates a library or executable.',type='protos'] -- -To retain a kernel object, call the function - -include::{generated}/api/protos/clRetainKernel.txt[] -include::{generated}/api/version-notes/clRetainKernel.asciidoc[] - - * _kernel_ is the kernel object to be retained. - -The _kernel_ reference count is incremented. - -// refError +To link a set of compiled program objects and libraries for all the devices +or a specific device(s) in the OpenCL context and create a library or +executable, call the function -{clRetainKernel} returns {CL_SUCCESS} if the function is executed -successfully. -Otherwise, it returns one of the following errors: +include::{generated}/api/protos/clLinkProgram.txt[] +include::{generated}/api/version-notes/clLinkProgram.asciidoc[] - * {CL_INVALID_KERNEL} if _kernel_ is not a valid kernel object. - * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required - by the OpenCL implementation on the device. - * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources - required by the OpenCL implementation on the host. - -{clCreateKernel} or {clCreateKernelsInProgram} do an implicit retain. --- + * _context_ must be a valid OpenCL context. + * _device_list_ is a pointer to a list of devices that are in _context_. + If _device_list_ is a `NULL` value, the link is performed for all devices + associated with _context_ for which a compiled object is available. + If _device_list_ is a non-`NULL` value, the link is performed for devices + specified in this list for which a compiled object is available. 
+ * _num_devices_ is the number of devices listed in _device_list_. + * _options_ is a pointer to a null-terminated string of characters that + describes the link options to be used for building the program executable. + The list of supported options is as described in <>. + If the program was created using {clCreateProgramWithBinary} and _options_ + is a `NULL` pointer, the program will be linked as if _options_ were the + same as when the program binary was originally built. + If the program was created using {clCreateProgramWithBinary} and _options_ + string contains anything other than the same options in the same order + (whitespace ignored) as when the program binary was originally built, then + the behavior is implementation-defined. + Otherwise, if _options_ is a `NULL` pointer then it will have the same + result as the empty string. + * _num_input_programs_ specifies the number of programs in array referenced by + _input_programs_. + * _input_programs_ is an array of program objects that are compiled binaries + or libraries that are to be linked to create the program executable. + For each device in _device_list_ or if _device_list_ is `NULL` the list of + devices associated with context, the following cases occur: + ** All programs specified by _input_programs_ contain a compiled binary or + library for the device. + In this case, a link is performed to generate a program executable for + this device. + ** None of the programs contain a compiled binary or library for that + device. + In this case, no link is performed and there will be no program + executable generated for this device. + ** All other cases will return a {CL_INVALID_OPERATION} error. + * _pfn_notify_ is a function pointer to a notification routine. + The notification routine is a callback function that an application can + register and which will be called when the program executable has been built + (successfully or unsuccessfully). 
+ * _user_data_ will be passed as an argument when _pfn_notify_ is called. + _user_data_ can be `NULL`. -[open,refpage='clReleaseKernel',desc='Decrements the kernel reference count.',type='protos'] --- -To release a kernel object, call the function +If _pfn_notify_ is not `NULL`, {clLinkProgram} does not need to wait for the +linker to complete, and can return immediately once the linking operation can +begin. +Once the linker has completed, the _pfn_notify_ callback function is called +which returns the program object returned by {clLinkProgram}. +Any state changes of the program object that result from calling {clLinkProgram} +(e.g. link status or log) will be observable from this callback function. +This callback function may be called asynchronously by the OpenCL +implementation. +It is the application's responsibility to ensure that the callback function +is thread-safe. -include::{generated}/api/protos/clReleaseKernel.txt[] -include::{generated}/api/version-notes/clReleaseKernel.asciidoc[] +If _pfn_notify_ is `NULL`, {clLinkProgram} does not return until the linker +has completed. - * _kernel_ is the kernel object to be released. +{clLinkProgram} creates a new program object which contains the library or +executable. +The library or executable binary can be queried using +{clGetProgramInfo}(_program_, {CL_PROGRAM_BINARIES}, ...) and can be specified +to {clCreateProgramWithBinary} to create a new program object. -The _kernel_ reference count is decremented. +The devices associated with the returned program object will be the list of +devices specified by _device_list_ or if _device_list_ is `NULL` it will be +the list of devices associated with _context_. -The kernel object is deleted once the number of instances that are retained -to _kernel_ become zero and the kernel object is no longer needed by any -enqueued commands that use _kernel_. 
-Using this function to release a reference that was not obtained by creating -the object or by calling {clRetainKernel} causes undefined behavior. +The linking operation can begin if the context, list of devices, input +programs and linker options specified are all valid and appropriate host and +device resources needed to perform the link are available. +If the linking operation can begin, {clLinkProgram} returns a valid non-zero +program object. // refError -{clReleaseKernel} returns {CL_SUCCESS} if the function is executed -successfully. -Otherwise, it returns one of the following errors: +If _pfn_notify_ is `NULL`, _errcode_ret_ will be set to {CL_SUCCESS} if +the link operation was successful and {CL_LINK_PROGRAM_FAILURE} if there is a +failure to link the compiled binaries and/or libraries. - * {CL_INVALID_KERNEL} if _kernel_ is not a valid kernel object. +If _pfn_notify_ is not `NULL`, {clLinkProgram} does not have to wait for +the linker to complete and can return {CL_SUCCESS} in _errcode_ret_ if the +linking operation can begin. +The _pfn_notify_ callback function will return a {CL_SUCCESS} or +{CL_LINK_PROGRAM_FAILURE} if the linking operation was successful or not. + +Otherwise {clLinkProgram} returns a `NULL` program object with an +appropriate error in _errcode_ret_. +The application should query the linker status of this program object to +check if the link was successful or not. +The errors that can be returned are: + + * {CL_INVALID_CONTEXT} if _context_ is not a valid context. + * {CL_INVALID_VALUE} if _device_list_ is `NULL` and _num_devices_ is greater + than zero, or if _device_list_ is not `NULL` and _num_devices_ is zero. + * {CL_INVALID_VALUE} if _num_input_programs_ is zero and _input_programs_ is + `NULL` or if _num_input_programs_ is zero and _input_programs_ is not + `NULL` or if _num_input_programs_ is not zero and _input_programs_ is + `NULL`. 
+ * {CL_INVALID_PROGRAM} if programs specified in _input_programs_ are not + valid program objects. + * {CL_INVALID_VALUE} if _pfn_notify_ is `NULL` but _user_data_ is not + `NULL`. + * {CL_INVALID_DEVICE} if any device in _device_list_ is not in + the list of devices associated with _context_. + * {CL_INVALID_LINKER_OPTIONS} if the linker options specified by _options_ + are invalid. + * {CL_INVALID_OPERATION} if the compilation or build of a program executable + for any of the devices listed in _device_list_ by a previous call to + {clCompileProgram} or {clBuildProgram} for _program_ has not completed. + * {CL_INVALID_OPERATION} if the rules for devices containing compiled + binaries or libraries as described in the _input_programs_ argument above + are not followed. + * {CL_LINKER_NOT_AVAILABLE} if a linker is not available, i.e. + {CL_DEVICE_LINKER_AVAILABLE} specified in the + <<device-queries-table,Device Queries>> table is set to {CL_FALSE}. + * {CL_LINK_PROGRAM_FAILURE} if there is a failure to link the compiled + binaries and/or libraries. * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required by the OpenCL implementation on the device. * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources @@ -7092,2394 +9229,7039 @@ Otherwise, it returns one of the following errors: -- -=== Setting Kernel Arguments +[[compiler-options]] +=== Compiler Options -To execute a kernel, the kernel arguments must be set. +The compiler options are categorized as pre-processor options, options for +math intrinsics, options that control optimization and miscellaneous +options. +This specification defines a standard set of options that must be supported +by the compiler when building program executables online or offline from +OpenCL C/{cpp} or, where relevant, from an IL. +These may be extended by a set of vendor- or platform-specific options. 
-[open,refpage='clSetKernelArg',desc='Set the argument value for a specific argument of a kernel.',type='protos'] --- -To set the argument value for a specific argument of a kernel, call the -function -include::{generated}/api/protos/clSetKernelArg.txt[] -include::{generated}/api/version-notes/clSetKernelArg.asciidoc[] +[[preprocessor-options]] +==== Preprocessor Options - * _kernel_ is a valid kernel object. - * _arg_index_ is the argument index. - Arguments to the kernel are referred by indices that go from 0 for the - leftmost argument to _n_ - 1, where _n_ is the total number of arguments - declared by a kernel (see below). - * _arg_size_ specifies the size of the argument value. - If the argument is a memory object, the _arg_size_ value must be equal to - `sizeof({cl_mem_TYPE})`. - For arguments declared with the `local` qualifier, the size specified will - be the size in bytes of the buffer that must be allocated for the `local` - argument. - If the argument is of type _sampler_t_, the _arg_size_ value must be equal - to `sizeof({cl_sampler_TYPE})`. - If the argument is of type _queue_t_, the _arg_size_ value must be equal to - `sizeof({cl_command_queue_TYPE})`. - For all other arguments, the size will be the size of argument type. - * _arg_value_ is a pointer to data that should be used as the argument value - for argument specified by _arg_index_. - The argument data pointed to by _arg_value_ is copied and the _arg_value_ - pointer can therefore be reused by the application after {clSetKernelArg} - returns. - The argument value specified is the value used by all API calls that enqueue - _kernel_ ({clEnqueueNDRangeKernel} and {clEnqueueTask}) until the argument - value is changed by a call to {clSetKernelArg} for _kernel_. +These options control the OpenCL C/{cpp} preprocessor which is run on each +program source before actual compilation. +These options are ignored for programs created with IL. 
-For example, consider the following kernel: +`-D name` :: + Predefine _name_ as a macro, with definition 1. -[source,opencl_c] ----- -kernel void image_filter (int n, - int m, - constant float *filter_weights, - read_only image2d_t src_image, - write_only image2d_t dst_image) -{ -... -} ----- +`-D name=definition` :: + The contents of _definition_ are tokenized and processed as if they + appeared during translation phase three in a `#define` directive. + In particular, the definition will be truncated by embedded newline + characters. ++ +-- +`-D` options are processed in the order they are given in the _options_ +argument to {clBuildProgram} or {clCompileProgram}. +Note that a space is required between the `-D` option and the symbol it +defines, otherwise behavior is implementation-defined. +-- -Argument index values for `image_filter` will be 0 for `n`, 1 for `m`, 2 for -`filter_weights`, 3 for `src_image` and 4 for `dst_image`. +`-I dir` :: + Add the directory _dir_ to the list of directories to be searched for + header files. + _dir_ can optionally be enclosed in double quotes. ++ +-- +This option is not portable due to its dependency on host file system and +host operating system. +It is supported for backwards compatibility with previous OpenCL versions. +Developers are encouraged to create and use explicit header objects by means +of {clCompileProgram} followed by {clLinkProgram}. +-- -If the argument is a memory object (buffer, pipe, image or image array), the -_arg_value_ entry will be a pointer to the appropriate buffer, pipe, image -or image array object. -The memory object must be created with the context associated with the -kernel object. -If the argument is a buffer object, the _arg_value_ pointer can be `NULL` or -point to a `NULL` value in which case a `NULL` value will be used as the -value for the argument declared as a pointer to `global` or `constant` -memory in the kernel. 
-If the argument is declared with the `local` qualifier, the _arg_value_ -entry must be `NULL`. -If the argument is of type _sampler_t_, the _arg_value_ entry must be a -pointer to the sampler object. -If the argument is of type _queue_t_, the _arg_value_ entry must be a -pointer to the device queue object. -If the argument is declared to be a pointer of a built-in scalar or vector -type, or a user defined structure type in the global or constant address -space, the memory object specified as argument value must be a buffer object -(or `NULL`). -If the argument is declared with the `constant` qualifier, the size in bytes -of the memory object cannot exceed {CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE} and -the number of arguments declared as pointers to `constant` memory cannot -exceed {CL_DEVICE_MAX_CONSTANT_ARGS}. +[[math-intrinsics-options]] +==== Math Intrinsics Options -The memory object specified as argument value must be a pipe object if the -argument is declared with the _pipe_ qualifier. +These options control compiler behavior regarding floating-point arithmetic. +These options trade off between speed and correctness. -The memory object specified as argument value must be a 2D image object if -the argument is declared to be of type _image2d_t_. -The memory object specified as argument value must be a 2D image object with -image channel order = {CL_DEPTH} if the argument is declared to be of type -_image2d_depth_t_. -The memory object specified as argument value must be a 3D image object if -argument is declared to be of type _image3d_t_. -The memory object specified as argument value must be a 1D image object if -the argument is declared to be of type _image1d_t_. -The memory object specified as argument value must be a 1D image buffer -object if the argument is declared to be of type _image1d_buffer_t_. -The memory object specified as argument value must be a 1D image array -object if argument is declared to be of type _image1d_array_t_. 
-The memory object specified as argument value must be a 2D image array -object if argument is declared to be of type _image2d_array_t_. -The memory object specified as argument value must be a 2D image array -object with image channel order = {CL_DEPTH} if argument is declared to be of -type _image2d_array_depth_t_. - -For all other kernel arguments, the _arg_value_ entry must be a pointer to -the actual data to be used as argument value. - -[NOTE] -==== -A kernel object does not update the reference count for objects such as -memory or sampler objects specified as argument values by {clSetKernelArg}. -Users may not rely on a kernel object to retain objects specified as -argument values to the kernel. - -Implementations shall not allow {cl_kernel_TYPE} objects to hold reference -counts to {cl_kernel_TYPE} arguments, because no mechanism is provided for the -user to tell the kernel to release that ownership right. -If the kernel holds ownership rights on kernel args, that would make it -impossible for users to tell with certainty when they may safely -release user allocated resources associated with OpenCL objects such as -the {cl_mem_TYPE} backing store used with {CL_MEM_USE_HOST_PTR}. -==== - -// refError +`-cl-single-precision-constant` :: + This option forces implicit conversions of double-precision floating-point + literals to single precision. + This option is ignored for programs created with IL. -{clSetKernelArg} returns {CL_SUCCESS} if the function was executed -successfully. -Otherwise, it returns one of the following errors: +`-cl-denorms-are-zero` :: + This option controls how single precision and double precision + denormalized numbers are handled. + If specified as a build option, the single precision denormalized + numbers may be flushed to zero; double precision denormalized numbers + may also be flushed to zero if the optional extension for double + precision is supported. 
+ This is intended to be a performance hint and the OpenCL compiler can + choose not to flush denorms to zero if the device supports single + precision (or double precision) denormalized numbers. ++ +-- +This option is ignored for single precision numbers if the device does not +support single precision denormalized numbers i.e. {CL_FP_DENORM} bit is not +set in {CL_DEVICE_SINGLE_FP_CONFIG}. - * {CL_INVALID_KERNEL} if _kernel_ is not a valid kernel object. - * {CL_INVALID_ARG_INDEX} if _arg_index_ is not a valid argument index. - * {CL_INVALID_ARG_VALUE} if _arg_value_ specified is not a valid value. - * {CL_INVALID_MEM_OBJECT} for an argument declared to be a memory object - when the specified _arg_value_ is not a valid memory object. - * {CL_INVALID_SAMPLER} for an argument declared to be of type _sampler_t_ - when the specified _arg_value_ is not a valid sampler object. - * {CL_INVALID_DEVICE_QUEUE} for an argument declared to be of type _queue_t_ - when the specified _arg_value_ is not a valid device queue object. - This error code is <> version 2.0. - * {CL_INVALID_ARG_SIZE} if _arg_size_ does not match the size of the data - type for an argument that is not a memory object or if the argument is a - memory object and _arg_size_ != `sizeof({cl_mem_TYPE})` or if _arg_size_ is - zero and the argument is declared with the local qualifier or if the - argument is a sampler and _arg_size_ != `sizeof({cl_sampler_TYPE})`. - * {CL_MAX_SIZE_RESTRICTION_EXCEEDED} if the size in bytes of the memory - object (if the argument is a memory object) or _arg_size_ (if the - argument is declared with `local` qualifier) exceeds a language- - specified maximum size restriction for this argument, such as the - *MaxByteOffset* SPIR-V decoration. - This error code is <> version 2.2. 
- * {CL_INVALID_ARG_VALUE} if the argument is an image declared with the - `read_only` qualifier and _arg_value_ refers to an image object created - with _cl_mem_flags_ of {CL_MEM_WRITE_ONLY} or if the image argument is - declared with the `write_only` qualifier and _arg_value_ refers to an - image object created with _cl_mem_flags_ of {CL_MEM_READ_ONLY}. - * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required - by the OpenCL implementation on the device. - * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources - required by the OpenCL implementation on the host. +This option is ignored for double precision numbers if the device does not +support double precision or if it does support double precision but not +double precision denormalized numbers i.e. {CL_FP_DENORM} bit is not set in +{CL_DEVICE_DOUBLE_FP_CONFIG}. -When {clSetKernelArg} returns an error code different from {CL_SUCCESS}, the -internal state of _kernel_ may only be modified when that error code is -{CL_OUT_OF_RESOURCES} or {CL_OUT_OF_HOST_MEMORY}. When the internal state -of _kernel_ is modified, it is implementation-defined whether: +This flag only applies for scalar and vector single precision floating-point +variables and computations on these floating-point variables inside a +program. +It does not apply to reading from or writing to image objects. +-- - * The argument value that was previously set is kept so that it can be used in - further kernel enqueues. - * The argument value is unset such that a subsequent kernel enqueue fails with - {CL_INVALID_KERNEL_ARGS}. footnote:[{fn-setkernelarg-prefer-unset-on-error}] +`-cl-fp32-correctly-rounded-divide-sqrt` :: + The `-cl-fp32-correctly-rounded-divide-sqrt` build option to + {clBuildProgram} or {clCompileProgram} allows an application to specify + that single precision floating-point divide (x/y and 1/x) and sqrt used + in the program source are correctly rounded. 
+ If this build option is not specified, the minimum numerical accuracy of + single precision floating-point divide and sqrt is as defined in the + OpenCL C or OpenCL SPIR-V Environment specifications. ++ -- +This build option can only be specified if the +{CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT} is set in {CL_DEVICE_SINGLE_FP_CONFIG} (as +defined in the <<device-queries-table,Device Queries>> table) for devices +that the program is being built for. +{clBuildProgram} or {clCompileProgram} will fail to compile the program for +a device if the `-cl-fp32-correctly-rounded-divide-sqrt` option is specified +and {CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT} is not set for the device. -[open,refpage='clSetKernelArgSVMPointer',desc='Set a SVM pointer as the argument value for a specific argument of a kernel.',type='protos'] +Note: This option is <<unified-spec, missing before>> version 1.2. -- -To set a SVM pointer as the argument value for a specific argument of a -kernel, call the function -include::{generated}/api/protos/clSetKernelArgSVMPointer.txt[] -include::{generated}/api/version-notes/clSetKernelArgSVMPointer.asciidoc[] - * _kernel_ is a valid kernel object. - * _arg_index_ is the argument index. - Arguments to the kernel are referred by indices that go from 0 for the - leftmost argument to _n_ - 1, where _n_ is the total number of arguments - declared by a kernel. - * _arg_value_ is the SVM pointer that should be used as the argument value for - argument specified by _arg_index_. - The SVM pointer specified is the value used by all API calls that enqueue - _kernel_ ({clEnqueueNDRangeKernel} and {clEnqueueTask}) until the argument - value is changed by a call to {clSetKernelArgSVMPointer} for _kernel_. - The SVM pointer can only be used for arguments that are declared to be a - pointer to `global` or `constant` memory. - The SVM pointer value must be aligned according to the arguments type. - For example, if the argument is declared to be `+global float4 *p+`, the SVM - pointer value passed for `p` must be at a minimum aligned to a `float4`. 
- The SVM pointer value specified as the argument value can be the pointer - returned by {clSVMAlloc} or can be a pointer offset into the SVM region. +[[optimization-options]] +==== Optimization Options -// refError +These options control various sorts of optimizations. +Turning on optimization flags makes the compiler attempt to improve the +performance and/or code size at the expense of compilation time and possibly +the ability to debug the program. -{clSetKernelArgSVMPointer} returns {CL_SUCCESS} if the function was executed -successfully. -Otherwise, it returns one of the following errors: +`-cl-opt-disable` :: + This option disables all optimizations. + The default is optimizations are enabled. - * {CL_INVALID_KERNEL} if _kernel_ is not a valid kernel object. - * {CL_INVALID_OPERATION} if no devices in the context associated with _kernel_ support SVM. - * {CL_INVALID_ARG_INDEX} if _arg_index_ is not a valid argument index. - * {CL_INVALID_ARG_VALUE} if _arg_value_ specified is not a valid value. - * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required - by the OpenCL implementation on the device. - * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources - required by the OpenCL implementation on the host. +`-cl-strict-aliasing` :: + This option allows the compiler to assume the strictest aliasing rules. ++ +-- +Note: This option is <> version 1.1. -- -[open,refpage='clSetKernelExecInfo',desc='Pass additional information other than argument values to a kernel.',type='protos'] +`-cl-uniform-work-group-size` :: + This requires that the global work-size be a multiple of the work-group + size specified to {clEnqueueNDRangeKernel}. + Allow optimizations that are made possible by this restriction. ++ +-- +Note: This option is <> version 2.0. 
-- -To pass additional information other than argument values to a kernel, call -the function -include::{generated}/api/protos/clSetKernelExecInfo.txt[] -include::{generated}/api/version-notes/clSetKernelExecInfo.asciidoc[] +`-cl-no-subgroup-ifp` :: + This indicates that kernels in this program do not require sub-groups to + make independent forward progress. + Allows optimizations that are made possible by this restriction. + This option has no effect for devices that do not support independent + forward progress for sub-groups. ++ +-- +Note: This option is <<unified-spec, missing before>> version 2.1. +-- - * _kernel_ specifies the kernel object being queried. - * _param_name_ specifies the information to be passed to kernel. - The list of supported _param_name_ types and the corresponding values passed - in _param_value_ is described in the <> table. - * _param_value_size_ specifies the size in bytes of the memory pointed to by - _param_value_. - * _param_value_ is a pointer to memory where the appropriate values determined - by _param_name_ are specified. +The following options control compiler behavior regarding floating-point +arithmetic. +These options trade off between performance and correctness and must be +specifically enabled. +These options are not turned on by default since they can result in incorrect +output for programs which depend on an exact implementation of IEEE 754 +rules/specifications for math functions. -[[kernel-exec-info-table]] -.List of supported param_names by {clSetKernelExecInfo} -[width="100%",cols="<33%,<17%,<50%",options="header"] -|==== -| Kernel Exec Info | Type | Description -| {CL_KERNEL_EXEC_INFO_SVM_PTRS_anchor} +`-cl-mad-enable` :: + Allow `a * b + c` to be replaced by a *mad* instruction. + The *mad* instruction may compute `a * b + c` with reduced accuracy + in the embedded profile. + See the OpenCL C or OpenCL SPIR-V Environment specification for accuracy + details. 
+ On some hardware the *mad* instruction may provide better performance + than the expanded computation. -include::{generated}/api/version-notes/CL_KERNEL_EXEC_INFO_SVM_PTRS.asciidoc[] - | {void_TYPE}*[] - | SVM pointers must reference locations contained entirely within - buffers that are passed to kernel as arguments, or that are passed - through the execution information. +`-cl-no-signed-zeros` :: + Allow optimizations for floating-point arithmetic that ignore the + signedness of zero. + IEEE 754 arithmetic specifies the distinct behavior of `+0.0` and `-0.0` + values, which then prohibits simplification of expressions such as `x + {plus} 0.0` or `0.0 * x` (even with `-cl-finite-math-only`). + This option implies that the sign of a zero result is not significant. - Non-argument SVM buffers must be specified by passing pointers to - those buffers via {clSetKernelExecInfo} for coarse-grain and - fine-grain buffer SVM allocations but not for finegrain system SVM - allocations. -| {CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM_anchor} +`-cl-unsafe-math-optimizations` :: + Allow optimizations for floating-point arithmetic that (a) assume that + arguments and results are valid, (b) may violate the IEEE 754 standard, + (c) assume relaxed OpenCL numerical compliance requirements as defined + in the unsafe math optimization section of the OpenCL C or OpenCL SPIR-V + Environment specifications, and (d) may violate edge case behavior in the + OpenCL C or OpenCL SPIR-V Environment specifications. + This option includes the `-cl-no-signed-zeros`, `-cl-mad-enable`, and + `-cl-denorms-are-zero` footnote:[{fn-unsafe-denorms-are-zero}] options. -include::{generated}/api/version-notes/CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM.asciidoc[] - | {cl_bool_TYPE} - | This flag indicates whether the kernel uses pointers that are fine - grain system SVM allocations. 
- These fine grain system SVM pointers may be passed as arguments or - defined in SVM buffers that are passed as arguments to _kernel_. -|==== +`-cl-finite-math-only` :: + Allow optimizations for floating-point arithmetic that assume that + arguments and results are not NaNs, +Inf, -Inf. + This option may violate the OpenCL numerical compliance requirements for + single precision and double precision floating-point, as well as edge + case behavior. -// refError +`-cl-fast-relaxed-math` :: + Sets the optimization options `-cl-finite-math-only` and + `-cl-unsafe-math-optimizations`. + This option causes the preprocessor macro `+__FAST_RELAXED_MATH__+` to + be defined in the OpenCL program. -{clSetKernelExecInfo} returns {CL_SUCCESS} if the function is executed -successfully. -Otherwise, it returns one of the following errors: - * {CL_INVALID_KERNEL} if _kernel_ is a not a valid kernel object. - * {CL_INVALID_OPERATION} if no devices in the context associated with _kernel_ support SVM. - * {CL_INVALID_VALUE} if _param_name_ is not valid, if _param_value_ is - `NULL` or if the size specified by _param_value_size_ is not valid. - * {CL_INVALID_OPERATION} if _param_name_ is - {CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM} and _param_value_ is {CL_TRUE} - but no devices in context associated with _kernel_ support fine-grain - system SVM allocations. - * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required - by the OpenCL implementation on the device. - * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources - required by the OpenCL implementation on the host. --- +==== Options to Request or Suppress Warnings -[NOTE] -==== -Coarse-grain or fine-grain buffer SVM pointers used by a kernel which -are not passed as a kernel arguments must be specified using -{clSetKernelExecInfo} with {CL_KERNEL_EXEC_INFO_SVM_PTRS}. 
-For example, if SVM buffer A contains a pointer to another SVM buffer B, -and the kernel dereferences that pointer, then a pointer to B must -either be passed as an argument in the call to that kernel or it must be -made available to the kernel using {clSetKernelExecInfo}. -For example, we might pass extra SVM pointers as follows: +Warnings are diagnostic messages that report constructions which are not +inherently erroneous but which are risky or suggest there may have been an +error. +The following language-independent options do not enable specific warnings +but control the kinds of diagnostics produced by the OpenCL compiler. +These options are ignored for programs created with IL. -[source,opencl] ----- -clSetKernelExecInfo(kernel, - CL_KERNEL_EXEC_INFO_SVM_PTRS, - num_ptrs * sizeof(void *), - extra_svm_ptr_list); ----- +`-w` :: + Inhibit all warning messages. -Here `num_ptrs` specifies the number of additional SVM pointers while -`extra_svm_ptr_list` specifies a pointer to memory containing those SVM -pointers. +`-Werror` :: + Make all warnings into errors. -When calling {clSetKernelExecInfo} with {CL_KERNEL_EXEC_INFO_SVM_PTRS} to -specify pointers to non-argument SVM buffers as extra arguments to a kernel, -each of these pointers can be the SVM pointer returned by {clSVMAlloc} or -can be a pointer + offset into the SVM region. -It is sufficient to provide one pointer for each SVM buffer used. -{CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM} is used to indicate whether -SVM pointers used by a kernel will refer to system allocations or not. +[[opencl-c-version]] +==== Options Controlling the OpenCL C Version -{CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM} = {CL_FALSE} indicates that the -OpenCL implementation may assume that system pointers are not passed as -kernel arguments and are not stored inside SVM allocations passed as kernel -arguments. +The following option controls the version of OpenCL C that the compiler +accepts. 
+These options are ignored for programs created with IL. -{CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM} = {CL_TRUE} indicates that the -OpenCL implementation must assume that system pointers might be passed as -kernel arguments and/or stored inside SVM allocations passed as kernel -arguments. -In this case, if the device to which the kernel is enqueued does not support -system SVM pointers, {clEnqueueNDRangeKernel} and {clEnqueueTask} will return a -{CL_INVALID_OPERATION} error. -If none of the devices in the context associated with kernel support -fine-grain system SVM allocations, {clSetKernelExecInfo} will return a -{CL_INVALID_OPERATION} error. +`-cl-std=` :: + Determine the OpenCL C language version to use. + A value for this option must be provided. + Valid values are: ++ +-- + * `CL1.1`: Support OpenCL C 1.1 language features defined in _section 6_ of + the OpenCL 1.1 specification or in the unified OpenCL C specification. + * `CL1.2`: Support OpenCL C 1.2 language features defined in _section 6_ of + the OpenCL 1.2 specification or in the unified OpenCL C specification. + * `CL2.0`: Support OpenCL C 2.0 language features defined in the OpenCL C 2.0 + specification or in the unified OpenCL C specification. + * `CL3.0`: Support OpenCL C 3.0 language features defined in the unified + OpenCL C specification. +-- -If {clSetKernelExecInfo} has not been called with a value for -{CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM}, the default value is used for -this kernel attribute. -The default value depends on whether the device on which the kernel is -enqueued supports fine-grain system SVM allocations. -If so, the default value used is {CL_TRUE} (system pointers might be passed); -otherwise, the default is {CL_FALSE}. 
+Calls to {clBuildProgram} or {clCompileProgram} with the `-cl-std=CL1.1` +option *will fail* to compile the program for any devices with +{CL_DEVICE_OPENCL_C_VERSION} equal to OpenCL C 1.0 and when +{CL_DEVICE_OPENCL_C_ALL_VERSIONS} does not include OpenCL C 1.1. -A call to {clSetKernelExecInfo} for a given value of _param_name_ -replaces any prior value passed for that value of _param_name_. -Only one _param_value_ will be stored for each value of _param_name_. -==== +Calls to {clBuildProgram} or {clCompileProgram} with the `-cl-std=CL1.2` +option *will fail* to compile the program for any devices with +{CL_DEVICE_OPENCL_C_VERSION} equal to OpenCL C 1.1 or earlier +and when {CL_DEVICE_OPENCL_C_ALL_VERSIONS} does not include OpenCL C 1.2. +Calls to {clBuildProgram} or {clCompileProgram} with the `-cl-std=CL2.0` +option *will fail* to compile the program for any devices with +{CL_DEVICE_OPENCL_C_VERSION} equal to OpenCL C 1.2 or earlier +and when {CL_DEVICE_OPENCL_C_ALL_VERSIONS} does not include OpenCL C 2.0. -=== Copying Kernel Objects +Calls to {clBuildProgram} or {clCompileProgram} with the `-cl-std=CL3.0` +option *will fail* to compile the program for any devices with +{CL_DEVICE_OPENCL_C_VERSION} equal to OpenCL C 2.0 or earlier +and when {CL_DEVICE_OPENCL_C_ALL_VERSIONS} does not include OpenCL C 3.0. -NOTE: Copying kernel objects is <> version 2.1. +If the `-cl-std` build option is not specified, the highest OpenCL C 1.x +language version supported by each device is used when compiling the program +for each device. +Applications are required to specify the `-cl-std=CL2.0` build option to +compile or build programs with OpenCL C 2.0 and the `-cl-std=CL3.0` +build option to compile or build programs with OpenCL C 3.0. 
-[open,refpage='clCloneKernel',desc='Make a shallow copy of the kernel object.',type='protos'] --- -To clone a kernel object, call the function -include::{generated}/api/protos/clCloneKernel.txt[] -include::{generated}/api/version-notes/clCloneKernel.asciidoc[] +==== Options for Querying Kernel Argument Information - * _source_kernel_ is a valid {cl_kernel_TYPE} object that will be copied. - _source_kernel_ will not be modified in any way by this function. - * _errcode_ret_ will be assigned an appropriate error code. - If _errcode_ret_ is `NULL`, no error code is returned. +IMPORTANT: Querying for kernel argument information is <> version 1.2. -Cloning is used to make a shallow copy of the kernel object, its arguments -and any information passed to the kernel object using {clSetKernelExecInfo}. -If the kernel object was ready to be enqueued before copying it, the clone -of the kernel object is ready to enqueue. +`-cl-kernel-arg-info` :: + This option allows the compiler to store information about the arguments + of a kernel(s) in the program executable. + The argument information stored includes the argument name, its type, + the address space and access qualifiers used. + Refer to description of {clGetKernelArgInfo} on how to query this + information. -The returned kernel object is an exact copy of _source_kernel_, with one -caveat: the reference count on the returned kernel object is set as if it -had been returned by {clCreateKernel}. -The reference count of _source_kernel will_ not be changed. -The resulting kernel will be in the same state as if {clCreateKernel} is -called to create the resultant kernel with the same arguments as those used -to create _source_kernel_, the latest call to {clSetKernelArg} or -{clSetKernelArgSVMPointer} for each argument index applied to kernel and the -last call to {clSetKernelExecInfo} for each value of the param name -parameter are applied to the new kernel object. 
+==== Options for Debugging Your Program -All arguments of the new kernel object must be intact and it may be -correctly used in the same situations as kernel except those that assume a -pre-existing reference count. -Setting arguments on the new kernel object will not affect _source_kernel_ -except insofar as the argument points to a shared underlying entity and in -that situation behavior is as if two kernel objects had been created and the -same argument applied to each. -Only the data stored in the kernel object is copied; data referenced by the -kernels arguments are not copied. -For example, if a buffer or pointer argument is set on a kernel object, the -pointer is copied but the underlying memory allocation is not. +IMPORTANT: Debugging options are <> version 2.0. -// refError +`-g` :: + This option can currently be used to generate additional errors for the + built-in functions that allow you to enqueue commands on a device (refer + to OpenCL kernel languages specifications). -{clCloneKernel} returns a valid non-zero kernel object and _errcode_ret_ is -set to {CL_SUCCESS} if the kernel is successfully copied. -Otherwise it returns a `NULL` value with one of the following error values -returned in _errcode_ret_: - * {CL_INVALID_KERNEL} if _kernel_ is not a valid kernel object. - * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required - by the OpenCL implementation on the device. - * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources - required by the OpenCL implementation on the host. --- +[[linker-options]] +=== Linker Options +NOTE: Linker options are <> version 1.2. -=== Kernel Object Queries +This specification defines a standard set of linker options that must be +supported by the OpenCL C compiler when linking compiled programs online or +offline. +These linker options are categorized as library linking options and program +linking options. +These may be extended by a set of vendor- or platform-specific options. 
-[open,refpage='clGetKernelInfo',desc='Returns information about the kernel object.',type='protos'] --- -To return information about a kernel object, call the function -include::{generated}/api/protos/clGetKernelInfo.txt[] -include::{generated}/api/version-notes/clGetKernelInfo.asciidoc[] +==== Library Linking Options - * _kernel_ specifies the kernel object being queried. - * _param_name_ specifies the information to query. - The list of supported _param_name_ types and the information returned in - _param_value_ by {clGetKernelInfo} is described in the - <> table. - * _param_value_ is a pointer to memory where the appropriate result being - queried is returned. - If _param_value_ is `NULL`, it is ignored. - * _param_value_size_ is used to specify the size in bytes of memory pointed to - by _param_value_. - This size must be {geq} size of return type as described in the - <> table. - * _param_value_size_ret_ returns the actual size in bytes of data being - queried by _param_name_. - If _param_value_size_ret_ is `NULL`, it is ignored. +IMPORTANT: Library linking options are <> version +1.2. -[[kernel-info-table]] -.List of supported param_names by {clGetKernelInfo} -[width="100%",cols="<33%,<17%,<50%",options="header"] -|==== -| Kernel Info | Return Type | Description -| {CL_KERNEL_FUNCTION_NAME_anchor} +The following options can be specified when creating a library of compiled +binaries. -include::{generated}/api/version-notes/CL_KERNEL_FUNCTION_NAME.asciidoc[] - | {char_TYPE}[] - | Return the kernel function name. -| {CL_KERNEL_NUM_ARGS_anchor} +`-create-library` :: + Create a library of compiled binaries specified in _input_programs_ + argument to {clLinkProgram}. -include::{generated}/api/version-notes/CL_KERNEL_NUM_ARGS.asciidoc[] - | {cl_uint_TYPE} - | Return the number of arguments to kernel. 
-| {CL_KERNEL_REFERENCE_COUNT_anchor} footnote:[{fn-reference-count-usage}] +`-enable-link-options` :: + Allows the linker to modify the library behavior based on one or more + link options (described in <>) when this library is linked with a program executable. + This option must be specified with the create-library option. -include::{generated}/api/version-notes/CL_KERNEL_REFERENCE_COUNT.asciidoc[] - | {cl_uint_TYPE} - | Return the _kernel_ reference count. -| {CL_KERNEL_CONTEXT_anchor} -include::{generated}/api/version-notes/CL_KERNEL_CONTEXT.asciidoc[] - | {cl_context_TYPE} - | Return the context associated with _kernel_. -| {CL_KERNEL_PROGRAM_anchor} +[[program-linking-options]] +==== Program Linking Options -include::{generated}/api/version-notes/CL_KERNEL_PROGRAM.asciidoc[] - | {cl_program_TYPE} - | Return the program object associated with kernel. -| {CL_KERNEL_ATTRIBUTES_anchor} +The following options can be specified when linking a program executable. -include::{generated}/api/version-notes/CL_KERNEL_ATTRIBUTES.asciidoc[] - | {char_TYPE}[] - | Returns any attributes specified using the `+__attribute__+` - OpenCL C qualifier (or using an OpenCL {cpp} qualifier syntax [[]] ) - with the kernel function declaration in the program source. - These attributes include attributes described in the earlier OpenCL - C kernel language specifications and other attributes supported by - an implementation. - - Attributes are returned as they were declared inside - `+__attribute__((...))+`, with any surrounding whitespace and - embedded newlines removed. - When multiple attributes are present, they are returned as a single, - space delimited string. - - For kernels not created from OpenCL C source and the - {clCreateProgramWithSource} API call the string returned from this - query will be empty. 
-|==== - -// refError +`-cl-denorms-are-zero` + +`-cl-no-signed-zeros` + +`-cl-unsafe-math-optimizations` + +`-cl-finite-math-only` + +`-cl-fast-relaxed-math` + +`-cl-no-subgroup-ifp` (<> version 2.1) -{clGetKernelInfo} returns {CL_SUCCESS} if the function is executed -successfully. -Otherwise, it returns one of the following errors: +The options are described in <> and <>. +The linker may apply these options to all compiled program objects +specified to {clLinkProgram}. +The linker may apply these options only to libraries which were created +with the option `-enable-link-options`. - * {CL_INVALID_VALUE} if _param_name_ is not valid, or if size in bytes - specified by _param_value_size_ is < size of return type as described in - the <> table and _param_value_ - is not `NULL`. - * {CL_INVALID_KERNEL} if _kernel_ is a not a valid kernel object. - * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required - by the OpenCL implementation on the device. - * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources - required by the OpenCL implementation on the host. --- -[open,refpage='clGetKernelWorkGroupInfo',desc='Returns information about the kernel object that may be specific to a device.',type='protos'] --- -To return information about the kernel object that may be specific to a -device, call the function +ifdef::cl_khr_spir[] +[[spir-compilation-options]] +==== SPIR Compilation Options -include::{generated}/api/protos/clGetKernelWorkGroupInfo.txt[] -include::{generated}/api/version-notes/clGetKernelWorkGroupInfo.asciidoc[] +If the `<>` extension is supported, the compile option - * _kernel_ specifies the kernel object being queried. - * _device_ identifies a specific device in the list of devices associated with - _kernel_. - The list of devices is the list of devices in the OpenCL context that is - associated with _kernel_. - If the list of devices associated with _kernel_ is a single device, _device_ - can be a `NULL` value. 
- * _param_name_ specifies the information to query. - The list of supported _param_name_ types and the information returned in - _param_value_ by {clGetKernelWorkGroupInfo} is described in the - <> table. - * _param_value_ is a pointer to memory where the appropriate result being - queried is returned. - If _param_value_ is `NULL`, it is ignored. - * _param_value_size_ is used to specify the size in bytes of memory pointed to - by _param_value_. - This size must be {geq} size of return type as described in the - <> table. - * _param_value_size_ret_ returns the actual size in bytes of data being - queried by _param_name_. - If _param_value_size_ret_ is `NULL`, it is ignored. +`-x spir` -[[kernel-workgroup-info-table]] -.List of supported param_names by {clGetKernelWorkGroupInfo} -[width="100%",cols="<33%,<17%,<50%",options="header"] -|==== -| Kernel Work-group Info | Return Type | Description -| {CL_KERNEL_GLOBAL_WORK_SIZE_anchor} +must be specified to indicate that the binary is in SPIR format, and the +compile option -include::{generated}/api/version-notes/CL_KERNEL_GLOBAL_WORK_SIZE.asciidoc[] - | {size_t_TYPE}[3] - | This provides a mechanism for the application to query the maximum - global size that can be used to execute a kernel (i.e. - _global_work_size_ argument to {clEnqueueNDRangeKernel}) on a custom - device given by device or a built-in kernel on an OpenCL device - given by device. +`-spir-std` - If device is not a custom device and kernel is not a built-in - kernel, {clGetKernelWorkGroupInfo} returns the error - {CL_INVALID_VALUE}. -| {CL_KERNEL_WORK_GROUP_SIZE_anchor} +must be used to specify the version of the SPIR specification that describes +the format and meaning of the binary. -include::{generated}/api/version-notes/CL_KERNEL_WORK_GROUP_SIZE.asciidoc[] - | {size_t_TYPE} - | This provides a mechanism for the application to query the maximum - work-group size that can be used to execute the kernel on a specific - device given by device. 
- The OpenCL implementation uses the resource requirements of the - kernel (register usage etc.) to determine what this work-group size - should be. +For example, if the binary is as described in SPIR version 1.2, then - As a result and unlike {CL_DEVICE_MAX_WORK_GROUP_SIZE} this value may - vary from one kernel to another as well as one device to another. +`-spir-std=1.2` - {CL_KERNEL_WORK_GROUP_SIZE} will be less than or equal to - {CL_DEVICE_MAX_WORK_GROUP_SIZE} for a given kernel object. -| {CL_KERNEL_COMPILE_WORK_GROUP_SIZE_anchor} +must be specified. +Failing to specify these compile options may result in +implementation-defined behavior. +endif::cl_khr_spir[] -include::{generated}/api/version-notes/CL_KERNEL_COMPILE_WORK_GROUP_SIZE.asciidoc[] - | {size_t_TYPE}[3] - | Returns the work-group size specified in the kernel source or IL. - If the work-group size is not specified in the kernel source or IL, - (0, 0, 0) is returned. -| {CL_KERNEL_LOCAL_MEM_SIZE_anchor} +=== Unloading the OpenCL Compiler -include::{generated}/api/version-notes/CL_KERNEL_LOCAL_MEM_SIZE.asciidoc[] - | {cl_ulong_TYPE} - | Returns the amount of local memory in bytes being used by a kernel. - This includes local memory that may be needed by an implementation - to execute the kernel, variables declared inside the kernel with the - `+__local+` address qualifier and local memory to be allocated for - arguments to the kernel declared as pointers with the `+__local+` - address qualifier and whose size is specified with {clSetKernelArg}. +[open,refpage='clUnloadPlatformCompiler',desc='Allows the implementation to release the resources allocated by the OpenCL compiler for a platform.',type='protos'] +-- +To unload an OpenCL compiler for a platform, call the function - If the local memory size, for any pointer argument to the kernel - declared with the `+__local+` address qualifier, is not specified, - its size is assumed to be 0. 
-| {CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE_anchor} +include::{generated}/api/protos/clUnloadPlatformCompiler.txt[] +include::{generated}/api/version-notes/clUnloadPlatformCompiler.asciidoc[] -include::{generated}/api/version-notes/CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE.asciidoc[] - | {size_t_TYPE} - | Returns the preferred multiple of work-group size for launch. - This is a performance hint. - Specifying a work-group size that is not a multiple of the value - returned by this query as the value of the local work size argument - to {clEnqueueNDRangeKernel} will not fail to enqueue the kernel for - execution unless the work-group size specified is larger than the - device maximum. -| {CL_KERNEL_PRIVATE_MEM_SIZE_anchor} + * _platform_ is the platform to unload. -include::{generated}/api/version-notes/CL_KERNEL_PRIVATE_MEM_SIZE.asciidoc[] - | {cl_ulong_TYPE} - | Returns the minimum amount of private memory, in bytes, used by each - work-item in the kernel. - This value may include any private memory needed by an - implementation to execute the kernel, including that used by the - language built-ins and variable declared inside the kernel with the - `+__private+` qualifier. -|==== +This function allows the implementation to release the resources allocated +by the OpenCL compiler for _platform_. +This is a hint from the application and does not guarantee that the compiler +will not be used in the future or that the compiler will actually be +unloaded by the implementation. +Calls to {clBuildProgram}, {clCompileProgram} or {clLinkProgram} after +{clUnloadPlatformCompiler} will reload the compiler, if necessary, to build +the appropriate program executable. // refError -{clGetKernelWorkGroupInfo} returns {CL_SUCCESS} if the function is executed +{clUnloadPlatformCompiler} returns {CL_SUCCESS} if the function is executed successfully. 
Otherwise, it returns one of the following errors: - * {CL_INVALID_DEVICE} if _device_ is not in the list of devices associated - with _kernel_ or if _device_ is `NULL` but there is more than one device - associated with _kernel_. - * {CL_INVALID_VALUE} if _param_name_ is not valid, or if size in bytes - specified by _param_value_size_ is < size of return type as described in - the <> table - and _param_value_ is not `NULL`. - * {CL_INVALID_VALUE} if _param_name_ is {CL_KERNEL_GLOBAL_WORK_SIZE} and - _device_ is not a custom device and _kernel_ is not a built-in kernel. - * {CL_INVALID_KERNEL} if _kernel_ is a not a valid kernel object. - * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required - by the OpenCL implementation on the device. - * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources - required by the OpenCL implementation on the host. + * {CL_INVALID_PLATFORM} if _platform_ is not a valid platform. -- -[open,refpage='clGetKernelSubGroupInfo',desc='Returns information about the kernel object.',type='protos'] +[open,refpage='clUnloadCompiler',desc='Allows the implementation to release the resources allocated by the OpenCL compiler.',type='protos'] -- -To return information about a kernel object, call the function +Alternatively, if you are not using OpenCL via the ICD loader, you may unload the OpenCL compiler with the function -include::{generated}/api/protos/clGetKernelSubGroupInfo.txt[] -include::{generated}/api/version-notes/clGetKernelSubGroupInfo.asciidoc[] -Also see extension *cl_khr_subgroups*. +include::{generated}/api/protos/clUnloadCompiler.txt[] +include::{generated}/api/version-notes/clUnloadCompiler.asciidoc[] - * _kernel_ specifies the kernel object being queried. - * _device_ identifies a specific device in the list of devices associated with - _kernel_. - The list of devices is the list of devices in the OpenCL context that is - associated with _kernel_. 
- If the list of devices associated with _kernel_ is a single device, _device_ - can be a `NULL` value. +This function allows the implementation to release the resources allocated +by the OpenCL compiler. +This is a hint from the application and does not guarantee that the compiler +will not be used in the future or that the compiler will actually be +unloaded by the implementation. +Calls to {clBuildProgram}, {clCompileProgram} or {clLinkProgram} after +{clUnloadCompiler} will reload the compiler, if necessary, to build +the appropriate program executable. + +// refError + +{clUnloadCompiler} will always return {CL_SUCCESS}. +-- + + +=== Program Object Queries + +[open,refpage='clGetProgramInfo',desc='Returns information about the program object.',type='protos'] +-- +To return information about a program object, call the function + +include::{generated}/api/protos/clGetProgramInfo.txt[] +include::{generated}/api/version-notes/clGetProgramInfo.asciidoc[] + + * _program_ specifies the program object being queried. * _param_name_ specifies the information to query. The list of supported _param_name_ types and the information returned in - _param_value_ by {clGetKernelSubGroupInfo} is described in the - <> table. - * _input_value_size_ is used to specify the size in bytes of memory pointed to - by _input_value_. - This size must be == size of input type as described in the table below. - * _input_value_ is a pointer to memory where the appropriate parameterization - of the query is passed from. - If _input_value_ is `NULL`, it is ignored. + _param_value_ by {clGetProgramInfo} is described in the + <> table. * _param_value_ is a pointer to memory where the appropriate result being queried is returned. If _param_value_ is `NULL`, it is ignored. * _param_value_size_ is used to specify the size in bytes of memory pointed to by _param_value_. This size must be {geq} size of return type as described in the - <> table. + <> table. 
* _param_value_size_ret_ returns the actual size in bytes of data being queried by _param_name_. If _param_value_size_ret_ is `NULL`, it is ignored. -[[kernel-sub-group-info-table]] -.List of supported param_names by {clGetKernelSubGroupInfo} -[width="100%",cols="<25%,<25%,<25%,<25%",options="header"] +[[program-info-table]] +.List of supported param_names by {clGetProgramInfo} +[width="100%",cols="<33%,<17%,<50%",options="header"] |==== -| Kernel Sub-group Info | Input Type | Return Type | Description -| {CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_anchor} +| Program Info | Return Type | Description +| {CL_PROGRAM_REFERENCE_COUNT_anchor} footnote:[{fn-reference-count-usage}] -include::{generated}/api/version-notes/CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE.asciidoc[] -Also see extension *cl_khr_subgroups*. - | {size_t_TYPE}* - | {size_t_TYPE} - | Returns the maximum sub-group size for this kernel. - All sub-groups must be the same size, while the last sub-group in - any work-group (i.e. the sub-group with the maximum index) could - be the same or smaller size. +include::{generated}/api/version-notes/CL_PROGRAM_REFERENCE_COUNT.asciidoc[] + | {cl_uint_TYPE} + | Return the _program_ reference count. +| {CL_PROGRAM_CONTEXT_anchor} - The _input_value_ must be an array of {size_t_TYPE} values - corresponding to the local work size parameter of the intended - dispatch. - The number of dimensions in the ND-range will be inferred from - the value specified for _input_value_size_. -| {CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_anchor} +include::{generated}/api/version-notes/CL_PROGRAM_CONTEXT.asciidoc[] + | {cl_context_TYPE} + | Return the context specified when the program object is created +| {CL_PROGRAM_NUM_DEVICES_anchor} -include::{generated}/api/version-notes/CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE.asciidoc[] -Also see extension *cl_khr_subgroups*. 
- | {size_t_TYPE}* - | {size_t_TYPE} - | Returns the number of sub-groups that will be present in each - work-group for a given local work size. - All workgroups, apart from the last work-group in each dimension - in the presence of non-uniform work-group sizes, will have the - same number of sub-groups. +include::{generated}/api/version-notes/CL_PROGRAM_NUM_DEVICES.asciidoc[] + | {cl_uint_TYPE} + | Return the number of devices associated with _program_. +| {CL_PROGRAM_DEVICES_anchor} - The _input_value_ must be an array of {size_t_TYPE} values - corresponding to the local work size parameter of the intended - dispatch. - The number of dimensions in the ND-range will be inferred from - the value specified for _input_value_size_. -| {CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT_anchor} +include::{generated}/api/version-notes/CL_PROGRAM_DEVICES.asciidoc[] + | {cl_device_id_TYPE}[] + | Return the list of devices associated with the program object. + This can be the devices associated with context on which the program + object has been created or can be a subset of devices that are + specified when a program object is created using + {clCreateProgramWithBinary}. +| {CL_PROGRAM_SOURCE_anchor} -include::{generated}/api/version-notes/CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT.asciidoc[] -Also see extension *cl_khr_subgroups*. +include::{generated}/api/version-notes/CL_PROGRAM_SOURCE.asciidoc[] + | {char_TYPE}[] + | Return the program source code specified by + {clCreateProgramWithSource}. + The source string returned is a concatenation of all source strings + specified to {clCreateProgramWithSource} with a null terminator. + The concatenation strips any nulls in the original source strings. 
+ + If _program_ is created using {clCreateProgramWithBinary}, + {clCreateProgramWithIL}, +ifdef::cl_khr_il_program[{clCreateProgramWithILKHR},] + or {clCreateProgramWithBuiltInKernels}, a null string or the + appropriate program source code is returned depending on whether or + not the program source code is stored in the binary. + + The actual number of characters that represents the program source + code including the null terminator is returned in + _param_value_size_ret_. +| {CL_PROGRAM_IL_anchor} + +include::{generated}/api/version-notes/CL_PROGRAM_IL.asciidoc[] + +ifdef::cl_khr_il_program[] +{CL_PROGRAM_IL_KHR_anchor} + +include::{generated}/api/version-notes/CL_PROGRAM_IL_KHR.asciidoc[] +endif::cl_khr_il_program[] + | {char_TYPE}[] + | Returns the program IL for programs created with +ifdef::cl_khr_il_program[{clCreateProgramWithILKHR} or] + {clCreateProgramWithIL}. + + If _program_ is created with {clCreateProgramWithSource}, + {clCreateProgramWithBinary} or {clCreateProgramWithBuiltInKernels} + the memory pointed to by param_value will be unchanged and + _param_value_size_ret_ will be set to 0. +| {CL_PROGRAM_BINARY_SIZES_anchor} + +include::{generated}/api/version-notes/CL_PROGRAM_BINARY_SIZES.asciidoc[] + | {size_t_TYPE}[] + | Returns an array that contains the size in bytes of the program + binary (could be an executable binary, compiled binary or library + binary) for each device associated with program. + The size of the array is the number of devices associated with + program. + If a binary is not available for a device(s), a size of zero is + returned. + + If _program_ is created using {clCreateProgramWithBuiltInKernels}, + the implementation may return zero in any entries of the returned + array. 
+| {CL_PROGRAM_BINARIES_anchor} + +include::{generated}/api/version-notes/CL_PROGRAM_BINARIES.asciidoc[] + | {unsigned_char_TYPE}*[] + | Return the program binaries (could be an executable binary, compiled + binary or library binary) for all devices associated with program. + For each device in program, the binary returned can be the binary + specified for the device when program is created with + {clCreateProgramWithBinary} or it can be the executable binary + generated by {clBuildProgram} or {clLinkProgram}. + If _program_ is created with {clCreateProgramWithSource} or + {clCreateProgramWithIL}, the binary returned is the binary generated + by {clBuildProgram}, {clCompileProgram} or {clLinkProgram}. + The bits returned can be an implementation-specific intermediate + representation (a.k.a. IR) or device specific executable bits or + both. + The decision on which information is returned in the binary is up to + the OpenCL implementation. + + param_value points to an array of `n` pointers allocated by the + caller, where `n` is the number of devices associated with program. + The buffer sizes needed to allocate the memory that these `n` + pointers refer to can be queried using the {CL_PROGRAM_BINARY_SIZES} + query as described in this table. + + Each entry in this array is used by the implementation as the + location in memory where to copy the program binary for a specific + device, if there is a binary available. + To find out which device the program binary in the array refers to, + use the {CL_PROGRAM_DEVICES} query to get the list of devices. + There is a one-to-one correspondence between the array of n pointers + returned by {CL_PROGRAM_BINARIES} and array of devices returned by + {CL_PROGRAM_DEVICES}. +| {CL_PROGRAM_NUM_KERNELS_anchor} + +include::{generated}/api/version-notes/CL_PROGRAM_NUM_KERNELS.asciidoc[] | {size_t_TYPE} - | {size_t_TYPE}[] - | Returns the local size that will generate the requested number - of sub-groups for the kernel. 
- The output array must be an array of {size_t_TYPE} values corresponding - to the local size parameter. - Any returned work-group will have one dimension. - Other dimensions inferred from the value specified for - param_value_size will be filled with the value 1. - The returned value will produce an exact number of sub-groups - and result in no partial groups for an executing kernel except - in the case where the last work-group in a dimension has a size - different from that of the other groups. - If no work-group size can accommodate the requested number of - sub-groups, 0 will be returned in each element of the return - array. -| {CL_KERNEL_MAX_NUM_SUB_GROUPS_anchor} + | Returns the number of kernels declared in _program_ that can be + created with {clCreateKernel}. + This information is only available after a successful program + executable has been built for at least one device in the list of + devices associated with _program_. +| {CL_PROGRAM_KERNEL_NAMES_anchor} -include::{generated}/api/version-notes/CL_KERNEL_MAX_NUM_SUB_GROUPS.asciidoc[] -Also see extension *cl_khr_subgroups*. - | ignored - | {size_t_TYPE} - | This provides a mechanism for the application to query the - maximum number of sub-groups that may make up each work-group to - execute a kernel on a specific device given by device. - The OpenCL implementation uses the resource requirements of the - kernel (register usage etc.) to determine what this work-group - size should be. - The returned value may be used to compute a work-group size to - enqueue the kernel with to give a round number of sub-groups for - an enqueue. -| {CL_KERNEL_COMPILE_NUM_SUB_GROUPS_anchor} +include::{generated}/api/version-notes/CL_PROGRAM_KERNEL_NAMES.asciidoc[] + | {char_TYPE}[] + | Returns a semi-colon separated list of kernel names in _program_ + that can be created with {clCreateKernel}. 
+ This information is only available after a successful program + executable has been built for at least one device in the list of + devices associated with _program_. +| {CL_PROGRAM_SCOPE_GLOBAL_CTORS_PRESENT_anchor} -include::{generated}/api/version-notes/CL_KERNEL_COMPILE_NUM_SUB_GROUPS.asciidoc[] -Also see extension *cl_khr_subgroups*. - | ignored - | {size_t_TYPE} - | Returns the number of sub-groups per work-group specified in the kernel - source or IL. If the sub-group count is not specified then 0 is returned. +include::{generated}/api/version-notes/CL_PROGRAM_SCOPE_GLOBAL_CTORS_PRESENT.asciidoc[] + | {cl_bool_TYPE} + | This indicates that the _program_ object contains non-trivial + constructor(s) that will be executed by runtime before any kernel + from the program is executed. + This information is only available after a successful program + executable has been built for at least one device in the list of + devices associated with _program_. + + Querying {CL_PROGRAM_SCOPE_GLOBAL_CTORS_PRESENT} may unconditionally + return {CL_FALSE} if no devices associated with _program_ support + constructors for program scope global variables. + Support for constructors and destructors for program scope global + variables is required only for OpenCL 2.2 devices. +| {CL_PROGRAM_SCOPE_GLOBAL_DTORS_PRESENT_anchor} + +include::{generated}/api/version-notes/CL_PROGRAM_SCOPE_GLOBAL_DTORS_PRESENT.asciidoc[] + | {cl_bool_TYPE} + | This indicates that the program object contains non-trivial + destructor(s) that will be executed by runtime when _program_ is + destroyed. + This information is only available after a successful program + executable has been built for at least one device in the list of + devices associated with _program_. + + Querying {CL_PROGRAM_SCOPE_GLOBAL_DTORS_PRESENT} may unconditionally + return {CL_FALSE} if no devices associated with _program_ support + destructors for program scope global variables. 
+ Support for constructors and destructors for program scope global + variables is required only for OpenCL 2.2 devices. |==== // refError -{clGetKernelSubGroupInfo} returns {CL_SUCCESS} if the function is executed +{clGetProgramInfo} returns {CL_SUCCESS} if the function is executed successfully. Otherwise, it returns one of the following errors: - * {CL_INVALID_DEVICE} if _device_ is not in the list of devices associated - with _kernel_ or if _device_ is `NULL` but there is more than one device - associated with _kernel_. - * {CL_INVALID_OPERATION} if _device_ does not support sub-groups. * {CL_INVALID_VALUE} if _param_name_ is not valid, or if size in bytes specified by _param_value_size_ is < size of return type as described in - the <> table - and _param_value_ is not `NULL`. - * {CL_INVALID_VALUE} if _param_name_ is - {CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE}, - {CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE} or - {CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT} and the size in bytes specified - by _input_value_size_ is not valid or if _input_value_ is `NULL`. - * {CL_INVALID_KERNEL} if _kernel_ is a not a valid kernel object. + the <<program-info-table,Program Object Queries>> table and + _param_value_ is not `NULL`. + * {CL_INVALID_PROGRAM} if _program_ is not a valid program object. + * {CL_INVALID_PROGRAM_EXECUTABLE} if _param_name_ is + {CL_PROGRAM_NUM_KERNELS}, {CL_PROGRAM_KERNEL_NAMES}, + {CL_PROGRAM_SCOPE_GLOBAL_CTORS_PRESENT}, or + {CL_PROGRAM_SCOPE_GLOBAL_DTORS_PRESENT} and a successful program executable + has not been built for at least one device in the list of devices + associated with _program_. * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required by the OpenCL implementation on the device. * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources required by the OpenCL implementation on the host. 
-- -[open,refpage='clGetKernelArgInfo',desc='Returns information about the arguments of a kernel.',type='protos'] +[open,refpage='clGetProgramBuildInfo',desc='Returns build information for each device in the program object.',type='protos'] -- -To return information about the arguments of a kernel, call the function +To return build information for each device in the program object, call the +function -include::{generated}/api/protos/clGetKernelArgInfo.txt[] -include::{generated}/api/version-notes/clGetKernelArgInfo.asciidoc[] +include::{generated}/api/protos/clGetProgramBuildInfo.txt[] +include::{generated}/api/version-notes/clGetProgramBuildInfo.asciidoc[] - * _kernel_ specifies the kernel object being queried. - * _arg_index_ is the argument index. - Arguments to the kernel are referred by indices that go from 0 for the - leftmost argument to _n_ - 1, where _n_ is the total number of arguments - declared by a kernel. - * _param_name_ specifies the argument information to query. + * _program_ specifies the program object being queried. + * _device_ specifies the device for which build information is being queried. + _device_ must be a valid device associated with _program_. + * _param_name_ specifies the information to query. The list of supported _param_name_ types and the information returned in - _param_value_ by {clGetKernelArgInfo} is described in the - <> table. + _param_value_ by {clGetProgramBuildInfo} is described in the + <> table. * _param_value_ is a pointer to memory where the appropriate result being queried is returned. If _param_value_ is `NULL`, it is ignored. * _param_value_size_ is used to specify the size in bytes of memory pointed to by _param_value_. - This size must be > size of return type as described in the - <> table. - * _param_value_size ret_ returns the actual size in bytes of data being + This size must be {geq} size of return type as described in the + <> table. 
+ * _param_value_size_ret_ returns the actual size in bytes of data being queried by _param_name_. If _param_value_size_ret_ is `NULL`, it is ignored. -Kernel argument information is only available if the program object -associated with _kernel_ is created with {clCreateProgramWithSource} and the -program executable was built with the `-cl-kernel-arg-info option` specified -in options argument to {clBuildProgram} or {clCompileProgram}. - -[[kernel-argument-info-table]] -.List of supported param_names by {clGetKernelArgInfo} +[[program-build-info-table]] +.List of supported param_names by {clGetProgramBuildInfo} [width="100%",cols="<33%,<17%,<50%",options="header"] |==== -| Kernel Arg Info | Return Type | Description -| {CL_KERNEL_ARG_ADDRESS_QUALIFIER_anchor} - -include::{generated}/api/version-notes/CL_KERNEL_ARG_ADDRESS_QUALIFIER.asciidoc[] - | {cl_kernel_arg_address_qualifier_TYPE} - | Returns the address qualifier specified for the argument given by - _arg_index_. - This can be one of the following values: +| Program Build Info | Return Type | Description +| {CL_PROGRAM_BUILD_STATUS_anchor} - {CL_KERNEL_ARG_ADDRESS_GLOBAL_anchor} + - {CL_KERNEL_ARG_ADDRESS_LOCAL_anchor} + - {CL_KERNEL_ARG_ADDRESS_CONSTANT_anchor} + - {CL_KERNEL_ARG_ADDRESS_PRIVATE_anchor} +include::{generated}/api/version-notes/CL_PROGRAM_BUILD_STATUS.asciidoc[] + | {cl_build_status_TYPE} + | Returns the build, compile or link status, whichever was performed + last on the specified _program_ object for _device_. - If no address qualifier is specified, the default address qualifier - which is {CL_KERNEL_ARG_ADDRESS_PRIVATE} is returned. -| {CL_KERNEL_ARG_ACCESS_QUALIFIER_anchor} + This can be one of the following: -include::{generated}/api/version-notes/CL_KERNEL_ARG_ACCESS_QUALIFIER.asciidoc[] - | {cl_kernel_arg_access_qualifier_TYPE} - | Returns the access qualifier specified for the argument given by - _arg_index_. 
- This can be one of the following values: + {CL_BUILD_NONE_anchor} - The build status returned if no {clBuildProgram}, + {clCompileProgram} or {clLinkProgram} has been performed on the + specified _program_ object for _device_. - {CL_KERNEL_ARG_ACCESS_READ_ONLY_anchor} + - {CL_KERNEL_ARG_ACCESS_WRITE_ONLY_anchor} + - {CL_KERNEL_ARG_ACCESS_READ_WRITE_anchor} + - {CL_KERNEL_ARG_ACCESS_NONE_anchor} + {CL_BUILD_ERROR_anchor} - The build status returned if {clBuildProgram}, + {clCompileProgram} or {clLinkProgram} - whichever was performed last + on the specified _program_ object for _device_ - generated an error. - If argument is not an image type and is not declared with the pipe - qualifier, {CL_KERNEL_ARG_ACCESS_NONE} is returned. - If argument is an image type, the access qualifier specified or the - default access qualifier is returned. -| {CL_KERNEL_ARG_TYPE_NAME_anchor} + {CL_BUILD_SUCCESS_anchor} - The build status returned if {clBuildProgram}, + {clCompileProgram} or {clLinkProgram} - whichever was performed last + on the specified _program_ object for _device_ - was successful. -include::{generated}/api/version-notes/CL_KERNEL_ARG_TYPE_NAME.asciidoc[] - | {char_TYPE}[] - | Returns the type name specified for the argument given by - _arg_index_. - The type name returned will be the argument type name as it was - declared with any whitespace removed. - If argument type name is an unsigned scalar type (i.e. unsigned - char, unsigned short, unsigned int, unsigned long), uchar, ushort, - uint and ulong will be returned. - The argument type name returned does not include any type - qualifiers. -| {CL_KERNEL_ARG_TYPE_QUALIFIER_anchor} + {CL_BUILD_IN_PROGRESS_anchor} - The build status returned if + {clBuildProgram}, {clCompileProgram} or {clLinkProgram} - whichever + was performed last on the specified _program_ object for _device_ - has + not finished.
+| {CL_PROGRAM_BUILD_OPTIONS_anchor} -include::{generated}/api/version-notes/CL_KERNEL_ARG_TYPE_QUALIFIER.asciidoc[] - | {cl_kernel_arg_type_qualifier_TYPE} - | Returns a bitfield describing one or more type qualifiers specified - for the argument given by _arg_index_. - The returned values can be: - - {CL_KERNEL_ARG_TYPE_CONST_anchor} - footnote:[{fn-kernel-arg-type-qualifier}] - footnote:[{fn-kernel-arg-type-const-addr-space}] + - {CL_KERNEL_ARG_TYPE_RESTRICT_anchor} + - {CL_KERNEL_ARG_TYPE_VOLATILE_anchor} + - {CL_KERNEL_ARG_TYPE_PIPE_anchor}, or + - {CL_KERNEL_ARG_TYPE_NONE_anchor} +include::{generated}/api/version-notes/CL_PROGRAM_BUILD_OPTIONS.asciidoc[] + | {char_TYPE}[] + | Return the build, compile or link options specified by the options + argument in {clBuildProgram}, {clCompileProgram} or {clLinkProgram}, + whichever was performed last on the specified _program_ object for + _device_. - {CL_KERNEL_ARG_TYPE_NONE} is returned for all parameters passed by - value. -| {CL_KERNEL_ARG_NAME_anchor} + If build status of the specified _program_ for _device_ is + {CL_BUILD_NONE}, an empty string is returned. +| {CL_PROGRAM_BUILD_LOG_anchor} -include::{generated}/api/version-notes/CL_KERNEL_ARG_NAME.asciidoc[] +include::{generated}/api/version-notes/CL_PROGRAM_BUILD_LOG.asciidoc[] | {char_TYPE}[] - | Returns the name specified for the argument given by _arg_index_. -|==== + | Return the build, compile or link log for {clBuildProgram}, + {clCompileProgram} or {clLinkProgram}, whichever was performed last + on program for device. -{clGetKernelArgInfo} returns {CL_SUCCESS} if the function is executed -successfully. -Otherwise, it returns one of the following errors: + If build status of the specified _program_ for _device_ is + {CL_BUILD_NONE}, an empty string is returned. +| {CL_PROGRAM_BINARY_TYPE_anchor} - * {CL_INVALID_ARG_INDEX} if _arg_index_ is not a valid argument index. 
- * {CL_INVALID_VALUE} if _param_name_ is not valid, or if size in bytes - specified by _param_value_ size is < size of return type as described in - the <> table and - _param_value_ is not `NULL`. - * {CL_KERNEL_ARG_INFO_NOT_AVAILABLE} if the argument information is not - available for kernel. - * {CL_INVALID_KERNEL} if _kernel_ is a not a valid kernel object. --- +include::{generated}/api/version-notes/CL_PROGRAM_BINARY_TYPE.asciidoc[] + | {cl_program_binary_type_TYPE} + | Return the program binary type for device. + This can be one of the following values: + {CL_PROGRAM_BINARY_TYPE_NONE_anchor} - There is no binary associated + with the specified _program_ object for _device_. -== Executing Kernels + {CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT_anchor} - A compiled binary is + associated with _device_. + This is the case when the specified _program_ object was created using + {clCreateProgramWithSource} and compiled using {clCompileProgram}, or + when a compiled binary was loaded using {clCreateProgramWithBinary}. -[open,refpage='clEnqueueNDRangeKernel',desc='Enqueues a command to execute a kernel on a device.',type='protos'] --- -To enqueue a command to execute a kernel on a device, call the function + {CL_PROGRAM_BINARY_TYPE_LIBRARY_anchor} - A library binary is + associated with _device_. + This is the case when the specified _program_ object was linked by + {clLinkProgram} using the `-create-library` link option, or when a + compiled library binary was loaded using {clCreateProgramWithBinary}. -include::{generated}/api/protos/clEnqueueNDRangeKernel.txt[] -include::{generated}/api/version-notes/clEnqueueNDRangeKernel.asciidoc[] + {CL_PROGRAM_BINARY_TYPE_EXECUTABLE_anchor} - An executable binary is + associated with _device_. + This is the case when the specified _program_ object was linked by + {clLinkProgram} without the `-create-library` link option, or when an + executable binary was built using {clBuildProgram}. 
- * _command_queue_ is a valid host command-queue. - The kernel will be queued for execution on the device associated with - _command_queue_. - * _kernel_ is a valid kernel object. - The OpenCL context associated with _kernel_ and _command-queue_ must be the - same. - * _work_dim_ is the number of dimensions used to specify the global work-items - and work-items in the work-group. - _work_dim_ must be greater than zero and less than or equal to - {CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS}. - If _global_work_size_ is `NULL`, or the value in any passed dimension is 0 - then the kernel command will trivially succeed after its event dependencies - are satisfied and subsequently update its completion event. - The behavior in this situation is similar to that of an enqueued marker, - except that unlike a marker, an enqueued kernel with no events passed to - _event_wait_list_ may run at any time. - * _global_work_offset_ can be used to specify an array of _work_dim_ unsigned - values that describe the offset used to calculate the global ID of a - work-item. - If _global_work_offset_ is `NULL`, the global IDs start at offset (0, 0, 0). - _global_work_offset_ must be `NULL` <> version 1.1. - * _global_work_size_ points to an array of _work_dim_ unsigned values that - describe the number of global work-items in _work_dim_ dimensions that will - execute the kernel function. - The total number of global work-items is computed as _global_work_size_[0] - {times} ... {times} _global_work_size_[_work_dim_ - 1]. - * _local_work_size_ points to an array of _work_dim_ unsigned values that - describe the number of work-items that make up a work-group (also referred - to as the size of the work-group) that will execute the kernel specified by - _kernel_. - The total number of work-items in a work-group is computed as - _local_work_size_[0] {times} ... {times} _local_work_size_[_work_dim_ - 1]. 
- The total number of work-items in the work-group must be less than or equal - to the {CL_KERNEL_WORK_GROUP_SIZE} value specified in the - <> table, and the - number of work-items specified in _local_work_size_[0], ..., - _local_work_size_[_work_dim_ - 1] must be less than or equal to the - corresponding values specified by {CL_DEVICE_MAX_WORK_ITEM_SIZES}[0], ..., - {CL_DEVICE_MAX_WORK_ITEM_SIZES}[_work_dim_ - 1]. - The explicitly specified _local_work_size_ will be used to determine how to - break the global work-items specified by _global_work_size_ into appropriate - work-group instances. - * _event_wait_list_ and _num_events_in_wait_list_ specify events that need to - complete before this particular command can be executed. - If _event_wait_list_ is `NULL`, then this particular command does not wait - on any event to complete. - If _event_wait_list_ is `NULL`, _num_events_in_wait_list_ must be 0. - If _event_wait_list_ is not `NULL`, the list of events pointed to by - _event_wait_list_ must be valid and _num_events_in_wait_list_ must be - greater than 0. - The events specified in _event_wait_list_ act as synchronization points. - The context associated with events in _event_wait_list_ and _command_queue_ - must be the same. - The memory associated with _event_wait_list_ can be reused or freed after - the function returns. - * _event_ returns an event object that identifies this command and - can be used to query or wait for this command to complete. - If _event_ is `NULL` or the enqueue is unsuccessful, no event will be - created and therefore it will not be possible to query the status of this - command or to wait for this command to complete. - If _event_wait_list_ and _event_ are not `NULL`, _event_ must not refer - to an element of the _event_wait_list_ array. +ifdef::cl_khr_spir[] + {CL_PROGRAM_BINARY_TYPE_INTERMEDIATE_anchor} -- An intermediate + (non-source) representation for the program is loaded as a binary. 
+ The program must be further processed with {clCompileProgram} or + {clBuildProgram}. -An ND-range kernel command may require uniform work-groups or may support non-uniform work-groups. -To support non-uniform work-groups: + If processed with {clCompileProgram}, the result will be a binary of + type {CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT} or + {CL_PROGRAM_BINARY_TYPE_LIBRARY}. + If processed with {clBuildProgram}, the result will be a binary of + type {CL_PROGRAM_BINARY_TYPE_EXECUTABLE}. -. The device associated with _command_queue_ must support non-uniform work-groups. -. The program object associated with _kernel_ must support non-uniform work-groups. -Specifically, this means: -.. If the program was created with {clCreateProgramWithSource}, the program must be compiled or built using the `-cl-std=CL2.0` or `-cl-std=CL3.0` build option and without the `-cl-uniform-work-group-size` build option. -.. If the program was created with {clCreateProgramWithIL} or {clCreateProgramWithBinary}, the program must be compiled or built without the `-cl-uniform-work-group-size` build options. -.. If the program was created using {clLinkProgram}, all input programs must support non-uniform work-groups. +include::{generated}/api/version-notes/CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE.asciidoc[] +endif::cl_khr_spir[] -If non-uniform work-groups are supported, any single dimension -for which the global size is not divisible by the local size will be -partitioned into two regions. -One region will have work-groups that have the same number of work-items as -was specified by the local size parameter in that dimension. -The other region will have work-groups with less than the number of work -items specified by the local size parameter in that dimension. -The global IDs and group IDs of the work-items in the first region will be -numerically lower than those in the second, and the second region will be at -most one work-group wide in that dimension. 
-Work-group sizes could be non-uniform in multiple dimensions, potentially -producing work-groups of up to 4 different sizes in a 2D range and 8 -different sizes in a 3D range. +| {CL_PROGRAM_BUILD_GLOBAL_VARIABLE_TOTAL_SIZE_anchor} -If non-uniform work-groups are supported and _local_work_size_ is `NULL`, the OpenCL runtime may choose a uniform or non-uniform work-group size. +include::{generated}/api/version-notes/CL_PROGRAM_BUILD_GLOBAL_VARIABLE_TOTAL_SIZE.asciidoc[] + | {size_t_TYPE} + | The total amount of storage, in bytes, used by program variables in + the global address space. +|==== -Otherwise, when non-uniform work-groups are not supported, the size of each work-group must be uniform. -If _local_work_size_ is specified, the values specified in _global_work_size_[0], ..., _global_work_size_[_work_dim_ - 1] must be evenly divisible by the corresponding values specified in _local_work_size_[0], ..., _local_work_size_[_work_dim_ - 1]. -If _local_work_size_ is `NULL`, the OpenCL runtime must choose a uniform work-group size. +// refError -The work-group size to be used for _kernel_ can also be specified in the -program source or intermediate language. -In this case the size of work-group specified by _local_work_size_ must -match the value specified in the program source. +{clGetProgramBuildInfo} returns {CL_SUCCESS} if the function is executed +successfully. +Otherwise, it returns one of the following errors: -These work-group instances are executed in parallel across multiple compute -units or concurrently on the same compute unit. + * {CL_INVALID_DEVICE} if _device_ is not in the list of devices associated + with _program_. + * {CL_INVALID_VALUE} if _param_name_ is not valid, or if size in bytes + specified by _param_value_size_ is < size of return type as described in + the <<program-build-info-table,Program Build Queries>> table and + _param_value_ is not `NULL`. + * {CL_INVALID_PROGRAM} if _program_ is not a valid program object.
+ * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required + by the OpenCL implementation on the device. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. -Each work-item is uniquely identified by a global identifier. -The global ID, which can be read inside the kernel, is computed using the -value given by _global_work_size_ and _global_work_offset_. -In addition, a work-item is also identified within a work-group by a unique -local ID. -The local ID, which can also be read by the kernel, is computed using the -value given by _local_work_size_. -The starting local ID is always (0, 0, ..., 0). +[NOTE] +==== +A program binary (compiled binary, library binary or executable binary) +built for a parent device can be used by all its sub-devices. +If a program binary has not been built for a sub-device, the program binary +associated with the parent device will be used. -// refError +A program binary for a device specified with {clCreateProgramWithBinary} or +queried using {clGetProgramInfo} can be used as the binary for the +associated root device, and all sub-devices created from the root-level +device or sub-devices thereof. +==== +-- -{clEnqueueNDRangeKernel} returns {CL_SUCCESS} if the kernel-instance was -successfully queued. -Otherwise, it returns one of the following errors: - * {CL_INVALID_PROGRAM_EXECUTABLE} if there is no successfully built program - executable available for device associated with _command_queue_. - * {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not a valid host - command-queue. +== Kernel Objects + +A kernel is a function declared in a program. +A kernel is identified by the `+__kernel+` qualifier applied to any function +in a program. +A kernel object encapsulates the specific `+__kernel+` function declared in +a program and the argument values to be used when executing this +`+__kernel+` function. 
+ + +=== Creating Kernel Objects + +[open,refpage='clCreateKernel',desc='Creates a kernel object.',type='protos'] +-- +To create a kernel object, use the function + +include::{generated}/api/protos/clCreateKernel.txt[] +include::{generated}/api/version-notes/clCreateKernel.asciidoc[] + + * _program_ is a program object with a successfully built executable. + * _kernel_name_ is a function name in the program declared with the + `+__kernel+` qualifier. + * _errcode_ret_ will return an appropriate error code. + If _errcode_ret_ is `NULL`, no error code is returned. + +// refError + +{clCreateKernel} returns a valid non-zero kernel object and _errcode_ret_ is +set to {CL_SUCCESS} if the kernel object is created successfully. +Otherwise, it returns a `NULL` value with one of the following error values +returned in _errcode_ret_: + + * {CL_INVALID_PROGRAM} if _program_ is not a valid program object. + * {CL_INVALID_PROGRAM_EXECUTABLE} if there is no successfully built + executable for _program_. + * {CL_INVALID_KERNEL_NAME} if _kernel_name_ is not found in _program_. + * {CL_INVALID_KERNEL_DEFINITION} if the function definition for `+__kernel+` + function given by _kernel_name_ such as the number of arguments, the + argument types are not the same for all devices for which the _program_ + executable has been built. + * {CL_INVALID_VALUE} if _kernel_name_ is `NULL`. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required + by the OpenCL implementation on the device. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. 
+-- + +[open,refpage='clCreateKernelsInProgram',desc='Creates kernel objects for all kernel functions in a program object.',type='protos'] +-- +To create kernel objects for all kernel functions in a program, +call the function + +include::{generated}/api/protos/clCreateKernelsInProgram.txt[] +include::{generated}/api/version-notes/clCreateKernelsInProgram.asciidoc[] + + * _program_ is a program object with a successfully built executable. + * _num_kernels_ is the size of memory pointed to by _kernels_ specified as the + number of {cl_kernel_TYPE} entries. + * _kernels_ is the buffer where the kernel objects for kernels in _program_ + will be returned. + If _kernels_ is `NULL`, it is ignored. + If _kernels_ is not `NULL`, _num_kernels_ must be greater than or equal to + the number of kernels in _program_. + * _num_kernels_ret_ is the number of kernels in _program_. + If _num_kernels_ret_ is `NULL`, it is ignored. + +Kernel objects are not created for any `+__kernel+` functions in _program_ +that do not have the same function definition across all devices for which a +program executable has been successfully built. + +Kernel objects can only be created once you have a program object with a +valid program source or binary loaded into the program object and the +program executable has been successfully built for one or more devices +associated with program. +No changes to the program executable are allowed while there are kernel +objects associated with a program object. +This means that calls to {clBuildProgram} and {clCompileProgram} return +{CL_INVALID_OPERATION} if there are kernel objects attached to a program +object. +The OpenCL context associated with _program_ will be the context associated +with _kernel_. +The list of devices associated with _program_ are the devices associated +with _kernel_. +Devices associated with a program object for which a valid program +executable has been built can be used to execute kernels declared in the +program object. 
+ +// refError + +{clCreateKernelsInProgram} will return {CL_SUCCESS} if the kernel objects were +successfully allocated. +Otherwise, it returns one of the following errors: + + * {CL_INVALID_PROGRAM} if _program_ is not a valid program object. + * {CL_INVALID_PROGRAM_EXECUTABLE} if there is no successfully built + executable for any device in _program_. + * {CL_INVALID_VALUE} if _kernels_ is not `NULL` and _num_kernels_ is less + than the number of kernels in _program_. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required + by the OpenCL implementation on the device. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. + +-- + +[open,refpage='clRetainKernel',desc='Increments the kernel object reference count.',type='protos'] +-- +To retain a kernel object, call the function + +include::{generated}/api/protos/clRetainKernel.txt[] +include::{generated}/api/version-notes/clRetainKernel.asciidoc[] + + * _kernel_ is the kernel object to be retained. + +The _kernel_ reference count is incremented. + +// refError + +{clRetainKernel} returns {CL_SUCCESS} if the function is executed +successfully. +Otherwise, it returns one of the following errors: + * {CL_INVALID_KERNEL} if _kernel_ is not a valid kernel object. - * {CL_INVALID_CONTEXT} if context associated with _command_queue_ and - _kernel_ are not the same or if the context associated with - _command_queue_ and events in _event_wait_list_ are not the same. - * {CL_INVALID_KERNEL_ARGS} if the kernel argument values have not been - specified. - * {CL_INVALID_WORK_DIMENSION} if _work_dim_ is not a valid value (i.e. a - value between 1 and {CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS}). - * {CL_INVALID_GLOBAL_WORK_SIZE} if _global_work_size_ is NULL or if any of - the values specified in _global_work_size_[0], ... - _global_work_size_[_work_dim_ - 1] are 0. - Returning this error code under these circumstances is <> version 2.1. 
- * {CL_INVALID_GLOBAL_WORK_SIZE} if any of the values specified in - _global_work_size_[0], ... _global_work_size_[_work_dim_ - 1] exceed the - maximum value representable by {size_t_TYPE} on the device on which the - kernel-instance will be enqueued. - * {CL_INVALID_GLOBAL_OFFSET} if the value specified in _global_work_size_ - {plus} the corresponding values in _global_work_offset_ for any - dimensions is greater than the maximum value representable by size t on - the device on which the kernel-instance will be enqueued, or if - _global_work_offset_ is non-`NULL` <> version 1.1. - * {CL_INVALID_WORK_GROUP_SIZE} if _local_work_size_ is specified and does - not match the required work-group size for _kernel_ in the program - source. - * {CL_INVALID_WORK_GROUP_SIZE} if _local_work_size_ is specified and is not - consistent with the required number of sub-groups for _kernel_ in the - program source. - * {CL_INVALID_WORK_GROUP_SIZE} if _local_work_size_ is specified and the - total number of work-items in the work-group computed as - _local_work_size_[0] {times} ... _local_work_size_[_work_dim_ - 1] is - greater than the value specified by {CL_KERNEL_WORK_GROUP_SIZE} in the - <> table. - * {CL_INVALID_WORK_GROUP_SIZE} if the work-group size must be uniform and - the _local_work_size_ is not `NULL`, is not equal to the required - work-group size specified in the kernel source, or the - _global_work_size_ is not evenly divisible by the _local_work_size_. - * {CL_INVALID_WORK_ITEM_SIZE} if the number of work-items specified in any - of _local_work_size_[0], ... _local_work_size_[_work_dim_ - 1] is - greater than the corresponding values specified by - {CL_DEVICE_MAX_WORK_ITEM_SIZES}[0], ..., - {CL_DEVICE_MAX_WORK_ITEM_SIZES}[_work_dim_ - 1]. 
- * {CL_MISALIGNED_SUB_BUFFER_OFFSET} if a sub-buffer object is specified as - the value for an argument that is a buffer object and the _offset_ - specified when the sub-buffer object is created is not aligned to - {CL_DEVICE_MEM_BASE_ADDR_ALIGN} value for device associated with _queue_. - This error code is <> version 1.1. - * {CL_INVALID_IMAGE_SIZE} if an image object is specified as an argument - value and the image dimensions (image width, height, specified or - compute row and/or slice pitch) are not supported by device associated - with _queue_. - * {CL_IMAGE_FORMAT_NOT_SUPPORTED} if an image object is specified as an - argument value and the image format (image channel order and data type) - is not supported by device associated with _queue_. - * {CL_OUT_OF_RESOURCES} if there is a failure to queue the execution - instance of _kernel_ on the command-queue because of insufficient - resources needed to execute the kernel. - For example, the explicitly specified _local_work_size_ causes a failure - to execute the kernel because of insufficient resources such as - registers or local memory. - Another example would be the number of read-only image args used in - _kernel_ exceed the {CL_DEVICE_MAX_READ_IMAGE_ARGS} value for device or - the number of write-only and read-write image args used in _kernel_ - exceed the {CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS} value for device or the - number of samplers used in _kernel_ exceed {CL_DEVICE_MAX_SAMPLERS} for - device. - * {CL_MEM_OBJECT_ALLOCATION_FAILURE} if there is a failure to allocate - memory for data store associated with image or buffer objects specified - as arguments to _kernel_. - * {CL_INVALID_EVENT_WAIT_LIST} if _event_wait_list_ is `NULL` and - _num_events_in_wait_list_ > 0, or _event_wait_list_ is not `NULL` and - _num_events_in_wait_list_ is 0, or if event objects in _event_wait_list_ - are not valid events. 
- * {CL_INVALID_OPERATION} if SVM pointers are passed as arguments to a kernel - and the device does not support SVM or if system pointers are passed as - arguments to a kernel and/or stored inside SVM allocations passed as - kernel arguments and the device does not support fine grain system SVM - allocations. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required + by the OpenCL implementation on the device. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. + +{clCreateKernel} and {clCreateKernelsInProgram} do an implicit retain. +-- + +[open,refpage='clReleaseKernel',desc='Decrements the kernel reference count.',type='protos'] +-- +To release a kernel object, call the function + +include::{generated}/api/protos/clReleaseKernel.txt[] +include::{generated}/api/version-notes/clReleaseKernel.asciidoc[] + + * _kernel_ is the kernel object to be released. + +The _kernel_ reference count is decremented. + +The kernel object is deleted once the number of instances that are retained +to _kernel_ becomes zero and the kernel object is no longer needed by any +enqueued commands that use _kernel_. +Using this function to release a reference that was not obtained by creating +the object or by calling {clRetainKernel} causes undefined behavior. + +// refError + +{clReleaseKernel} returns {CL_SUCCESS} if the function is executed +successfully. +Otherwise, it returns one of the following errors: + + * {CL_INVALID_KERNEL} if _kernel_ is not a valid kernel object. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required + by the OpenCL implementation on the device. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. +-- + + +[[setting-kernel-arguments]] +=== Setting Kernel Arguments + +To execute a kernel, the kernel arguments must be set.
+ +[open,refpage='clSetKernelArg',desc='Set the argument value for a specific argument of a kernel.',type='protos'] +-- +To set the argument value for a specific argument of a kernel, call the +function + +include::{generated}/api/protos/clSetKernelArg.txt[] +include::{generated}/api/version-notes/clSetKernelArg.asciidoc[] + + * _kernel_ is a valid kernel object. + * _arg_index_ is the argument index. + Arguments to the kernel are referred by indices that go from 0 for the + leftmost argument to _n_ - 1, where _n_ is the total number of arguments + declared by a kernel (see below). + * _arg_size_ specifies the size of the argument value. + If the argument is a memory object, the _arg_size_ value must be equal to + `sizeof({cl_mem_TYPE})`. + For arguments declared with the `local` qualifier, the size specified will + be the size in bytes of the buffer that must be allocated for the `local` + argument. + If the argument is of type _sampler_t_, the _arg_size_ value must be equal + to `sizeof({cl_sampler_TYPE})`. + If the argument is of type _queue_t_, the _arg_size_ value must be equal to + `sizeof({cl_command_queue_TYPE})`. + For all other arguments, the size will be the size of argument type. + * _arg_value_ is a pointer to data that should be used as the argument value + for argument specified by _arg_index_. + The argument data pointed to by _arg_value_ is copied and the _arg_value_ + pointer can therefore be reused by the application after {clSetKernelArg} + returns. + The argument value specified is the value used by all API calls that enqueue + _kernel_ ({clEnqueueNDRangeKernel} and {clEnqueueTask}) until the argument + value is changed by a call to {clSetKernelArg} for _kernel_. + +For example, consider the following kernel: + +[source,opencl_c] +---- +kernel void image_filter (int n, + int m, + constant float *filter_weights, + read_only image2d_t src_image, + write_only image2d_t dst_image) +{ +... 
+} +---- + +Argument index values for `image_filter` will be 0 for `n`, 1 for `m`, 2 for +`filter_weights`, 3 for `src_image` and 4 for `dst_image`. + +If the argument is a memory object (buffer, pipe, image or image array), the +_arg_value_ entry will be a pointer to the appropriate buffer, pipe, image +or image array object. +The memory object must be created with the context associated with the +kernel object. +If the argument is a buffer object, the _arg_value_ pointer can be `NULL` or +point to a `NULL` value in which case a `NULL` value will be used as the +value for the argument declared as a pointer to `global` or `constant` +memory in the kernel. +If the argument is declared with the `local` qualifier, the _arg_value_ +entry must be `NULL`. +If the argument is of type _sampler_t_, the _arg_value_ entry must be a +pointer to the sampler object. +If the argument is of type _queue_t_, the _arg_value_ entry must be a +pointer to the device queue object. + +ifdef::cl_khr_gl_msaa_sharing[] +If the `<<cl_khr_gl_msaa_sharing>>` extension is supported, then: +If the argument is a multi-sample 2D image, the _arg_value_ entry must be a +pointer to a multi-sample image object. +If the argument is a multi-sample 2D depth image, the _arg_value_ entry must +be a pointer to a multi-sample depth image object. +If the argument is a multi-sample 2D image array, the _arg_value_ entry must +be a pointer to a multi-sample image array object. +If the argument is a multi-sample 2D depth image array, the _arg_value_ +entry must be a pointer to a multi-sample depth image array object. +endif::cl_khr_gl_msaa_sharing[] + +If the argument is declared to be a pointer of a built-in scalar or vector +type, or a user defined structure type in the global or constant address +space, the memory object specified as argument value must be a buffer object +(or `NULL`).
+If the argument is declared with the `constant` qualifier, the size in bytes +of the memory object cannot exceed {CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE} and +the number of arguments declared as pointers to `constant` memory cannot +exceed {CL_DEVICE_MAX_CONSTANT_ARGS}. + +The memory object specified as argument value must be a pipe object if the +argument is declared with the _pipe_ qualifier. + +The memory object specified as argument value must be a 2D image object if +the argument is declared to be of type _image2d_t_. +The memory object specified as argument value must be a 2D image object with +image channel order = {CL_DEPTH} if the argument is declared to be of type +_image2d_depth_t_. +The memory object specified as argument value must be a 3D image object if +argument is declared to be of type _image3d_t_. +The memory object specified as argument value must be a 1D image object if +the argument is declared to be of type _image1d_t_. +The memory object specified as argument value must be a 1D image buffer +object if the argument is declared to be of type _image1d_buffer_t_. +The memory object specified as argument value must be a 1D image array +object if argument is declared to be of type _image1d_array_t_. +The memory object specified as argument value must be a 2D image array +object if argument is declared to be of type _image2d_array_t_. +The memory object specified as argument value must be a 2D image array +object with image channel order = {CL_DEPTH} if argument is declared to be of +type _image2d_array_depth_t_. + +For all other kernel arguments, the _arg_value_ entry must be a pointer to +the actual data to be used as argument value. + +[NOTE] +==== +A kernel object does not update the reference count for objects such as +memory or sampler objects specified as argument values by {clSetKernelArg}. +Users may not rely on a kernel object to retain objects specified as +argument values to the kernel. 
+ +Implementations shall not allow {cl_kernel_TYPE} objects to hold reference +counts to {cl_kernel_TYPE} arguments, because no mechanism is provided for the +user to tell the kernel to release that ownership right. +If the kernel holds ownership rights on kernel args, that would make it +impossible for users to tell with certainty when they may safely +release user allocated resources associated with OpenCL objects such as +the {cl_mem_TYPE} backing store used with {CL_MEM_USE_HOST_PTR}. +==== + +// refError + +{clSetKernelArg} returns {CL_SUCCESS} if the function was executed +successfully. +Otherwise, it returns one of the following errors: + + * {CL_INVALID_KERNEL} if _kernel_ is not a valid kernel object. + * {CL_INVALID_ARG_INDEX} if _arg_index_ is not a valid argument index. + * {CL_INVALID_ARG_VALUE} if _arg_value_ specified is not a valid value. + * {CL_INVALID_MEM_OBJECT} for an argument declared to be a memory object + when the specified _arg_value_ is not a valid memory object. +ifdef::cl_khr_depth_images,cl_khr_gl_msaa_sharing[] + * {CL_INVALID_MEM_OBJECT} for an argument declared to be a +ifdef::cl_khr_depth_images[] + depth image, depth image array, +endif::cl_khr_depth_images[] +ifdef::cl_khr_gl_msaa_sharing[] + multi-sample image, multi-sample image array, multi-sample depth image, + or a multi-sample depth image array +endif::cl_khr_gl_msaa_sharing[] + when the specified _arg_value_ does not follow the rules described above + for a depth memory object or memory array object argument. +endif::cl_khr_depth_images,cl_khr_gl_msaa_sharing[] + * {CL_INVALID_SAMPLER} for an argument declared to be of type _sampler_t_ + when the specified _arg_value_ is not a valid sampler object. + * {CL_INVALID_DEVICE_QUEUE} for an argument declared to be of type _queue_t_ + when the specified _arg_value_ is not a valid device queue object. + This error code is <<unified-spec, missing before>> version 2.0.
+ * {CL_INVALID_ARG_SIZE} if _arg_size_ does not match the size of the data + type for an argument that is not a memory object or if the argument is a + memory object and _arg_size_ != `sizeof({cl_mem_TYPE})` or if _arg_size_ is + zero and the argument is declared with the `local` qualifier or if the + argument is a sampler and _arg_size_ != `sizeof({cl_sampler_TYPE})`. + * {CL_MAX_SIZE_RESTRICTION_EXCEEDED} if the size in bytes of the memory + object (if the argument is a memory object) or _arg_size_ (if the + argument is declared with the `local` qualifier) exceeds a + language-specified maximum size restriction for this argument, such as the + *MaxByteOffset* SPIR-V decoration. + This error code is <<unified-spec, missing before>> version 2.2. + * {CL_INVALID_ARG_VALUE} if the argument is an image declared with the + `read_only` qualifier and _arg_value_ refers to an image object created + with _cl_mem_flags_ of {CL_MEM_WRITE_ONLY} or if the image argument is + declared with the `write_only` qualifier and _arg_value_ refers to an + image object created with _cl_mem_flags_ of {CL_MEM_READ_ONLY}. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required + by the OpenCL implementation on the device. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. + +When {clSetKernelArg} returns an error code different from {CL_SUCCESS}, the +internal state of _kernel_ may only be modified when that error code is +{CL_OUT_OF_RESOURCES} or {CL_OUT_OF_HOST_MEMORY}. When the internal state +of _kernel_ is modified, it is implementation-defined whether: + + * The argument value that was previously set is kept so that it can be used in + further kernel enqueues. + * The argument value is unset such that a subsequent kernel enqueue fails with + {CL_INVALID_KERNEL_ARGS}.
footnote:[{fn-setkernelarg-prefer-unset-on-error}] +-- + +[open,refpage='clSetKernelArgSVMPointer',desc='Set a SVM pointer as the argument value for a specific argument of a kernel.',type='protos'] +-- +To set a SVM pointer as the argument value for a specific argument of a +kernel, call the function + +include::{generated}/api/protos/clSetKernelArgSVMPointer.txt[] +include::{generated}/api/version-notes/clSetKernelArgSVMPointer.asciidoc[] + + * _kernel_ is a valid kernel object. + * _arg_index_ is the argument index. + Arguments to the kernel are referred to by indices that go from 0 for the + leftmost argument to _n_ - 1, where _n_ is the total number of arguments + declared by a kernel. + * _arg_value_ is the SVM pointer that should be used as the argument value for + the argument specified by _arg_index_. + The SVM pointer specified is the value used by all API calls that enqueue + _kernel_ ({clEnqueueNDRangeKernel} and {clEnqueueTask}) until the argument + value is changed by a call to {clSetKernelArgSVMPointer} for _kernel_. + The SVM pointer can only be used for arguments that are declared to be a + pointer to `global` or `constant` memory. + The SVM pointer value must be aligned according to the argument's type. + For example, if the argument is declared to be `+global float4 *p+`, the SVM + pointer value passed for `p` must be at a minimum aligned to a `float4`. + The SVM pointer value specified as the argument value can be the pointer + returned by {clSVMAlloc} or can be a pointer offset into the SVM region. + +// refError + +{clSetKernelArgSVMPointer} returns {CL_SUCCESS} if the function was executed +successfully. +Otherwise, it returns one of the following errors: + + * {CL_INVALID_KERNEL} if _kernel_ is not a valid kernel object. + * {CL_INVALID_OPERATION} if no devices in the context associated with _kernel_ support SVM. + * {CL_INVALID_ARG_INDEX} if _arg_index_ is not a valid argument index.
+ * {CL_INVALID_ARG_VALUE} if _arg_value_ specified is not a valid value. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required + by the OpenCL implementation on the device. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. +-- + +[open,refpage='clSetKernelExecInfo',desc='Pass additional information other than argument values to a kernel.',type='protos'] +-- +To pass additional information other than argument values to a kernel, call +the function + +include::{generated}/api/protos/clSetKernelExecInfo.txt[] +include::{generated}/api/version-notes/clSetKernelExecInfo.asciidoc[] + + * _kernel_ specifies the kernel object to which the information is passed. + * _param_name_ specifies the information to be passed to kernel. + The list of supported _param_name_ types and the corresponding values passed + in _param_value_ is described in the <<kernel-exec-info-table,Kernel Execution Information>> table. + * _param_value_size_ specifies the size in bytes of the memory pointed to by + _param_value_. + * _param_value_ is a pointer to memory where the appropriate values determined + by _param_name_ are specified. + +[[kernel-exec-info-table]] +.List of supported param_names by {clSetKernelExecInfo} +[width="100%",cols="<33%,<17%,<50%",options="header"] +|==== +| Kernel Exec Info | Type | Description +| {CL_KERNEL_EXEC_INFO_SVM_PTRS_anchor} + +include::{generated}/api/version-notes/CL_KERNEL_EXEC_INFO_SVM_PTRS.asciidoc[] + | {void_TYPE}*[] + | SVM pointers must reference locations contained entirely within + buffers that are passed to kernel as arguments, or that are passed + through the execution information. + + Non-argument SVM buffers must be specified by passing pointers to + those buffers via {clSetKernelExecInfo} for coarse-grain and + fine-grain buffer SVM allocations but not for fine-grain system SVM + allocations.
+| {CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM_anchor} + +include::{generated}/api/version-notes/CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM.asciidoc[] + | {cl_bool_TYPE} + | This flag indicates whether the kernel uses pointers that are + fine-grain system SVM allocations. + These fine-grain system SVM pointers may be passed as arguments or + defined in SVM buffers that are passed as arguments to _kernel_. +|==== + +// refError + +{clSetKernelExecInfo} returns {CL_SUCCESS} if the function is executed +successfully. +Otherwise, it returns one of the following errors: + + * {CL_INVALID_KERNEL} if _kernel_ is not a valid kernel object. + * {CL_INVALID_OPERATION} if no devices in the context associated with _kernel_ support SVM. + * {CL_INVALID_VALUE} if _param_name_ is not valid, if _param_value_ is + `NULL` or if the size specified by _param_value_size_ is not valid. + * {CL_INVALID_OPERATION} if _param_name_ is + {CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM} and _param_value_ is {CL_TRUE} + but no devices in the context associated with _kernel_ support fine-grain + system SVM allocations. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required + by the OpenCL implementation on the device. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. +-- + +[NOTE] +==== +Coarse-grain or fine-grain buffer SVM pointers used by a kernel which +are not passed as kernel arguments must be specified using +{clSetKernelExecInfo} with {CL_KERNEL_EXEC_INFO_SVM_PTRS}. +For example, if SVM buffer A contains a pointer to another SVM buffer B, +and the kernel dereferences that pointer, then a pointer to B must +either be passed as an argument in the call to that kernel or it must be +made available to the kernel using {clSetKernelExecInfo}.
+For example, we might pass extra SVM pointers as follows: + +[source,opencl] +---- +clSetKernelExecInfo(kernel, + CL_KERNEL_EXEC_INFO_SVM_PTRS, + num_ptrs * sizeof(void *), + extra_svm_ptr_list); +---- + +Here `num_ptrs` specifies the number of additional SVM pointers while +`extra_svm_ptr_list` specifies a pointer to memory containing those SVM +pointers. + +When calling {clSetKernelExecInfo} with {CL_KERNEL_EXEC_INFO_SVM_PTRS} to +specify pointers to non-argument SVM buffers as extra arguments to a kernel, +each of these pointers can be the SVM pointer returned by {clSVMAlloc} or +can be a pointer + offset into the SVM region. +It is sufficient to provide one pointer for each SVM buffer used. + +{CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM} is used to indicate whether +SVM pointers used by a kernel will refer to system allocations or not. + +{CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM} = {CL_FALSE} indicates that the +OpenCL implementation may assume that system pointers are not passed as +kernel arguments and are not stored inside SVM allocations passed as kernel +arguments. + +{CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM} = {CL_TRUE} indicates that the +OpenCL implementation must assume that system pointers might be passed as +kernel arguments and/or stored inside SVM allocations passed as kernel +arguments. +In this case, if the device to which the kernel is enqueued does not support +system SVM pointers, {clEnqueueNDRangeKernel} and {clEnqueueTask} will return a +{CL_INVALID_OPERATION} error. +If none of the devices in the context associated with kernel support +fine-grain system SVM allocations, {clSetKernelExecInfo} will return a +{CL_INVALID_OPERATION} error. + +If {clSetKernelExecInfo} has not been called with a value for +{CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM}, the default value is used for +this kernel attribute. +The default value depends on whether the device on which the kernel is +enqueued supports fine-grain system SVM allocations. 
+If so, the default value used is {CL_TRUE} (system pointers might be passed); +otherwise, the default is {CL_FALSE}. + +A call to {clSetKernelExecInfo} for a given value of _param_name_ +replaces any prior value passed for that value of _param_name_. +Only one _param_value_ will be stored for each value of _param_name_. +==== + + +=== Copying Kernel Objects + +NOTE: Copying kernel objects is <<unified-spec, missing before>> version 2.1. + +[open,refpage='clCloneKernel',desc='Make a shallow copy of the kernel object.',type='protos'] +-- +To clone a kernel object, call the function + +include::{generated}/api/protos/clCloneKernel.txt[] +include::{generated}/api/version-notes/clCloneKernel.asciidoc[] + + * _source_kernel_ is a valid {cl_kernel_TYPE} object that will be copied. + _source_kernel_ will not be modified in any way by this function. + * _errcode_ret_ will be assigned an appropriate error code. + If _errcode_ret_ is `NULL`, no error code is returned. + +Cloning is used to make a shallow copy of the kernel object, its arguments +and any information passed to the kernel object using {clSetKernelExecInfo}. +If the kernel object was ready to be enqueued before copying it, the clone +of the kernel object is ready to enqueue. + +The returned kernel object is an exact copy of _source_kernel_, with one +caveat: the reference count on the returned kernel object is set as if it +had been returned by {clCreateKernel}. +The reference count of _source_kernel_ will not be changed. + +The resulting kernel will be in the same state as if {clCreateKernel} is +called to create the resultant kernel with the same arguments as those used +to create _source_kernel_, the latest call to {clSetKernelArg} or +{clSetKernelArgSVMPointer} for each argument index applied to kernel and the +last call to {clSetKernelExecInfo} for each value of the param name +parameter are applied to the new kernel object.
+ +All arguments of the new kernel object must be intact and it may be +correctly used in the same situations as _source_kernel_ except those that assume a +pre-existing reference count. +Setting arguments on the new kernel object will not affect _source_kernel_ +except insofar as the argument points to a shared underlying entity and in +that situation behavior is as if two kernel objects had been created and the +same argument applied to each. +Only the data stored in the kernel object is copied; data referenced by the +kernel's arguments are not copied. +For example, if a buffer or pointer argument is set on a kernel object, the +pointer is copied but the underlying memory allocation is not. + +// refError + +{clCloneKernel} returns a valid non-zero kernel object and _errcode_ret_ is +set to {CL_SUCCESS} if the kernel is successfully copied. +Otherwise it returns a `NULL` value with one of the following error values +returned in _errcode_ret_: + + * {CL_INVALID_KERNEL} if _source_kernel_ is not a valid kernel object. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required + by the OpenCL implementation on the device. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. +-- + + +=== Kernel Object Queries + +[open,refpage='clGetKernelInfo',desc='Returns information about the kernel object.',type='protos'] +-- +To return information about a kernel object, call the function + +include::{generated}/api/protos/clGetKernelInfo.txt[] +include::{generated}/api/version-notes/clGetKernelInfo.asciidoc[] + + * _kernel_ specifies the kernel object being queried. + * _param_name_ specifies the information to query. + The list of supported _param_name_ types and the information returned in + _param_value_ by {clGetKernelInfo} is described in the + <<kernel-info-table,Kernel Object Queries>> table. + * _param_value_ is a pointer to memory where the appropriate result being + queried is returned. + If _param_value_ is `NULL`, it is ignored.
+ * _param_value_size_ is used to specify the size in bytes of memory pointed to + by _param_value_. + This size must be {geq} size of return type as described in the + <<kernel-info-table,Kernel Object Queries>> table. + * _param_value_size_ret_ returns the actual size in bytes of data being + queried by _param_name_. + If _param_value_size_ret_ is `NULL`, it is ignored. + +[[kernel-info-table]] +.List of supported param_names by {clGetKernelInfo} +[width="100%",cols="<33%,<17%,<50%",options="header"] +|==== +| Kernel Info | Return Type | Description +| {CL_KERNEL_FUNCTION_NAME_anchor} + +include::{generated}/api/version-notes/CL_KERNEL_FUNCTION_NAME.asciidoc[] + | {char_TYPE}[] + | Return the kernel function name. +| {CL_KERNEL_NUM_ARGS_anchor} + +include::{generated}/api/version-notes/CL_KERNEL_NUM_ARGS.asciidoc[] + | {cl_uint_TYPE} + | Return the number of arguments to _kernel_. +| {CL_KERNEL_REFERENCE_COUNT_anchor} footnote:[{fn-reference-count-usage}] + +include::{generated}/api/version-notes/CL_KERNEL_REFERENCE_COUNT.asciidoc[] + | {cl_uint_TYPE} + | Return the _kernel_ reference count. +| {CL_KERNEL_CONTEXT_anchor} + +include::{generated}/api/version-notes/CL_KERNEL_CONTEXT.asciidoc[] + | {cl_context_TYPE} + | Return the context associated with _kernel_. +| {CL_KERNEL_PROGRAM_anchor} + +include::{generated}/api/version-notes/CL_KERNEL_PROGRAM.asciidoc[] + | {cl_program_TYPE} + | Return the program object associated with _kernel_. +| {CL_KERNEL_ATTRIBUTES_anchor} + +include::{generated}/api/version-notes/CL_KERNEL_ATTRIBUTES.asciidoc[] + | {char_TYPE}[] + | Returns any attributes specified using the `+__attribute__+` + OpenCL C qualifier (or using an OpenCL {cpp} qualifier syntax [[]] ) + with the kernel function declaration in the program source. + These attributes include attributes described in the earlier OpenCL + C kernel language specifications and other attributes supported by + an implementation.
+ + Attributes are returned as they were declared inside + `+__attribute__((...))+`, with any surrounding whitespace and + embedded newlines removed. + When multiple attributes are present, they are returned as a single, + space-delimited string. + + For kernels not created from OpenCL C source and the + {clCreateProgramWithSource} API call the string returned from this + query will be empty. +|==== + +// refError + +{clGetKernelInfo} returns {CL_SUCCESS} if the function is executed +successfully. +Otherwise, it returns one of the following errors: + + * {CL_INVALID_VALUE} if _param_name_ is not valid, or if size in bytes + specified by _param_value_size_ is < size of return type as described in + the <<kernel-info-table,Kernel Object Queries>> table and _param_value_ + is not `NULL`. + * {CL_INVALID_KERNEL} if _kernel_ is not a valid kernel object. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required + by the OpenCL implementation on the device. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. +-- + +[open,refpage='clGetKernelWorkGroupInfo',desc='Returns information about the kernel object that may be specific to a device.',type='protos'] +-- +To return information about the kernel object that may be specific to a +device, call the function + +include::{generated}/api/protos/clGetKernelWorkGroupInfo.txt[] +include::{generated}/api/version-notes/clGetKernelWorkGroupInfo.asciidoc[] + + * _kernel_ specifies the kernel object being queried. + * _device_ identifies a specific device in the list of devices associated with + _kernel_. + The list of devices is the list of devices in the OpenCL context that is + associated with _kernel_. + If the list of devices associated with _kernel_ is a single device, _device_ + can be a `NULL` value. + * _param_name_ specifies the information to query.
+ The list of supported _param_name_ types and the information returned in + _param_value_ by {clGetKernelWorkGroupInfo} is described in the + <<kernel-workgroup-info-table,Kernel Object Device Queries>> table. + * _param_value_ is a pointer to memory where the appropriate result being + queried is returned. + If _param_value_ is `NULL`, it is ignored. + * _param_value_size_ is used to specify the size in bytes of memory pointed to + by _param_value_. + This size must be {geq} size of return type as described in the + <<kernel-workgroup-info-table,Kernel Object Device Queries>> table. + * _param_value_size_ret_ returns the actual size in bytes of data being + queried by _param_name_. + If _param_value_size_ret_ is `NULL`, it is ignored. + +[[kernel-workgroup-info-table]] +.List of supported param_names by {clGetKernelWorkGroupInfo} +[width="100%",cols="<33%,<17%,<50%",options="header"] +|==== +| Kernel Work-group Info | Return Type | Description +| {CL_KERNEL_GLOBAL_WORK_SIZE_anchor} + +include::{generated}/api/version-notes/CL_KERNEL_GLOBAL_WORK_SIZE.asciidoc[] + | {size_t_TYPE}[3] + | This provides a mechanism for the application to query the maximum + global size that can be used to execute a kernel (i.e. + _global_work_size_ argument to {clEnqueueNDRangeKernel}) on a custom + device given by _device_ or a built-in kernel on an OpenCL device + given by _device_. + + If _device_ is not a custom device and _kernel_ is not a built-in + kernel, {clGetKernelWorkGroupInfo} returns the error + {CL_INVALID_VALUE}. +| {CL_KERNEL_WORK_GROUP_SIZE_anchor} + +include::{generated}/api/version-notes/CL_KERNEL_WORK_GROUP_SIZE.asciidoc[] + | {size_t_TYPE} + | This provides a mechanism for the application to query the maximum + work-group size that can be used to execute the kernel on a specific + device given by _device_. + The OpenCL implementation uses the resource requirements of the + kernel (register usage etc.) to determine what this work-group size + should be.
+ + As a result and unlike {CL_DEVICE_MAX_WORK_GROUP_SIZE} this value may + vary from one kernel to another as well as one device to another. + + {CL_KERNEL_WORK_GROUP_SIZE} will be less than or equal to + {CL_DEVICE_MAX_WORK_GROUP_SIZE} for a given kernel object. +| {CL_KERNEL_COMPILE_WORK_GROUP_SIZE_anchor} + +include::{generated}/api/version-notes/CL_KERNEL_COMPILE_WORK_GROUP_SIZE.asciidoc[] + | {size_t_TYPE}[3] + | Returns the work-group size specified in the kernel source or IL. + + If the work-group size is not specified in the kernel source or IL, + (0, 0, 0) is returned. +| {CL_KERNEL_LOCAL_MEM_SIZE_anchor} + +include::{generated}/api/version-notes/CL_KERNEL_LOCAL_MEM_SIZE.asciidoc[] + | {cl_ulong_TYPE} + | Returns the amount of local memory in bytes being used by a kernel. + This includes local memory that may be needed by an implementation + to execute the kernel, variables declared inside the kernel with the + `+__local+` address qualifier and local memory to be allocated for + arguments to the kernel declared as pointers with the `+__local+` + address qualifier and whose size is specified with {clSetKernelArg}. + + If the local memory size, for any pointer argument to the kernel + declared with the `+__local+` address qualifier, is not specified, + its size is assumed to be 0. +| {CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE_anchor} + +include::{generated}/api/version-notes/CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE.asciidoc[] + | {size_t_TYPE} + | Returns the preferred multiple of work-group size for launch. + This is a performance hint. + Specifying a work-group size that is not a multiple of the value + returned by this query as the value of the local work size argument + to {clEnqueueNDRangeKernel} will not fail to enqueue the kernel for + execution unless the work-group size specified is larger than the + device maximum. 
+| {CL_KERNEL_PRIVATE_MEM_SIZE_anchor} + +include::{generated}/api/version-notes/CL_KERNEL_PRIVATE_MEM_SIZE.asciidoc[] + | {cl_ulong_TYPE} + | Returns the minimum amount of private memory, in bytes, used by each + work-item in the kernel. + This value may include any private memory needed by an + implementation to execute the kernel, including that used by the + language built-ins and variables declared inside the kernel with the + `+__private+` qualifier. +|==== + +// refError + +{clGetKernelWorkGroupInfo} returns {CL_SUCCESS} if the function is executed +successfully. +Otherwise, it returns one of the following errors: + + * {CL_INVALID_DEVICE} if _device_ is not in the list of devices associated + with _kernel_ or if _device_ is `NULL` but there is more than one device + associated with _kernel_. + * {CL_INVALID_VALUE} if _param_name_ is not valid, or if size in bytes + specified by _param_value_size_ is < size of return type as described in + the <<kernel-workgroup-info-table,Kernel Object Device Queries>> table + and _param_value_ is not `NULL`. + * {CL_INVALID_VALUE} if _param_name_ is {CL_KERNEL_GLOBAL_WORK_SIZE} and + _device_ is not a custom device and _kernel_ is not a built-in kernel. + * {CL_INVALID_KERNEL} if _kernel_ is not a valid kernel object. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required + by the OpenCL implementation on the device. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. +-- + +[open,refpage='clGetKernelSubGroupInfo',desc='Returns information about the kernel object.',type='protos'] +-- +To return information about a kernel object, call the function + +include::{generated}/api/protos/clGetKernelSubGroupInfo.txt[] +include::{generated}/api/version-notes/clGetKernelSubGroupInfo.asciidoc[] + +Also see `<<cl_khr_subgroups>>`. + + * _kernel_ specifies the kernel object being queried. + * _device_ identifies a specific device in the list of devices associated with + _kernel_.
+ The list of devices is the list of devices in the OpenCL context that is + associated with _kernel_. + If the list of devices associated with _kernel_ is a single device, _device_ + can be a `NULL` value. + * _param_name_ specifies the information to query. + The list of supported _param_name_ types and the information returned in + _param_value_ by {clGetKernelSubGroupInfo} is described in the + <<kernel-sub-group-info-table,Kernel Object Subgroup Queries>> table. + * _input_value_size_ is used to specify the size in bytes of memory pointed to + by _input_value_. + This size must be equal to the size of the input type as described in the table below. + * _input_value_ is a pointer to memory where the appropriate parameterization + of the query is passed from. + If _input_value_ is `NULL`, it is ignored. + * _param_value_ is a pointer to memory where the appropriate result being + queried is returned. + If _param_value_ is `NULL`, it is ignored. + * _param_value_size_ is used to specify the size in bytes of memory pointed to + by _param_value_. + This size must be {geq} size of return type as described in the + <<kernel-sub-group-info-table,Kernel Object Subgroup Queries>> table. + * _param_value_size_ret_ returns the actual size in bytes of data being + queried by _param_name_. + If _param_value_size_ret_ is `NULL`, it is ignored. + +[[kernel-sub-group-info-table]] +.List of supported param_names by {clGetKernelSubGroupInfo} +[width="100%",cols="<25%,<25%,<25%,<25%",options="header"] +|==== +| Kernel Sub-group Info | Input Type | Return Type | Description +| {CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_anchor} + +include::{generated}/api/version-notes/CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE.asciidoc[] + +Also see `<<cl_khr_subgroups>>`. + | {size_t_TYPE}* + | {size_t_TYPE} + | Returns the maximum sub-group size for this kernel. + All sub-groups must be the same size, while the last sub-group in + any work-group (i.e. the sub-group with the maximum index) could + be the same or smaller size.
+ + The _input_value_ must be an array of {size_t_TYPE} values + corresponding to the local work size parameter of the intended + dispatch. + The number of dimensions in the ND-range will be inferred from + the value specified for _input_value_size_. +| {CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_anchor} + +include::{generated}/api/version-notes/CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE.asciidoc[] + +Also see `<<cl_khr_subgroups>>`. + | {size_t_TYPE}* + | {size_t_TYPE} + | Returns the number of sub-groups that will be present in each + work-group for a given local work size. + All work-groups, apart from the last work-group in each dimension + in the presence of non-uniform work-group sizes, will have the + same number of sub-groups. + + The _input_value_ must be an array of {size_t_TYPE} values + corresponding to the local work size parameter of the intended + dispatch. + The number of dimensions in the ND-range will be inferred from + the value specified for _input_value_size_. +| {CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT_anchor} + +include::{generated}/api/version-notes/CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT.asciidoc[] + +Also see `<<cl_khr_subgroups>>`. + | {size_t_TYPE} + | {size_t_TYPE}[] + | Returns the local size that will generate the requested number + of sub-groups for the kernel. + The output array must be an array of {size_t_TYPE} values corresponding + to the local size parameter. + Any returned work-group will have one dimension. + Other dimensions inferred from the value specified for + _param_value_size_ will be filled with the value 1. + The returned value will produce an exact number of sub-groups + and result in no partial groups for an executing kernel except + in the case where the last work-group in a dimension has a size + different from that of the other groups. + If no work-group size can accommodate the requested number of + sub-groups, 0 will be returned in each element of the return + array.
+| {CL_KERNEL_MAX_NUM_SUB_GROUPS_anchor} + +include::{generated}/api/version-notes/CL_KERNEL_MAX_NUM_SUB_GROUPS.asciidoc[] + +Also see `<<cl_khr_subgroups>>`. + | ignored + | {size_t_TYPE} + | This provides a mechanism for the application to query the + maximum number of sub-groups that may make up each work-group to + execute a kernel on a specific device given by _device_. + The OpenCL implementation uses the resource requirements of the + kernel (register usage etc.) to determine what this work-group + size should be. + The returned value may be used to compute a work-group size to + enqueue the kernel with to give a round number of sub-groups for + an enqueue. +| {CL_KERNEL_COMPILE_NUM_SUB_GROUPS_anchor} + +include::{generated}/api/version-notes/CL_KERNEL_COMPILE_NUM_SUB_GROUPS.asciidoc[] + +Also see `<<cl_khr_subgroups>>`. + | ignored + | {size_t_TYPE} + | Returns the number of sub-groups per work-group specified in the kernel + source or IL. If the sub-group count is not specified then 0 is returned. +|==== + +// refError + +{clGetKernelSubGroupInfo} returns {CL_SUCCESS} if the function is executed +successfully. +Otherwise, it returns one of the following errors: + + * {CL_INVALID_DEVICE} if _device_ is not in the list of devices associated + with _kernel_ or if _device_ is `NULL` but there is more than one device + associated with _kernel_. + * {CL_INVALID_OPERATION} if _device_ does not support sub-groups. + * {CL_INVALID_VALUE} if _param_name_ is not valid, or if size in bytes + specified by _param_value_size_ is < size of return type as described in + the <<kernel-sub-group-info-table,Kernel Object Subgroup Queries>> table + and _param_value_ is not `NULL`. + * {CL_INVALID_VALUE} if _param_name_ is + {CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE}, + {CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE} or + {CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT} and the size in bytes specified + by _input_value_size_ is not valid or if _input_value_ is `NULL`. + * {CL_INVALID_KERNEL} if _kernel_ is not a valid kernel object.
+ * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required + by the OpenCL implementation on the device. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. +-- + +[open,refpage='clGetKernelArgInfo',desc='Returns information about the arguments of a kernel.',type='protos'] +-- +To return information about the arguments of a kernel, call the function + +include::{generated}/api/protos/clGetKernelArgInfo.txt[] +include::{generated}/api/version-notes/clGetKernelArgInfo.asciidoc[] + + * _kernel_ specifies the kernel object being queried. + * _arg_index_ is the argument index. + Arguments to the kernel are referred to by indices that go from 0 for the + leftmost argument to _n_ - 1, where _n_ is the total number of arguments + declared by a kernel. + * _param_name_ specifies the argument information to query. + The list of supported _param_name_ types and the information returned in + _param_value_ by {clGetKernelArgInfo} are described in the + <<kernel-argument-info-table,Kernel Argument Queries>> table. + * _param_value_ is a pointer to memory where the appropriate result being + queried is returned. + If _param_value_ is `NULL`, it is ignored. + * _param_value_size_ is used to specify the size in bytes of memory pointed to + by _param_value_. + This size must be greater than or equal to the size of the return type as + described in the + <<kernel-argument-info-table,Kernel Argument Queries>> table. + * _param_value_size_ret_ returns the actual size in bytes of data being + queried by _param_name_. + If _param_value_size_ret_ is `NULL`, it is ignored.
+ +Kernel argument information is only available if the program object +associated with _kernel_: + +ifdef::cl_khr_spir[] + * is created with {clCreateProgramWithBinary} and the program executable + is built with the `-cl-kernel-arg-info` and `-x spir` options specified + in the _options_ argument to {clBuildProgram} or {clCompileProgram}, if + the `<<cl_khr_spir>>` extension is supported; or, +endif::cl_khr_spir[] + * is created with {clCreateProgramWithSource} and the program executable + is built with the `-cl-kernel-arg-info` option specified in the + _options_ argument to {clBuildProgram} or {clCompileProgram}. + +[[kernel-argument-info-table]] +.List of supported param_names by {clGetKernelArgInfo} +[width="100%",cols="<33%,<17%,<50%",options="header"] +|==== +| Kernel Arg Info | Return Type | Description +| {CL_KERNEL_ARG_ADDRESS_QUALIFIER_anchor} + +include::{generated}/api/version-notes/CL_KERNEL_ARG_ADDRESS_QUALIFIER.asciidoc[] + | {cl_kernel_arg_address_qualifier_TYPE} + | Returns the address qualifier specified for the argument given by + _arg_index_. + This can be one of the following values: + + {CL_KERNEL_ARG_ADDRESS_GLOBAL_anchor} + + {CL_KERNEL_ARG_ADDRESS_LOCAL_anchor} + + {CL_KERNEL_ARG_ADDRESS_CONSTANT_anchor} + + {CL_KERNEL_ARG_ADDRESS_PRIVATE_anchor} + + If no address qualifier is specified, the default address qualifier, + which is {CL_KERNEL_ARG_ADDRESS_PRIVATE}, is returned. +| {CL_KERNEL_ARG_ACCESS_QUALIFIER_anchor} + +include::{generated}/api/version-notes/CL_KERNEL_ARG_ACCESS_QUALIFIER.asciidoc[] + | {cl_kernel_arg_access_qualifier_TYPE} + | Returns the access qualifier specified for the argument given by + _arg_index_.
+ This can be one of the following values: + + {CL_KERNEL_ARG_ACCESS_READ_ONLY_anchor} + + {CL_KERNEL_ARG_ACCESS_WRITE_ONLY_anchor} + + {CL_KERNEL_ARG_ACCESS_READ_WRITE_anchor} + + {CL_KERNEL_ARG_ACCESS_NONE_anchor} + + If argument is not an image type and is not declared with the pipe + qualifier, {CL_KERNEL_ARG_ACCESS_NONE} is returned. + If argument is an image type, the access qualifier specified or the + default access qualifier is returned. +| {CL_KERNEL_ARG_TYPE_NAME_anchor} + +include::{generated}/api/version-notes/CL_KERNEL_ARG_TYPE_NAME.asciidoc[] + | {char_TYPE}[] + | Returns the type name specified for the argument given by + _arg_index_. + The type name returned will be the argument type name as it was + declared with any whitespace removed. + If argument type name is an unsigned scalar type (i.e. unsigned + char, unsigned short, unsigned int, unsigned long), uchar, ushort, + uint and ulong will be returned. + The argument type name returned does not include any type + qualifiers. +| {CL_KERNEL_ARG_TYPE_QUALIFIER_anchor} + +include::{generated}/api/version-notes/CL_KERNEL_ARG_TYPE_QUALIFIER.asciidoc[] + | {cl_kernel_arg_type_qualifier_TYPE} + | Returns a bitfield describing one or more type qualifiers specified + for the argument given by _arg_index_. + The returned values can be: + + {CL_KERNEL_ARG_TYPE_CONST_anchor} + footnote:[{fn-kernel-arg-type-qualifier}] + footnote:[{fn-kernel-arg-type-const-addr-space}] + + {CL_KERNEL_ARG_TYPE_RESTRICT_anchor} + + {CL_KERNEL_ARG_TYPE_VOLATILE_anchor} + + {CL_KERNEL_ARG_TYPE_PIPE_anchor}, or + + {CL_KERNEL_ARG_TYPE_NONE_anchor} + + {CL_KERNEL_ARG_TYPE_NONE} is returned for all parameters passed by + value. +| {CL_KERNEL_ARG_NAME_anchor} + +include::{generated}/api/version-notes/CL_KERNEL_ARG_NAME.asciidoc[] + | {char_TYPE}[] + | Returns the name specified for the argument given by _arg_index_. +|==== + +{clGetKernelArgInfo} returns {CL_SUCCESS} if the function is executed +successfully. 
+Otherwise, it returns one of the following errors: + + * {CL_INVALID_ARG_INDEX} if _arg_index_ is not a valid argument index. + * {CL_INVALID_VALUE} if _param_name_ is not valid, or if size in bytes + specified by _param_value_size_ is < size of return type as described in + the <<kernel-argument-info-table,Kernel Argument Queries>> table and + _param_value_ is not `NULL`. + * {CL_KERNEL_ARG_INFO_NOT_AVAILABLE} if the argument information is not + available for _kernel_. + * {CL_INVALID_KERNEL} if _kernel_ is not a valid kernel object. +-- + +ifdef::cl_khr_suggested_local_work_size[] +[open,refpage='clGetKernelSuggestedLocalWorkSizeKHR',desc='Query suggested local work size for a kernel object',type='protos'] +-- +To query a suggested local work size for a kernel object, call the function + +include::{generated}/api/protos/clGetKernelSuggestedLocalWorkSizeKHR.txt[] +include::{generated}/api/version-notes/clGetKernelSuggestedLocalWorkSizeKHR.asciidoc[] + + * _command_queue_ specifies the command-queue and device for the query. + * _kernel_ specifies the kernel object and kernel arguments for the query. + The OpenCL context associated with _kernel_ and _command_queue_ must be the + same. + * _work_dim_ specifies the number of work dimensions in the input global + work offset and global work size, and the output suggested local work + size. + * _global_work_offset_ can be used to specify an array of at least + _work_dim_ global ID offset values for the query. + This is optional and may be `NULL` to indicate there is no global ID + offset. + * _global_work_size_ is an array of at least _work_dim_ values describing + the global work size for the query. + * _suggested_local_work_size_ is an output array of at least _work_dim_ + values that will contain the result of the query.
+ +The returned suggested local work size is expected to match the local work +size that would be chosen if the specified kernel object, with the same +kernel arguments, were enqueued into the specified command-queue with the +specified global work size, specified global work offset, and with a `NULL` +local work size. + +// refError + +{clGetKernelSuggestedLocalWorkSizeKHR} returns {CL_SUCCESS} if the query +executed successfully. +Otherwise, it returns one of the following errors: + + * {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not a valid host + command-queue. + * {CL_INVALID_KERNEL} if _kernel_ is not a valid kernel object. + * {CL_INVALID_CONTEXT} if the context associated with _kernel_ is not the + same as the context associated with _command_queue_. + * {CL_INVALID_PROGRAM_EXECUTABLE} if there is no successfully built + program executable available for _kernel_ for the device associated with + _command_queue_. + * {CL_INVALID_KERNEL_ARGS} if all argument values for _kernel_ have not + been set. + * {CL_MISALIGNED_SUB_BUFFER_OFFSET} if a sub-buffer object is set as an + argument to _kernel_ and the offset specified when the sub-buffer object + was created is not aligned to {CL_DEVICE_MEM_BASE_ADDR_ALIGN} for the + device associated with _command_queue_. + * {CL_INVALID_IMAGE_SIZE} if an image object is set as an argument to + _kernel_ and the image dimensions are not supported by device associated + with _command_queue_. + * {CL_IMAGE_FORMAT_NOT_SUPPORTED} if an image object is set as an argument + to _kernel_ and the image format is not supported by the device + associated with _command_queue_. + * {CL_INVALID_OPERATION} if an SVM pointer is set as an argument to + _kernel_ and the device associated with _command_queue_ does not support + SVM or the required SVM capabilities for the SVM pointer. + * {CL_INVALID_WORK_DIMENSION} if _work_dim_ is not a valid value (i.e. a + value between 1 and {CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS}). 
+ * {CL_INVALID_GLOBAL_WORK_SIZE} if _global_work_size_ is `NULL` or if any of + the values specified in _global_work_size_ are 0. + * {CL_INVALID_GLOBAL_WORK_SIZE} if any of the values specified in + _global_work_size_ exceed the maximum value representable by `size_t` on + the device associated with _command_queue_. + * {CL_INVALID_GLOBAL_OFFSET} if the value specified in _global_work_size_ + plus the corresponding value in _global_work_offset_ for any dimension + exceeds the maximum value representable by `size_t` on the device + associated with _command_queue_. + * {CL_INVALID_VALUE} if _suggested_local_work_size_ is `NULL`. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources + required by the OpenCL implementation on the device. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. + +NOTE: These error conditions are consistent with error conditions for +{clEnqueueNDRangeKernel}. +-- +endif::cl_khr_suggested_local_work_size[] + + +== Executing Kernels + +[open,refpage='clEnqueueNDRangeKernel',desc='Enqueues a command to execute a kernel on a device.',type='protos'] +-- +To enqueue a command to execute a kernel on a device, call the function + +include::{generated}/api/protos/clEnqueueNDRangeKernel.txt[] +include::{generated}/api/version-notes/clEnqueueNDRangeKernel.asciidoc[] + + * _command_queue_ is a valid host command-queue. + The kernel will be queued for execution on the device associated with + _command_queue_. + * _kernel_ is a valid kernel object. + The OpenCL context associated with _kernel_ and _command_queue_ must be the + same. + * _work_dim_ is the number of dimensions used to specify the global work-items + and work-items in the work-group. + _work_dim_ must be greater than zero and less than or equal to + {CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS}.
+ If _global_work_size_ is `NULL`, or the value in any passed dimension is 0 + then the kernel command will trivially succeed after its event dependencies + are satisfied and subsequently update its completion event. + The behavior in this situation is similar to that of an enqueued marker, + except that unlike a marker, an enqueued kernel with no events passed to + _event_wait_list_ may run at any time. + * _global_work_offset_ can be used to specify an array of _work_dim_ unsigned + values that describe the offset used to calculate the global ID of a + work-item. + If _global_work_offset_ is `NULL`, the global IDs start at offset (0, 0, 0). + _global_work_offset_ must be `NULL` <> version 1.1. + * _global_work_size_ points to an array of _work_dim_ unsigned values that + describe the number of global work-items in _work_dim_ dimensions that will + execute the kernel function. + The total number of global work-items is computed as _global_work_size_[0] + {times} ... {times} _global_work_size_[_work_dim_ - 1]. + * _local_work_size_ points to an array of _work_dim_ unsigned values that + describe the number of work-items that make up a work-group (also referred + to as the size of the work-group) that will execute the kernel specified by + _kernel_. + The total number of work-items in a work-group is computed as + _local_work_size_[0] {times} ... {times} _local_work_size_[_work_dim_ - 1]. + The total number of work-items in the work-group must be less than or equal + to the {CL_KERNEL_WORK_GROUP_SIZE} value specified in the + <> table, and the + number of work-items specified in _local_work_size_[0], ..., + _local_work_size_[_work_dim_ - 1] must be less than or equal to the + corresponding values specified by {CL_DEVICE_MAX_WORK_ITEM_SIZES}[0], ..., + {CL_DEVICE_MAX_WORK_ITEM_SIZES}[_work_dim_ - 1]. 
+ The explicitly specified _local_work_size_ will be used to determine how to + break the global work-items specified by _global_work_size_ into appropriate + work-group instances. + * _event_wait_list_ and _num_events_in_wait_list_ specify events that need to + complete before this particular command can be executed. + If _event_wait_list_ is `NULL`, then this particular command does not wait + on any event to complete. + If _event_wait_list_ is `NULL`, _num_events_in_wait_list_ must be 0. + If _event_wait_list_ is not `NULL`, the list of events pointed to by + _event_wait_list_ must be valid and _num_events_in_wait_list_ must be + greater than 0. + The events specified in _event_wait_list_ act as synchronization points. + The context associated with events in _event_wait_list_ and _command_queue_ + must be the same. + The memory associated with _event_wait_list_ can be reused or freed after + the function returns. + * _event_ returns an event object that identifies this command and + can be used to query or wait for this command to complete. + If _event_ is `NULL` or the enqueue is unsuccessful, no event will be + created and therefore it will not be possible to query the status of this + command or to wait for this command to complete. + If _event_wait_list_ and _event_ are not `NULL`, _event_ must not refer + to an element of the _event_wait_list_ array. + +An ND-range kernel command may require uniform work-groups or may support non-uniform work-groups. +To support non-uniform work-groups: + +. The device associated with _command_queue_ must support non-uniform work-groups. +. The program object associated with _kernel_ must support non-uniform work-groups. +Specifically, this means: +.. If the program was created with {clCreateProgramWithSource}, the program must be compiled or built using the `-cl-std=CL2.0` or `-cl-std=CL3.0` build option and without the `-cl-uniform-work-group-size` build option. +.. 
If the program was created with {clCreateProgramWithIL} or {clCreateProgramWithBinary}, the program must be compiled or built without the `-cl-uniform-work-group-size` build options. +.. If the program was created using {clLinkProgram}, all input programs must support non-uniform work-groups. + +If non-uniform work-groups are supported, any single dimension +for which the global size is not divisible by the local size will be +partitioned into two regions. +One region will have work-groups that have the same number of work-items as +was specified by the local size parameter in that dimension. +The other region will have work-groups with less than the number of work +items specified by the local size parameter in that dimension. +The global IDs and group IDs of the work-items in the first region will be +numerically lower than those in the second, and the second region will be at +most one work-group wide in that dimension. +Work-group sizes could be non-uniform in multiple dimensions, potentially +producing work-groups of up to 4 different sizes in a 2D range and 8 +different sizes in a 3D range. + +If non-uniform work-groups are supported and _local_work_size_ is `NULL`, the OpenCL runtime may choose a uniform or non-uniform work-group size. + +Otherwise, when non-uniform work-groups are not supported, the size of each work-group must be uniform. +If _local_work_size_ is specified, the values specified in _global_work_size_[0], ..., _global_work_size_[_work_dim_ - 1] must be evenly divisible by the corresponding values specified in _local_work_size_[0], ..., _local_work_size_[_work_dim_ - 1]. +If _local_work_size_ is `NULL`, the OpenCL runtime must choose a uniform work-group size. + +The work-group size to be used for _kernel_ can also be specified in the +program source or intermediate language. +In this case the size of work-group specified by _local_work_size_ must +match the value specified in the program source. 
+ +These work-group instances are executed in parallel across multiple compute +units or concurrently on the same compute unit. + +Each work-item is uniquely identified by a global identifier. +The global ID, which can be read inside the kernel, is computed using the +value given by _global_work_size_ and _global_work_offset_. +In addition, a work-item is also identified within a work-group by a unique +local ID. +The local ID, which can also be read by the kernel, is computed using the +value given by _local_work_size_. +The starting local ID is always (0, 0, ..., 0). + +// refError + +{clEnqueueNDRangeKernel} returns {CL_SUCCESS} if the kernel-instance was +successfully queued. +Otherwise, it returns one of the following errors: + + * {CL_INVALID_PROGRAM_EXECUTABLE} if there is no successfully built program + executable available for device associated with _command_queue_. + * {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not a valid host + command-queue. + * {CL_INVALID_KERNEL} if _kernel_ is not a valid kernel object. + * {CL_INVALID_CONTEXT} if context associated with _command_queue_ and + _kernel_ are not the same or if the context associated with + _command_queue_ and events in _event_wait_list_ are not the same. + * {CL_INVALID_KERNEL_ARGS} if the kernel argument values have not been + specified. + * {CL_INVALID_WORK_DIMENSION} if _work_dim_ is not a valid value (i.e. a + value between 1 and {CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS}). + * {CL_INVALID_GLOBAL_WORK_SIZE} if _global_work_size_ is NULL or if any of + the values specified in _global_work_size_[0], ... + _global_work_size_[_work_dim_ - 1] are 0. + Returning this error code under these circumstances is <> version 2.1. + * {CL_INVALID_GLOBAL_WORK_SIZE} if any of the values specified in + _global_work_size_[0], ... _global_work_size_[_work_dim_ - 1] exceed the + maximum value representable by {size_t_TYPE} on the device on which the + kernel-instance will be enqueued. 
+ * {CL_INVALID_GLOBAL_OFFSET} if the value specified in _global_work_size_ + {plus} the corresponding value in _global_work_offset_ for any + dimension is greater than the maximum value representable by {size_t_TYPE} on + the device on which the kernel-instance will be enqueued, or if + _global_work_offset_ is non-`NULL` <> version 1.1. + * {CL_INVALID_WORK_GROUP_SIZE} if _local_work_size_ is specified and does + not match the required work-group size for _kernel_ in the program + source. + * {CL_INVALID_WORK_GROUP_SIZE} if _local_work_size_ is specified and is not + consistent with the required number of sub-groups for _kernel_ in the + program source. + * {CL_INVALID_WORK_GROUP_SIZE} if _local_work_size_ is specified and the + total number of work-items in the work-group computed as + _local_work_size_[0] {times} ... {times} _local_work_size_[_work_dim_ - 1] is + greater than the value specified by {CL_KERNEL_WORK_GROUP_SIZE} in the + <> table. + * {CL_INVALID_WORK_GROUP_SIZE} if the work-group size must be uniform and + the _local_work_size_ is not `NULL`, is not equal to the required + work-group size specified in the kernel source, or the + _global_work_size_ is not evenly divisible by the _local_work_size_. + * {CL_INVALID_WORK_ITEM_SIZE} if the number of work-items specified in any + of _local_work_size_[0], ... _local_work_size_[_work_dim_ - 1] is + greater than the corresponding values specified by + {CL_DEVICE_MAX_WORK_ITEM_SIZES}[0], ..., + {CL_DEVICE_MAX_WORK_ITEM_SIZES}[_work_dim_ - 1]. + * {CL_MISALIGNED_SUB_BUFFER_OFFSET} if a sub-buffer object is specified as + the value for an argument that is a buffer object and the _offset_ + specified when the sub-buffer object is created is not aligned to + {CL_DEVICE_MEM_BASE_ADDR_ALIGN} value for device associated with _command_queue_. + This error code is <> version 1.1.
+ * {CL_INVALID_IMAGE_SIZE} if an image object is specified as an argument + value and the image dimensions (image width, height, specified or + computed row and/or slice pitch) are not supported by the device associated + with _command_queue_. + * {CL_IMAGE_FORMAT_NOT_SUPPORTED} if an image object is specified as an + argument value and the image format (image channel order and data type) + is not supported by the device associated with _command_queue_. + * {CL_OUT_OF_RESOURCES} if there is a failure to queue the execution + instance of _kernel_ on the command-queue because of insufficient + resources needed to execute the kernel. + For example, the explicitly specified _local_work_size_ causes a failure + to execute the kernel because of insufficient resources such as + registers or local memory. + Another example would be the number of read-only image args used in + _kernel_ exceed the {CL_DEVICE_MAX_READ_IMAGE_ARGS} value for device or + the number of write-only and read-write image args used in _kernel_ + exceed the {CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS} value for device or the + number of samplers used in _kernel_ exceed {CL_DEVICE_MAX_SAMPLERS} for + device. + * {CL_MEM_OBJECT_ALLOCATION_FAILURE} if there is a failure to allocate + memory for data store associated with image or buffer objects specified + as arguments to _kernel_. + * {CL_INVALID_EVENT_WAIT_LIST} if _event_wait_list_ is `NULL` and + _num_events_in_wait_list_ > 0, or _event_wait_list_ is not `NULL` and + _num_events_in_wait_list_ is 0, or if event objects in _event_wait_list_ + are not valid events. + * {CL_INVALID_OPERATION} if SVM pointers are passed as arguments to a kernel + and the device does not support SVM or if system pointers are passed as + arguments to a kernel and/or stored inside SVM allocations passed as + kernel arguments and the device does not support fine grain system SVM + allocations.
+ * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required + by the OpenCL implementation on the device. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. +-- + +[open,refpage='clEnqueueTask',desc='Enqueues a command to execute a kernel, using a single work-item, on a device.',type='protos'] +-- +To enqueue a command to execute a kernel on a device, using a single work-item, +call the function + +include::{generated}/api/protos/clEnqueueTask.txt[] +include::{generated}/api/version-notes/clEnqueueTask.asciidoc[] + + * _command_queue_ is a valid host command-queue. + The kernel will be queued for execution on the device associated with + _command_queue_. + * _kernel_ is a valid kernel object. + The OpenCL context associated with _kernel_ and _command-queue_ must be the + same. + * _event_wait_list_ and _num_events_in_wait_list_ specify events that need to + complete before this particular command can be executed. + If _event_wait_list_ is `NULL`, then this particular command does not wait + on any event to complete. + If _event_wait_list_ is `NULL`, _num_events_in_wait_list_ must be 0. + If _event_wait_list_ is not `NULL`, the list of events pointed to by + _event_wait_list_ must be valid and _num_events_in_wait_list_ must be + greater than 0. + The events specified in _event_wait_list_ act as synchronization points. + The context associated with events in _event_wait_list_ and _command_queue_ + must be the same. + The memory associated with _event_wait_list_ can be reused or freed after + the function returns. + * _event_ returns an event object that identifies this command and + can be used to query or wait for this command to complete. + If _event_ is `NULL` or the enqueue is unsuccessful, no event will be + created and therefore it will not be possible to query the status of this + command or to wait for this command to complete. 
+ If _event_wait_list_ and _event_ are not `NULL`, _event_ must not refer + to an element of the _event_wait_list_ array. + +{clEnqueueTask} is equivalent to calling {clEnqueueNDRangeKernel} with +_work_dim_ set to 1, _global_work_offset_ set to `NULL`, _global_work_size[0]_ +set to 1, and _local_work_size[0]_ set to 1. + +// refError + +{clEnqueueTask} returns {CL_SUCCESS} if the kernel-instance was successfully +queued. +Otherwise, it returns one of the following errors: + + * {CL_INVALID_PROGRAM_EXECUTABLE} if there is no successfully built program + executable available for device associated with _command_queue_. + * {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not a valid host + command-queue. + * {CL_INVALID_KERNEL} if _kernel_ is not a valid kernel object. + * {CL_INVALID_CONTEXT} if context associated with _command_queue_ and + _kernel_ are not the same or if the context associated with + _command_queue_ and events in _event_wait_list_ are not the same. + * {CL_INVALID_KERNEL_ARGS} if the kernel argument values have not been + specified. + * {CL_INVALID_WORK_GROUP_SIZE} if a work-group size is specified for _kernel_ + in the program source and it is not (1, 1, 1). +// TODO I'm not sure if the next error makes sense for a 'task'. + * {CL_INVALID_WORK_GROUP_SIZE} if the required number of sub-groups is + specified for _kernel_ in the program source and is not consistent with a + work-group size of (1, 1, 1). + * {CL_MISALIGNED_SUB_BUFFER_OFFSET} if a sub-buffer object is specified as + the value for an argument that is a buffer object and the _offset_ + specified when the sub-buffer object is created is not aligned to + {CL_DEVICE_MEM_BASE_ADDR_ALIGN} value for device associated with _queue_. + This error code is <> version 1.1. 
+ * {CL_INVALID_IMAGE_SIZE} if an image object is specified as an argument + value and the image dimensions (image width, height, specified or + computed row and/or slice pitch) are not supported by the device associated + with _command_queue_. + * {CL_IMAGE_FORMAT_NOT_SUPPORTED} if an image object is specified as an + argument value and the image format (image channel order and data type) + is not supported by the device associated with _command_queue_. + * {CL_OUT_OF_RESOURCES} if there is a failure to queue the execution + instance of _kernel_ on the command-queue because of insufficient + resources needed to execute the kernel. See how this error code is used + with {clEnqueueNDRangeKernel} for examples. + * {CL_MEM_OBJECT_ALLOCATION_FAILURE} if there is a failure to allocate + memory for data store associated with image or buffer objects specified + as arguments to _kernel_. + * {CL_INVALID_EVENT_WAIT_LIST} if _event_wait_list_ is `NULL` and + _num_events_in_wait_list_ > 0, or _event_wait_list_ is not `NULL` and + _num_events_in_wait_list_ is 0, or if event objects in _event_wait_list_ + are not valid events. + * {CL_INVALID_OPERATION} if SVM pointers are passed as arguments to a kernel + and the device does not support SVM or if system pointers are passed as + arguments to a kernel and/or stored inside SVM allocations passed as + kernel arguments and the device does not support fine grain system SVM + allocations. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required + by the OpenCL implementation on the device. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host.
+-- + +[open,refpage='clEnqueueNativeKernel',desc='Enqueues a command to execute a native C/C++ function not compiled using the OpenCL compiler.',type='protos'] +-- +To enqueue a command to execute a native C/{cpp} function not compiled using +the OpenCL compiler, call the function + +include::{generated}/api/protos/clEnqueueNativeKernel.txt[] +include::{generated}/api/version-notes/clEnqueueNativeKernel.asciidoc[] + + * _command_queue_ is a valid host command-queue. + A native user function can only be executed on a command-queue created on a + device that has {CL_EXEC_NATIVE_KERNEL} capability set in + {CL_DEVICE_EXECUTION_CAPABILITIES} as specified in the + <> table. + * _user_func_ is a pointer to a host-callable user function. + It is the application's responsibility to ensure that the host-callable user + function is thread-safe. + * _args_ is a pointer to the args list that _user_func_ should be called with. + * _cb_args_ is the size in bytes of the args list that _args_ points to. + * _num_mem_objects_ is the number of buffer objects that are passed in _args_. + * _mem_list_ is a list of valid buffer objects, if _num_mem_objects_ > 0. + The buffer object values specified in _mem_list_ are memory object handles + (`{cl_mem_TYPE}` values) returned by {clCreateBuffer} or {clCreateBufferWithProperties}, + or `NULL`. + * _args_mem_loc_ is a pointer to appropriate locations that _args_ points to + where memory object handles ({cl_mem_TYPE} values) are stored. + Before the user function is executed, the memory object handles are replaced + by pointers to global memory. + * _event_wait_list_, _num_events_in_wait_list_ and _event_ are as described in + {clEnqueueNDRangeKernel}. + +The data pointed to by _args_ and _cb_args_ bytes in size will be copied and +a pointer to this copied region will be passed to _user_func_. 
+The copy needs to be done because the memory objects ({cl_mem_TYPE} values) that +_args_ may contain need to be modified and replaced by appropriate pointers +to global memory. +When {clEnqueueNativeKernel} returns, the memory region pointed to by _args_ +can be reused by the application. + +// refError + +{clEnqueueNativeKernel} returns {CL_SUCCESS} if the user function execution +instance was successfully queued. +Otherwise, it returns one of the following errors: + + * {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not a valid host + command-queue. + * {CL_INVALID_CONTEXT} if context associated with _command_queue_ and events + in _event_wait_list_ are not the same. + * {CL_INVALID_VALUE} if _user_func_ is `NULL`. + * {CL_INVALID_VALUE} if _args_ is a `NULL` value and _cb_args_ > 0, or if + _args_ is a `NULL` value and _num_mem_objects_ > 0. + * {CL_INVALID_VALUE} if _args_ is not `NULL` and _cb_args_ is 0. + * {CL_INVALID_VALUE} if _num_mem_objects_ > 0 and _mem_list_ or + _args_mem_loc_ are `NULL`. + * {CL_INVALID_VALUE} if _num_mem_objects_ = 0 and _mem_list_ or + _args_mem_loc_ are not `NULL`. + * {CL_INVALID_OPERATION} if the device associated with _command_queue_ + cannot execute the native kernel. + * {CL_INVALID_MEM_OBJECT} if one or more memory objects specified in + _mem_list_ are not valid or are not buffer objects. + * {CL_OUT_OF_RESOURCES} if there is a failure to queue the execution + instance of _kernel_ on the command-queue because of insufficient + resources needed to execute the kernel. + * {CL_MEM_OBJECT_ALLOCATION_FAILURE} if there is a failure to allocate + memory for data store associated with buffer objects specified as + arguments to _kernel_. + * {CL_INVALID_EVENT_WAIT_LIST} if _event_wait_list_ is `NULL` and + _num_events_in_wait_list_ > 0, or _event_wait_list_ is not `NULL` and + _num_events_in_wait_list_ is 0, or if event objects in _event_wait_list_ + are not valid events. 
+ * {CL_INVALID_OPERATION} if SVM pointers are passed as arguments to a kernel + and the device does not support SVM or if system pointers are passed as + arguments to a kernel and/or stored inside SVM allocations passed as + kernel arguments and the device does not support fine grain system SVM + allocations. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required + by the OpenCL implementation on the device. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. + +[NOTE] +==== +The total number of read-only images specified as arguments to a kernel +cannot exceed {CL_DEVICE_MAX_READ_IMAGE_ARGS}. +Each image array argument to a kernel declared with the `read_only` +qualifier counts as one image. +The total number of write-only images specified as arguments to a kernel +cannot exceed {CL_DEVICE_MAX_WRITE_IMAGE_ARGS}. +Each image array argument to a kernel declared with the `write_only` +qualifier counts as one image. + +The total number of read-write images specified as arguments to a kernel +cannot exceed {CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS}. +Each image array argument to a kernel declared with the `read_write` +qualifier counts as one image. +==== +-- + + +[[event-objects]] +== Event Objects + +An event object can be used to track the execution status of a command. +The API calls that enqueue commands to a command-queue create a new event +object that is returned in the _event_ argument. +In case of an error enqueuing the command in the command-queue the event +argument does not return an event object. + +The execution status of an enqueued command at any given point in time can +be one of the following: + + * {CL_QUEUED_anchor}: Indicates that the command has been enqueued in a + command-queue. + This is the initial state of all events except user events. + * {CL_SUBMITTED_anchor}: The initial state for all user events. 
+ For all other events, indicates that the command has been submitted + by the host to the device. + * {CL_RUNNING_anchor}: Indicates that the device has started executing this + command. + In order for the execution status of an enqueued command to change from + {CL_SUBMITTED} to {CL_RUNNING}, all events that this command is waiting on + must have completed successfully, i.e. their execution status must be + {CL_COMPLETE}. + * {CL_COMPLETE_anchor}: Indicates that the command has successfully completed. + * An Error Code: A negative integer value indicating that the command was + abnormally terminated. Abnormal termination may occur for a number of reasons, + such as a bad memory access. + +[NOTE] +==== +A command is considered to be complete if its execution status is +{CL_COMPLETE} or is a negative integer value. + +If the execution of a command is terminated, the command-queue associated +with this terminated command, and the associated context (and all other +command-queues in this context) may no longer be available. +The behavior of OpenCL API calls that use this context (and command-queues +associated with this context) is now considered to be +implementation-defined. +The user registered callback function specified when the context is created can +be used to report appropriate error information. +==== + + +=== Creating, Waiting for, and Releasing Event Objects + +[open,refpage='clCreateUserEvent',desc='Creates a user event object.',type='protos'] +-- +To create a user event object, call the function + +include::{generated}/api/protos/clCreateUserEvent.txt[] +include::{generated}/api/version-notes/clCreateUserEvent.asciidoc[] + + * _context_ must be a valid OpenCL context. + * _errcode_ret_ will return an appropriate error code. + If _errcode_ret_ is `NULL`, no error code is returned. + +User events allow applications to enqueue commands that wait on a user event +to finish before the command is executed by the device.
+ +// refError + +{clCreateUserEvent} returns a valid non-zero event object and _errcode_ret_ +is set to {CL_SUCCESS} if the user event object is created successfully. +Otherwise, it returns a `NULL` value with one of the following error values +returned in _errcode_ret_: + + * {CL_INVALID_CONTEXT} if _context_ is not a valid context. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required + by the OpenCL implementation on the device. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. + +The initial execution status for the user event object is {CL_SUBMITTED}. +-- + +[open,refpage='clSetUserEventStatus',desc='Sets the execution status of a user event object.',type='protos'] +-- +To set the execution status of a user event object, call the function + +include::{generated}/api/protos/clSetUserEventStatus.txt[] +include::{generated}/api/version-notes/clSetUserEventStatus.asciidoc[] + + * _event_ is a user event object created using {clCreateUserEvent}. + * _execution_status_ specifies the new execution status to be set and can be + {CL_COMPLETE} or a negative integer value to indicate an error. + A negative integer value causes all enqueued commands that wait on this user + event to be terminated. + {clSetUserEventStatus} can only be called once to change the execution + status of _event_. + +[NOTE] +==== +If there are enqueued commands with user events in the _event_wait_list_ +argument of *+clEnqueue*+* commands, the user must ensure that the status of +these user events being waited on is set using {clSetUserEventStatus} +before any OpenCL APIs that release OpenCL objects except for event objects +are called; otherwise the behavior is undefined. + +For example, the following code sequence will result in undefined behavior +of {clReleaseMemObject}.
+ +[source,opencl] +---- +ev1 = clCreateUserEvent(ctx, NULL); +clEnqueueWriteBuffer(cq, buf1, CL_FALSE, ..., 1, &ev1, NULL); +clEnqueueWriteBuffer(cq, buf2, CL_FALSE, ...); +clReleaseMemObject(buf2); +clSetUserEventStatus(ev1, CL_COMPLETE); +---- + +The following code sequence, however, works correctly. + +[source,opencl] +---- +ev1 = clCreateUserEvent(ctx, NULL); +clEnqueueWriteBuffer(cq, buf1, CL_FALSE, ..., 1, &ev1, NULL); +clEnqueueWriteBuffer(cq, buf2, CL_FALSE, ...); +clSetUserEventStatus(ev1, CL_COMPLETE); +clReleaseMemObject(buf2); +---- +==== + +// refError + +{clSetUserEventStatus} returns {CL_SUCCESS} if the function was executed +successfully. +Otherwise, it returns one of the following errors: + + * {CL_INVALID_EVENT} if _event_ is not a valid user event object. + * {CL_INVALID_VALUE} if the _execution_status_ is not {CL_COMPLETE} or a + negative integer value. + * {CL_INVALID_OPERATION} if the _execution_status_ for _event_ has already + been changed by a previous call to {clSetUserEventStatus}. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required + by the OpenCL implementation on the device. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. +-- + +[open,refpage='clWaitForEvents',desc='Waits on the host thread for commands identified by event objects to complete.',type='protos'] +-- +To wait for events to complete, call the function + +include::{generated}/api/protos/clWaitForEvents.txt[] +include::{generated}/api/version-notes/clWaitForEvents.asciidoc[] + + * _num_events_ is the number of events in _event_list_. + * _event_list_ is a pointer to a list of event object handles. + +This function waits on the host thread for commands identified by event +objects in _event_list_ to complete. +A command is considered complete if its execution status is {CL_COMPLETE} or a +negative value. +The events specified in _event_list_ act as synchronization points. 
+ +// refError + +{clWaitForEvents} returns {CL_SUCCESS} if the execution status of all events +in _event_list_ is {CL_COMPLETE}. +Otherwise, it returns one of the following errors: + + * {CL_INVALID_VALUE} if _num_events_ is zero or _event_list_ is `NULL`. + * {CL_INVALID_CONTEXT} if events specified in _event_list_ do not belong to + the same context. + * {CL_INVALID_EVENT} if event objects specified in _event_list_ are not + valid event objects. + * {CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST} if the execution status of + any of the events in _event_list_ is a negative integer value. + This error code is <<unified-spec, missing before>> version 1.1. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required + by the OpenCL implementation on the device. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. +-- + +[open,refpage='clGetEventInfo',desc='Returns information about the event object.',type='protos'] +-- +To return information about an event object, call the function + +include::{generated}/api/protos/clGetEventInfo.txt[] +include::{generated}/api/version-notes/clGetEventInfo.asciidoc[] + + * _event_ specifies the event object being queried. + * _param_name_ specifies the information to query. + The list of supported _param_name_ types and the information returned in + _param_value_ by {clGetEventInfo} is described in the + <<event-info-table,Event Object Queries>> table. + * _param_value_ is a pointer to memory where the appropriate result being + queried is returned. + If _param_value_ is `NULL`, it is ignored. + * _param_value_size_ is used to specify the size in bytes of memory pointed to + by _param_value_. + This size must be {geq} size of return type as described in the + <<event-info-table,Event Object Queries>> table. + * _param_value_size_ret_ returns the actual size in bytes of data being + queried by _param_name_. + If _param_value_size_ret_ is `NULL`, it is ignored.
+ +[[event-info-table]] +.List of supported param_names by {clGetEventInfo} +[width="100%",cols="<33%,<17%,<50%",options="header"] +|==== +| Event Info | Return Type | Description +| {CL_EVENT_COMMAND_QUEUE_anchor} + +include::{generated}/api/version-notes/CL_EVENT_COMMAND_QUEUE.asciidoc[] + | {cl_command_queue_TYPE} + | Return the command-queue associated with _event_. + For user event objects, a `NULL` value is returned. + +ifdef::cl_khr_command_buffer_multi_device[] + If the `<<cl_khr_command_buffer_multi_device>>` extension is + supported, for events returned by a command-buffer enqueue operation + to multiple command-queues, `NULL` is returned. +endif::cl_khr_command_buffer_multi_device[] + +| {CL_EVENT_CONTEXT_anchor} + +include::{generated}/api/version-notes/CL_EVENT_CONTEXT.asciidoc[] + | {cl_context_TYPE} + | Return the context associated with _event_. +| {CL_EVENT_COMMAND_TYPE_anchor} + +include::{generated}/api/version-notes/CL_EVENT_COMMAND_TYPE.asciidoc[] + | {cl_command_type_TYPE} + | Return the command type associated with _event_ as described in the + <<event-command-type-table,Event Command Types>> table. + +| {CL_EVENT_COMMAND_EXECUTION_STATUS_anchor} footnote:[{fn-event-status-order}] + +include::{generated}/api/version-notes/CL_EVENT_COMMAND_EXECUTION_STATUS.asciidoc[] + | {cl_int_TYPE} + | Return the execution status of the command identified by _event_. + Valid values are: + + {CL_QUEUED} - Command has been enqueued in the command-queue. + + {CL_SUBMITTED} - Enqueued command has been submitted by the host to the + device associated with the command-queue. + + {CL_RUNNING} - Device is currently executing this command. + + {CL_COMPLETE} - The command has completed. + + Or an error code given by a negative integer value (command was + abnormally terminated - this may be caused by a bad memory access + etc.). + These error codes come from the same set of error codes that are + returned from the platform or runtime API calls as return values or + *errcode_ret* values.
+ +ifdef::cl_khr_command_buffer_multi_device[] + If the `<<cl_khr_command_buffer_multi_device>>` extension is + supported, for events returned by a command-buffer enqueue operation + to multiple command-queues the semantics of execution status is as + follows: + + {CL_QUEUED} - Command-buffer has been enqueued across the + command-queues. + + {CL_SUBMITTED} - Commands from the command-buffer have been + submitted by the host to any device associated with one of the + command-queues. + + {CL_RUNNING} - Any command from the command-buffer has started + execution on a device. + + {CL_COMPLETE} - All commands have completed on all devices. +endif::cl_khr_command_buffer_multi_device[] +| {CL_EVENT_REFERENCE_COUNT_anchor} footnote:[{fn-reference-count-usage}] + +include::{generated}/api/version-notes/CL_EVENT_REFERENCE_COUNT.asciidoc[] + | {cl_uint_TYPE} + | Return the _event_ reference count. +|==== + +[[event-command-type-table]] +.List of supported event command types +[width="100%",cols="2,3",options="header"] +|==== +| Events Created By | Event Command Type + +| {clEnqueueNDRangeKernel} + | {CL_COMMAND_NDRANGE_KERNEL_anchor} + +include::{generated}/api/version-notes/CL_COMMAND_NDRANGE_KERNEL.asciidoc[] + +| {clEnqueueTask} + | {CL_COMMAND_TASK_anchor} + +include::{generated}/api/version-notes/CL_COMMAND_TASK.asciidoc[] + +| {clEnqueueNativeKernel} + | {CL_COMMAND_NATIVE_KERNEL_anchor} + +include::{generated}/api/version-notes/CL_COMMAND_NATIVE_KERNEL.asciidoc[] + +| {clEnqueueReadBuffer} + | {CL_COMMAND_READ_BUFFER_anchor} + +include::{generated}/api/version-notes/CL_COMMAND_READ_BUFFER.asciidoc[] + +| {clEnqueueWriteBuffer} + | {CL_COMMAND_WRITE_BUFFER_anchor} + +include::{generated}/api/version-notes/CL_COMMAND_WRITE_BUFFER.asciidoc[] + +| {clEnqueueCopyBuffer} + | {CL_COMMAND_COPY_BUFFER_anchor} + +include::{generated}/api/version-notes/CL_COMMAND_COPY_BUFFER.asciidoc[] + +| {clEnqueueReadImage} + | {CL_COMMAND_READ_IMAGE_anchor} + 
+include::{generated}/api/version-notes/CL_COMMAND_READ_IMAGE.asciidoc[] + +| {clEnqueueWriteImage} + | {CL_COMMAND_WRITE_IMAGE_anchor} + +include::{generated}/api/version-notes/CL_COMMAND_WRITE_IMAGE.asciidoc[] + +| {clEnqueueCopyImage} + | {CL_COMMAND_COPY_IMAGE_anchor} + +include::{generated}/api/version-notes/CL_COMMAND_COPY_IMAGE.asciidoc[] + +| {clEnqueueCopyBufferToImage} + | {CL_COMMAND_COPY_BUFFER_TO_IMAGE_anchor} + +include::{generated}/api/version-notes/CL_COMMAND_COPY_BUFFER_TO_IMAGE.asciidoc[] + +| {clEnqueueCopyImageToBuffer} + | {CL_COMMAND_COPY_IMAGE_TO_BUFFER_anchor} + +include::{generated}/api/version-notes/CL_COMMAND_COPY_IMAGE_TO_BUFFER.asciidoc[] + +| {clEnqueueMapBuffer} + | {CL_COMMAND_MAP_BUFFER_anchor} + +include::{generated}/api/version-notes/CL_COMMAND_MAP_BUFFER.asciidoc[] + +| {clEnqueueMapImage} + | {CL_COMMAND_MAP_IMAGE_anchor} + +include::{generated}/api/version-notes/CL_COMMAND_MAP_IMAGE.asciidoc[] + +| {clEnqueueUnmapMemObject} + | {CL_COMMAND_UNMAP_MEM_OBJECT_anchor} + +include::{generated}/api/version-notes/CL_COMMAND_UNMAP_MEM_OBJECT.asciidoc[] + +| {clEnqueueMarker}, + + {clEnqueueMarkerWithWaitList} + | {CL_COMMAND_MARKER_anchor} + +include::{generated}/api/version-notes/CL_COMMAND_MARKER.asciidoc[] + +| {clEnqueueReadBufferRect} + | {CL_COMMAND_READ_BUFFER_RECT_anchor} + +include::{generated}/api/version-notes/CL_COMMAND_READ_BUFFER_RECT.asciidoc[] + +| {clEnqueueWriteBufferRect} + | {CL_COMMAND_WRITE_BUFFER_RECT_anchor} + +include::{generated}/api/version-notes/CL_COMMAND_WRITE_BUFFER_RECT.asciidoc[] + +| {clEnqueueCopyBufferRect} + | {CL_COMMAND_COPY_BUFFER_RECT_anchor} + +include::{generated}/api/version-notes/CL_COMMAND_COPY_BUFFER_RECT.asciidoc[] + +| {clCreateUserEvent} + | {CL_COMMAND_USER_anchor} + +include::{generated}/api/version-notes/CL_COMMAND_USER.asciidoc[] + +| {clEnqueueBarrier}, + + {clEnqueueBarrierWithWaitList} + | {CL_COMMAND_BARRIER_anchor} + 
+include::{generated}/api/version-notes/CL_COMMAND_BARRIER.asciidoc[] + +| {clEnqueueMigrateMemObjects} + | {CL_COMMAND_MIGRATE_MEM_OBJECTS_anchor} + +include::{generated}/api/version-notes/CL_COMMAND_MIGRATE_MEM_OBJECTS.asciidoc[] + +| {clEnqueueFillBuffer} + | {CL_COMMAND_FILL_BUFFER_anchor} + +include::{generated}/api/version-notes/CL_COMMAND_FILL_BUFFER.asciidoc[] + +| {clEnqueueFillImage} + | {CL_COMMAND_FILL_IMAGE_anchor} + +include::{generated}/api/version-notes/CL_COMMAND_FILL_IMAGE.asciidoc[] + +| {clEnqueueSVMFree} + | {CL_COMMAND_SVM_FREE_anchor} + +include::{generated}/api/version-notes/CL_COMMAND_SVM_FREE.asciidoc[] + +| {clEnqueueSVMMemcpy} + | {CL_COMMAND_SVM_MEMCPY_anchor} + +include::{generated}/api/version-notes/CL_COMMAND_SVM_MEMCPY.asciidoc[] + +| {clEnqueueSVMMemFill} + | {CL_COMMAND_SVM_MEMFILL_anchor} + +include::{generated}/api/version-notes/CL_COMMAND_SVM_MEMFILL.asciidoc[] + +| {clEnqueueSVMMap} + | {CL_COMMAND_SVM_MAP_anchor} + +include::{generated}/api/version-notes/CL_COMMAND_SVM_MAP.asciidoc[] + +| {clEnqueueSVMUnmap} + | {CL_COMMAND_SVM_UNMAP_anchor} + +include::{generated}/api/version-notes/CL_COMMAND_SVM_UNMAP.asciidoc[] + +| {clEnqueueSVMMigrateMem} + | {CL_COMMAND_SVM_MIGRATE_MEM_anchor} + +include::{generated}/api/version-notes/CL_COMMAND_SVM_MIGRATE_MEM.asciidoc[] + +Prior to OpenCL 3.0, implementations should return +{CL_COMMAND_MIGRATE_MEM_OBJECTS}, but may return an implementation-defined +event command type for {clEnqueueSVMMigrateMem}. 
+ +ifdef::cl_khr_command_buffer[] +| {clEnqueueCommandBufferKHR} + | {CL_COMMAND_COMMAND_BUFFER_KHR_anchor} + +include::{generated}/api/version-notes/CL_COMMAND_COMMAND_BUFFER_KHR.asciidoc[] +endif::cl_khr_command_buffer[] + +ifdef::cl_khr_dx9_media_sharing[] +| {clEnqueueAcquireDX9MediaSurfacesKHR} + | {CL_COMMAND_ACQUIRE_DX9_MEDIA_SURFACES_KHR_anchor} + +include::{generated}/api/version-notes/CL_COMMAND_ACQUIRE_DX9_MEDIA_SURFACES_KHR.asciidoc[] + +| {clEnqueueReleaseDX9MediaSurfacesKHR} + | {CL_COMMAND_RELEASE_DX9_MEDIA_SURFACES_KHR_anchor} + +include::{generated}/api/version-notes/CL_COMMAND_RELEASE_DX9_MEDIA_SURFACES_KHR.asciidoc[] +endif::cl_khr_dx9_media_sharing[] + +ifdef::cl_khr_d3d10_sharing[] +| {clEnqueueAcquireD3D10ObjectsKHR} + | {CL_COMMAND_ACQUIRE_D3D10_OBJECTS_KHR_anchor} + +include::{generated}/api/version-notes/CL_COMMAND_ACQUIRE_D3D10_OBJECTS_KHR.asciidoc[] + +| {clEnqueueReleaseD3D10ObjectsKHR} + | {CL_COMMAND_RELEASE_D3D10_OBJECTS_KHR_anchor} + +include::{generated}/api/version-notes/CL_COMMAND_RELEASE_D3D10_OBJECTS_KHR.asciidoc[] +endif::cl_khr_d3d10_sharing[] + +ifdef::cl_khr_d3d11_sharing[] +| {clEnqueueAcquireD3D11ObjectsKHR} + | {CL_COMMAND_ACQUIRE_D3D11_OBJECTS_KHR_anchor} + +include::{generated}/api/version-notes/CL_COMMAND_ACQUIRE_D3D11_OBJECTS_KHR.asciidoc[] + +| {clEnqueueReleaseD3D11ObjectsKHR} + | {CL_COMMAND_RELEASE_D3D11_OBJECTS_KHR_anchor} + +include::{generated}/api/version-notes/CL_COMMAND_RELEASE_D3D11_OBJECTS_KHR.asciidoc[] +endif::cl_khr_d3d11_sharing[] + +ifdef::cl_khr_egl_image[] +| {clEnqueueAcquireEGLObjectsKHR} + | {CL_COMMAND_ACQUIRE_EGL_OBJECTS_KHR_anchor} + +include::{generated}/api/version-notes/CL_COMMAND_ACQUIRE_EGL_OBJECTS_KHR.asciidoc[] +| {clEnqueueReleaseEGLObjectsKHR} + | {CL_COMMAND_RELEASE_EGL_OBJECTS_KHR_anchor} + +include::{generated}/api/version-notes/CL_COMMAND_RELEASE_EGL_OBJECTS_KHR.asciidoc[] +endif::cl_khr_egl_image[] + +ifdef::cl_khr_egl_event[] +| {clCreateEventFromEGLSyncKHR} + | 
{CL_COMMAND_EGL_FENCE_SYNC_OBJECT_KHR_anchor} + +include::{generated}/api/version-notes/CL_COMMAND_EGL_FENCE_SYNC_OBJECT_KHR.asciidoc[] +endif::cl_khr_egl_event[] + +ifdef::cl_khr_gl_sharing[] +| {clEnqueueAcquireGLObjects} + | {CL_COMMAND_ACQUIRE_GL_OBJECTS_anchor} + +include::{generated}/api/version-notes/CL_COMMAND_ACQUIRE_GL_OBJECTS.asciidoc[] +| {clEnqueueReleaseGLObjects} + | {CL_COMMAND_RELEASE_GL_OBJECTS_anchor} + +include::{generated}/api/version-notes/CL_COMMAND_RELEASE_GL_OBJECTS.asciidoc[] +endif::cl_khr_gl_sharing[] + +ifdef::cl_khr_gl_event[] +| {clCreateEventFromGLsyncKHR} + | {CL_COMMAND_GL_FENCE_SYNC_OBJECT_KHR_anchor} + +include::{generated}/api/version-notes/CL_COMMAND_GL_FENCE_SYNC_OBJECT_KHR.asciidoc[] +endif::cl_khr_gl_event[] + +|==== + +Using {clGetEventInfo} to determine if a command identified by _event_ has +finished execution (i.e. {CL_EVENT_COMMAND_EXECUTION_STATUS} returns +{CL_COMPLETE}) is not a synchronization point. +There are no guarantees that the memory objects being modified by command +associated with _event_ will be visible to other enqueued commands. + +// refError + +{clGetEventInfo} returns {CL_SUCCESS} if the function is executed +successfully. +Otherwise, it returns one of the following errors: + + * {CL_INVALID_VALUE} if _param_name_ is not valid, or if size in bytes + specified by _param_value_size_ is < size of return type as described in + the <<event-info-table,Event Object Queries>> table and + _param_value_ is not `NULL`. + * {CL_INVALID_VALUE} if information to query given in _param_name_ cannot be + queried for _event_. + * {CL_INVALID_EVENT} if _event_ is not a valid event object. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required + by the OpenCL implementation on the device. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host.
+-- + +[open,refpage='clSetEventCallback',desc='Registers a user callback function for a specific command execution status.',type='protos'] +-- +To register a user callback function for a specific command execution +status, call the function + +include::{generated}/api/protos/clSetEventCallback.txt[] +include::{generated}/api/version-notes/clSetEventCallback.asciidoc[] + + * _event_ is a valid event object. + * _command_exec_callback_type_ specifies the command execution status for + which the callback is registered. + The command execution status types for which a callback can be registered + are {CL_SUBMITTED}, {CL_RUNNING}, or {CL_COMPLETE}. + The callback function registered for a _command_exec_callback_type_ value of + {CL_COMPLETE} will be called when the command has completed successfully or + is abnormally terminated. + * _pfn_event_notify_ is the event callback function that can be registered by + the application. + This callback function may be called asynchronously by the OpenCL + implementation. + It is the application's responsibility to ensure that the callback function + is thread-safe. + The parameters to this callback function are: + ** _event_ is the event object for which the callback function is invoked. + ** _event_command_status_ is equal to the _command_exec_callback_type_ + used while registering the callback. + Refer to the <> + table for the command execution status values. + If the callback is called as the result of the command associated with + event being abnormally terminated, an appropriate error code for the + error that caused the termination will be passed to + _event_command_status_ instead. + ** _user_data_ is a pointer to user supplied data. + * _user_data_ will be passed as the _user_data_ argument when _pfn_notify_ is + called. + _user_data_ can be `NULL`. + +Each call to {clSetEventCallback} registers the specified user callback +function on a callback stack associated with _event_. 
+The order in which the registered user callback functions are called is +undefined. + +The registered callback function will be called when the execution status of the +command associated with _event_ changes to an execution status equal to or past +the status specified by _command_exec_callback_type_, or for the execution status +{CL_COMPLETE}, if the command is abnormally terminated. +There is no guarantee that the callback functions registered for various command +execution status values for an event will be called in the exact order that the +execution status of a command changes. +Furthermore, it should be noted that calling a callback for an event execution +status other than {CL_COMPLETE} in no way implies that the memory model or +execution model as defined by the OpenCL specification has changed. For example, +it is not valid to assume that a corresponding memory transfer has completed +unless the event is in the state {CL_COMPLETE}. + +All callbacks registered for an event object must be called before the event +object is destroyed. + +Callbacks should return promptly. +Behavior is undefined when calling expensive system routines, OpenCL APIs to +create contexts or command-queues, or blocking OpenCL APIs in an event callback. +Rather than calling a blocking OpenCL API in an event callback, applications +may call a non-blocking OpenCL API, then register a completion callback +for the non-blocking OpenCL API with the remainder of the work. + +Because commands in a command-queue are not required to begin execution +until the command-queue is flushed, callbacks that enqueue commands on a +command-queue should either call {clFlush} on the queue before returning, +or arrange for the command-queue to be flushed later. + +// refError + +{clSetEventCallback} returns {CL_SUCCESS} if the function is executed +successfully. +Otherwise, it returns one of the following errors: + + * {CL_INVALID_EVENT} if _event_ is not a valid event object.
+ * {CL_INVALID_VALUE} if _pfn_event_notify_ is `NULL` or if + _command_exec_callback_type_ is not {CL_SUBMITTED}, {CL_RUNNING}, or + {CL_COMPLETE}. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required + by the OpenCL implementation on the device. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. +-- + + +[open,refpage='clRetainEvent',desc='Increments the event reference count.',type='protos'] +-- +To retain an event object, call the function + +include::{generated}/api/protos/clRetainEvent.txt[] +include::{generated}/api/version-notes/clRetainEvent.asciidoc[] + + * _event_ is the event object to be retained. + +The _event_ reference count is incremented. +The OpenCL commands that return an event perform an implicit retain. + +// refError + +{clRetainEvent} returns {CL_SUCCESS} if the function is executed successfully. +Otherwise, it returns one of the following errors: + + * {CL_INVALID_EVENT} if _event_ is not a valid event object. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required + by the OpenCL implementation on the device. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. +-- + +[open,refpage='clReleaseEvent',desc='Decrements the event reference count.',type='protos'] +-- +To release an event object, call the function + +include::{generated}/api/protos/clReleaseEvent.txt[] +include::{generated}/api/version-notes/clReleaseEvent.asciidoc[] + + * _event_ is the event object to be released. + +The _event_ reference count is decremented. + +The event object is deleted once the reference count becomes zero, the +specific command identified by this event has completed (or terminated) and +there are no commands in the command-queues of a context that require a wait +for this event to complete. 
+Using this function to release a reference that was not obtained by creating +the object or by calling {clRetainEvent} causes undefined behavior. + +[NOTE] +==== +Developers should be careful when releasing their last reference count on +events created by {clCreateUserEvent} that have not yet been set to status +of {CL_COMPLETE} or an error. +If the user event was used in the event_wait_list argument passed to a +*+clEnqueue*+* API or another application host thread is waiting for it in +{clWaitForEvents}, those commands and host threads will continue to wait for +the event status to reach {CL_COMPLETE} or error, even after the application +has released the object. +Since in this scenario the application has released its last reference count +to the user event, it would be in principle no longer valid for the +application to change the status of the event to unblock all the other +machinery. +As a result the waiting tasks will wait forever, and associated events, +{cl_mem_TYPE} objects, command-queues and contexts are likely to leak. +In-order command-queues caught up in this deadlock may cease to do any work. +==== + +// refError + +{clReleaseEvent} returns {CL_SUCCESS} if the function is executed +successfully. +Otherwise, it returns one of the following errors: + + * {CL_INVALID_EVENT} if _event_ is not a valid event object. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required + by the OpenCL implementation on the device. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. +-- + + +ifdef::cl_khr_egl_event[] +==== Linking Event Objects to EGL Fence Sync Objects + +An event object may be created by linking to an EGL *fence sync object*. 
+ +[open,refpage='clCreateEventFromEGLSyncKHR',desc='Link OpenCL event object to an EGL sync object',type='protos'] +-- +To create an OpenCL event object linked to an EGL fence sync object, call +the function + +include::{generated}/api/protos/clCreateEventFromEGLSyncKHR.txt[] +include::{generated}/api/version-notes/clCreateEventFromEGLSyncKHR.asciidoc[] + + * _context_ is a valid OpenCL context created from an OpenGL context or + share group, using the `<<cl_khr_gl_sharing>>` extension. + * _sync_ is the name of a sync object of type `EGL_SYNC_FENCE_KHR` created + with respect to `EGLDisplay` _display_. + * _display_ is the `EGLDisplay` handle. + * _errcode_ret_ will return an appropriate error code. + If _errcode_ret_ is `NULL`, no error code is returned. + +Completion of such an event object is equivalent to waiting for completion +of the fence command associated with the linked EGL sync object. + +The parameters of an event object linked to an EGL sync object will return +the following values when queried with {clGetEventInfo}: + + * The {CL_EVENT_COMMAND_QUEUE} of a linked event is `NULL`, because the + event is not associated with any OpenCL command-queue. + * The {CL_EVENT_COMMAND_TYPE} of a linked event is + {CL_COMMAND_EGL_FENCE_SYNC_OBJECT_KHR}, indicating that the event is + associated with an EGL sync object, rather than an OpenCL command. + * The {CL_EVENT_COMMAND_EXECUTION_STATUS} of a linked event is either + {CL_SUBMITTED}, indicating that the fence command associated with the + sync object has not yet completed, or {CL_COMPLETE}, indicating that the + fence command has completed. + +{clCreateEventFromEGLSyncKHR} performs an implicit {clRetainEvent} on the +returned event object. +Creating a linked event object also places a reference on the linked EGL +sync object. +When the event object is deleted, the reference will be removed from the EGL +sync object. + +Events returned from {clCreateEventFromEGLSyncKHR} may only be consumed by +{clEnqueueAcquire}*** commands.
+Passing such events to any other CL API that enqueues commands will generate +a {CL_INVALID_EVENT} error. + +// refError + +{clCreateEventFromEGLSyncKHR} returns a valid OpenCL event object and +_errcode_ret_ is set to {CL_SUCCESS} if the event object is created +successfully. +Otherwise, it returns a `NULL` value with one of the following error values +returned in _errcode_ret_: + + * {CL_INVALID_CONTEXT} if _context_ is not a valid context, or was not + created from a GL context. + * {CL_INVALID_EGL_OBJECT_KHR} if _sync_ is not a valid EGLSyncKHR object + of type `EGL_SYNC_FENCE_KHR` created with respect to `EGLDisplay` + _display_. +-- + + +[[explicit-sync-using-egl-fences]] +===== Explicit Synchronization Using EGL Fence Sync Objects + +If the `<<cl_khr_egl_event>>` extension is supported, event objects created +with {clCreateEventFromEGLSyncKHR} provide another method of coordinating +sharing between EGL / EGL client API objects, and OpenCL. + +Completion of EGL and EGL client API commands may be determined by + + * placing an EGL fence command after commands using `eglCreateSyncKHR`; + * creating an event from the resulting EGL sync object using + {clCreateEventFromEGLSyncKHR}; and + * determining completion of that event object via + {clEnqueueAcquireGLObjects}. + +This method may be considerably more efficient than calling operations like +`glFinish`, and is referred to as _explicit synchronization_. +The application is responsible for ensuring the command stream associated +with the EGL fence is flushed to ensure the CL queue is submitted to the +device. +Explicit synchronization is most useful when an EGL client API context bound +to another thread is accessing the memory objects. + +endif::cl_khr_egl_event[] + + +ifdef::cl_khr_gl_event[] +==== Linking Event Objects to OpenGL Fence Sync Objects + +An event object may be created by linking to an OpenGL *fence sync object*.
+ +[open,refpage='clCreateEventFromGLsyncKHR',desc='Create OpenCL event object linked to an OpenGL sync object',type='protos'] +-- +To create an OpenCL event object linked to an OpenGL fence sync object, call +the function + +include::{generated}/api/protos/clCreateEventFromGLsyncKHR.txt[] +include::{generated}/api/version-notes/clCreateEventFromGLsyncKHR.asciidoc[] + + * _context_ is a valid OpenCL context created from an OpenGL context or + share group, using the `<<cl_khr_gl_sharing>>` extension. + * _sync_ is the name of a sync object in the GL share group associated + with _context_. + * _errcode_ret_ will return an appropriate error code. + If _errcode_ret_ is `NULL`, no error code is returned. + +Completion of such an event object is equivalent to waiting for completion +of the fence command associated with the linked GL sync object. + +// refError + +{clCreateEventFromGLsyncKHR} returns a valid OpenCL event object and +_errcode_ret_ is set to {CL_SUCCESS} if the event object is created +successfully. +Otherwise, it returns a `NULL` value with one of the following error values +returned in _errcode_ret_: + + * {CL_INVALID_CONTEXT} if _context_ is not a valid context, or was not + created from a GL context. + * {CL_INVALID_GL_OBJECT} if _sync_ is not the name of a sync object in the + GL share group associated with _context_. + +The parameters of an event object linked to a GL sync object will return the +following values when queried with {clGetEventInfo}: + + * The {CL_EVENT_COMMAND_QUEUE} of a linked event is `NULL`, because the + event is not associated with any OpenCL command-queue. + * The {CL_EVENT_COMMAND_TYPE} of a linked event is + {CL_COMMAND_GL_FENCE_SYNC_OBJECT_KHR}, indicating that the event is + associated with a GL sync object, rather than an OpenCL command.
+ * The {CL_EVENT_COMMAND_EXECUTION_STATUS} of a linked event is either + {CL_SUBMITTED}, indicating that the fence command associated with the + sync object has not yet completed, or {CL_COMPLETE}, indicating that the + fence command has completed. + +{clCreateEventFromGLsyncKHR} performs an implicit {clRetainEvent} on the +returned event object. +Creating a linked event object also places a reference on the linked GL sync +object. +When the event object is deleted, the reference will be removed from the GL +sync object. + +Events returned from {clCreateEventFromGLsyncKHR} can be used in the +_event_wait_list_ argument to {clEnqueueAcquireGLObjects} and CL APIs that +take a {cl_event} as an argument but do not enqueue commands. +Passing such events to any other CL API that enqueues commands will generate +a {CL_INVALID_EVENT} error. +-- + + +[[explicit-sync-using-opengl-fences]] +===== Explicit Synchronization Using OpenGL Fence Sync Objects + +If the `<<cl_khr_gl_event>>` extension is supported, event objects created +with {clCreateEventFromGLsyncKHR} provide another method of coordinating +sharing of buffers and images between OpenGL and OpenCL. + +Completion of OpenGL commands may be determined by + + * placing an OpenGL fence command after commands using `glFenceSync`; + * creating an event from the resulting OpenGL sync object using + {clCreateEventFromGLsyncKHR}; and + * determining completion of that event object via + {clEnqueueAcquireGLObjects}. + +This method may be considerably more efficient than calling `glFinish`, and +is referred to as _explicit synchronization_. +Explicit synchronization is most useful when an OpenGL context bound to +another thread is accessing the memory objects.
+ +endif::cl_khr_gl_event[] + + +[[markers-barriers-waiting-for-events]] +== Markers, Barriers and Waiting for Events + +[open,refpage='clEnqueueMarkerWithWaitList',desc='Enqueues a marker command which waits for either a list of events to complete, or all previously enqueued commands to complete.',type='protos'] +-- +To enqueue a marker command which waits for events or commands to complete, +call the function + +include::{generated}/api/protos/clEnqueueMarkerWithWaitList.txt[] +include::{generated}/api/version-notes/clEnqueueMarkerWithWaitList.asciidoc[] + + * _command_queue_ is a valid host command-queue. + * _event_wait_list_ and _num_events_in_wait_list_ specify events that need to + complete before this particular command can be executed. + * _event_ returns an event object that identifies this command and + can be used to query or wait for this command to complete. + If _event_ is `NULL` or the enqueue is unsuccessful, no event will be + created and therefore it will not be possible to query the status of this + command or to wait for this command to complete. + If _event_wait_list_ and _event_ are not `NULL`, _event_ must not refer + to an element of the _event_wait_list_ array. + +If _event_wait_list_ is `NULL`, _num_events_in_wait_list_ must be 0. +If _event_wait_list_ is not `NULL`, the list of events pointed to by +_event_wait_list_ must be valid and _num_events_in_wait_list_ must be +greater than 0. +The events specified in _event_wait_list_ act as synchronization points. +The context associated with events in _event_wait_list_ and _command_queue_ +must be the same. +The memory associated with _event_wait_list_ can be reused or freed after +the function returns. + +If _event_wait_list_ is `NULL`, then this particular command waits until all +previous enqueued commands to _command_queue_ have completed. 
+ +The marker command either waits for a list of events to complete, or if the +list is empty it waits for all commands previously enqueued in +_command_queue_ to complete before it completes. +This command returns an _event_ which can be waited on, i.e. this event can +be waited on to insure that all events either in the _event_wait_list_ or +all previously enqueued commands, queued before this command to +_command_queue_, have completed. + +// refError + +{clEnqueueMarkerWithWaitList} returns {CL_SUCCESS} if the function is +successfully executed. +Otherwise, it returns one of the following errors: + + * {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not a valid host + command-queue. + * {CL_INVALID_CONTEXT} if context associated with _command_queue_ and events + in _event_wait_list_ are not the same. + * {CL_INVALID_EVENT_WAIT_LIST} if _event_wait_list_ is `NULL` and + _num_events_in_wait_list_ > 0, or _event_wait_list_ is not `NULL` and + _num_events_in_wait_list_ is 0, or if event objects in _event_wait_list_ + are not valid events. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required + by the OpenCL implementation on the device. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. +-- + +[open,refpage='clEnqueueMarker',desc='Enqueues a marker command which waits for all previously enqueued commands to complete.',type='protos'] +-- +To enqueue a marker command which waits for previous commands to complete, call +the function + +include::{generated}/api/protos/clEnqueueMarker.txt[] +include::{generated}/api/version-notes/clEnqueueMarker.asciidoc[] + + * _command_queue_ is a valid host command-queue. + * _event_ returns an event object that identifies this command and + can be used to query or wait for this command to complete. 
+ If _event_ is `NULL` or the enqueue is unsuccessful, no event will be + created and therefore it will not be possible to query the status of this + command or to wait for this command to complete. + If _event_wait_list_ and _event_ are not `NULL`, _event_ must not refer + to an element of the _event_wait_list_ array. + +The marker command waits for all commands previously enqueued in _command_queue_ to complete before it completes. +This command returns an _event_ which can be waited on, i.e. this event can be +waited on to insure that all previously enqueued commands, queued before this +command to _command_queue_, have completed. + +// refError + +{clEnqueueMarker} returns {CL_SUCCESS} if the function is successfully +executed. +Otherwise, it returns one of the following errors: + + * {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not a valid host + command-queue. + * {CL_INVALID_VALUE} if _event_ is `NULL`. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required + by the OpenCL implementation on the device. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. +-- + +[open,refpage='clEnqueueWaitForEvents',desc='Enqueues a wait on a list of events to complete.',type='protos'] +-- +To enqueue a wait for a specific event or a list of events to complete before any future commands queued in a command-queue are executed, call the function + +include::{generated}/api/protos/clEnqueueWaitForEvents.txt[] +include::{generated}/api/version-notes/clEnqueueWaitForEvents.asciidoc[] + + * _command_queue_ is a valid host command-queue. + * _event_list_ and _num_events_ specify events that need to complete before + this particular command can be executed. + +// Note, this parameter is called event_list (like clWaitForEvents) rather than +// event_wait_list(like clEnqueueMarkerWithWaitList etc.) because the function +// predates wait lists (and CL_INVALID_EVENT_WAIT_LIST). 
+ +The events specified in _event_list_ act as synchronization points. +The context associated with events in _event_list_ and _command_queue_ must be +the same. +The memory associated with _event_list_ can be reused or freed after the +function returns. + +// refError + +{clEnqueueWaitForEvents} returns {CL_SUCCESS} if the function is successfully +executed. +Otherwise, it returns one of the following errors: + + * {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not a valid host + command-queue. + * {CL_INVALID_CONTEXT} if context associated with _command_queue_ and events + in _event_list_ are not the same. + * {CL_INVALID_VALUE} if _num_events_ is 0 or _event_list_ is `NULL`. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required + by the OpenCL implementation on the device. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. +-- + +[open,refpage='clEnqueueBarrierWithWaitList',desc='A synchronization point that enqueues a barrier operation.',type='protos'] +-- +To enqueue a barrier command which waits for events or commands to complete, +call the function + +include::{generated}/api/protos/clEnqueueBarrierWithWaitList.txt[] +include::{generated}/api/version-notes/clEnqueueBarrierWithWaitList.asciidoc[] + + * _command_queue_ is a valid host command-queue. + * _event_wait_list_ and _num_events_in_wait_list_ specify events that need to + complete before this particular command can be executed. + * If _event_wait_list_ is `NULL`, _num_events_in_wait_list_ must be 0. + If _event_wait_list_ is not `NULL`, the list of events pointed to by + _event_wait_list_ must be valid and _num_events_in_wait_list_ must be + greater than 0. + The events specified in _event_wait_list_ act as synchronization points. + The context associated with events in _event_wait_list_ and _command_queue_ + must be the same. 
+ The memory associated with _event_wait_list_ can be reused or freed after + the function returns. + * _event_ returns an event object that identifies this command and + can be used to query or wait for this command to complete. + If _event_ is `NULL` or the enqueue is unsuccessful, no event will be + created and therefore it will not be possible to query the status of this + command or to wait for this command to complete. + If _event_wait_list_ and _event_ are not `NULL`, _event_ must not refer + to an element of the _event_wait_list_ array. + +If _event_wait_list_ is `NULL`, then this particular command waits until all +previous enqueued commands to _command_queue_ have completed. + +The barrier command either waits for a list of events to complete, or if the +list is empty it waits for all commands previously enqueued in +_command_queue_ to complete before it completes. +This command blocks command execution, that is, any following commands +enqueued after it do not execute until it completes. +This command returns an _event_ which can be waited on, i.e. this event can +be waited on to insure that all events either in the _event_wait_list_ or +all previously enqueued commands, queued before this command to +_command_queue_, have completed. + +// refError + +{clEnqueueBarrierWithWaitList} returns {CL_SUCCESS} if the function is +successfully executed. +Otherwise, it returns one of the following errors: + + * {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not a valid host + command-queue. + * {CL_INVALID_CONTEXT} if context associated with _command_queue_ and events + in _event_wait_list_ are not the same. + * {CL_INVALID_EVENT_WAIT_LIST} if _event_wait_list_ is `NULL` and + _num_events_in_wait_list_ > 0, or _event_wait_list_ is not `NULL` and + _num_events_in_wait_list_ is 0, or if event objects in _event_wait_list_ + are not valid events. 
+ * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required + by the OpenCL implementation on the device. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. +-- + +[open,refpage='clEnqueueBarrier',desc='A synchronization point that enqueues a barrier operation.',type='protos'] +-- +To enqueue a barrier command which waits for commands to complete, call the +function + +include::{generated}/api/protos/clEnqueueBarrier.txt[] +include::{generated}/api/version-notes/clEnqueueBarrier.asciidoc[] + + * _command_queue_ is a valid host command-queue. + +The barrier command waits for all commands previously enqueued in +_command_queue_ to complete before it completes. +This command blocks command execution, that is, any following commands +enqueued after it do not execute until it completes. +// TODO clEnqueueBarrierWithWaitList doesn't say synchronization point, should +// it, or should the next line be removed? The main difference is that +// clEnqueueBarrierWithWaitList returns an event, which is the synchronization +// point. +The barrier command is a synchronization point. + +// refError + +{clEnqueueBarrier} returns {CL_SUCCESS} if the function is successfully +executed. +Otherwise, it returns one of the following errors: + + * {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not a valid host + command-queue. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required + by the OpenCL implementation on the device. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. +-- + + +ifdef::cl_khr_semaphore[] +== Semaphores + +This section describes the semaphore types and functions defined by the +`<>` extension. + +=== Semaphore Types + +// TODO this is very rough, does not include API definitions or define +// refpage content, etc. 
+ + * {cl_semaphore_type_khr_TYPE} represents the different types of + semaphores. + ** It is mandatory to support {CL_SEMAPHORE_TYPE_BINARY_KHR}. + * {cl_semaphore_properties_khr_TYPE} represents properties associated with + semaphores. + ** {CL_SEMAPHORE_TYPE_KHR} must be supported. + * {cl_semaphore_info_khr_TYPE} represents queries for additional + information about semaphores. + ** All enums described in the "`New API Enums`" section of the + `<<cl_khr_semaphore>>` extension for {cl_semaphore_info_khr_TYPE} must + be supported. + * {cl_semaphore_payload_khr_TYPE} represents payload values of semaphores. + * {cl_semaphore_khr_TYPE} represents semaphore objects. + + +=== Creating Semaphores + +[open,refpage='clCreateSemaphoreWithPropertiesKHR',desc='Create a semaphore object',type='protos'] +-- +To create a *semaphore object*, call the function + +include::{generated}/api/protos/clCreateSemaphoreWithPropertiesKHR.txt[] +include::{generated}/api/version-notes/clCreateSemaphoreWithPropertiesKHR.asciidoc[] + + * _context_ identifies a valid OpenCL context that the created + {cl_semaphore_khr_TYPE} will belong to. + * _sema_props_ specifies additional semaphore properties in the form of a + list of property name and value pairs terminated with 0. + {CL_SEMAPHORE_TYPE_KHR} must be part of the list of properties specified + by _sema_props_. + +Following new properties are added to the list of possible supported +properties by {cl_semaphore_properties_khr_TYPE} that can be passed to +{clCreateSemaphoreWithPropertiesKHR}: + +.List of supported semaphore creation properties by {clCreateSemaphoreWithPropertiesKHR} +[width="100%",cols="<33%,<17%,<50%",options="header"] +|==== +| Semaphore Property | Property Value | Description +| {CL_SEMAPHORE_TYPE_KHR_anchor} + | {cl_semaphore_type_khr_TYPE} + | Specifies the type of semaphore to create. + This property is always required.
+| {CL_SEMAPHORE_DEVICE_HANDLE_LIST_KHR_anchor} + | {cl_device_id_TYPE}[] + | Specifies the list of OpenCL devices (terminated with + {CL_SEMAPHORE_DEVICE_HANDLE_LIST_END_KHR}) to associate with the + semaphore. + Only a single device is permitted in the list. + +ifdef::cl_khr_external_semaphore[] +| {CL_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR_anchor} + +include::{generated}/api/version-notes/CL_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR.asciidoc[] + | {cl_external_semaphore_handle_type_khr_TYPE}[] + | Specifies the list of semaphore handle type properties (terminated + with {CL_SEMAPHORE_EXPORT_HANDLE_TYPES_LIST_END_KHR}) that can be + used to export the semaphore being created. +endif::cl_khr_external_semaphore[] +|==== + +If {CL_SEMAPHORE_DEVICE_HANDLE_LIST_KHR} is not specified as part of +_sema_props_, the semaphore object created by +{clCreateSemaphoreWithPropertiesKHR} is by default accessible to all devices +in the _context_. +For a multi-device context {CL_SEMAPHORE_DEVICE_HANDLE_LIST_KHR} must be +specified in _sema_props_. + +// refError + +_errcode_ret_ returns an appropriate error code. +If _errcode_ret_ is `NULL`, no error code is returned. + +{clCreateSemaphoreWithPropertiesKHR} returns a valid semaphore object in an +un-signaled state and _errcode_ret_ is set to {CL_SUCCESS} if the +function is executed successfully. +Otherwise, it returns a `NULL` value with one of the following error values +returned in _errcode_ret_: + + * {CL_INVALID_CONTEXT} if _context_ is not a valid context. + * {CL_INVALID_PROPERTY} if a property name in _sema_props_ is not a + supported property name, if the value specified for a supported property + name is not valid, or if the same property name is specified more than + once. + Additionally, if _context_ is a multiple device context and _sema_props_ + does not specify {CL_SEMAPHORE_DEVICE_HANDLE_LIST_KHR}.
+ * {CL_INVALID_DEVICE} if {CL_SEMAPHORE_DEVICE_HANDLE_LIST_KHR} is + specified as part of _sema_props_, but it does not identify exactly one + valid device; or if a device identified by + {CL_SEMAPHORE_DEVICE_HANDLE_LIST_KHR} is not one of the devices within + _context_. + * {CL_INVALID_VALUE} + ** if _sema_props_ is `NULL`, or + ** if _sema_props_ do not specify pairs for minimum set + of properties (i.e. {CL_SEMAPHORE_TYPE_KHR}) required for successful + creation of a {cl_semaphore_khr_TYPE}. + + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources + required by the OpenCL implementation on the device. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. + +ifdef::cl_khr_external_semaphore[] + * {CL_INVALID_DEVICE} if one or more devices identified by properties + {CL_SEMAPHORE_DEVICE_HANDLE_LIST_KHR} cannot import the requested + external semaphore handle type. + * {CL_INVALID_VALUE} if more than one semaphore handle type is specified + in the {CL_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR} list. + * {CL_INVALID_OPERATION} if _sema_props_ specifies a + {cl_external_semaphore_handle_type_khr_TYPE} followed by a handle as + well as {CL_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR}. Exporting a semaphore + handle from a semaphore that was created by importing an external + semaphore handle is not permitted. +endif::cl_khr_external_semaphore[] +-- + + +ifdef::cl_khr_external_semaphore[] +=== Exporting Semaphore External Handles + +[open,refpage='clGetSemaphoreHandleForTypeKHR',desc='Export external handle from a semaphore',type='protos'] +-- +To export an external handle from a semaphore, call the function + +include::{generated}/api/protos/clGetSemaphoreHandleForTypeKHR.txt[] + + * _sema_object_ specifies a valid semaphore object with exportable + properties. + * _device_ specifies a valid device for which a semaphore handle is being + requested.
+ * _handle_type_ specifies the type of semaphore handle that should be + returned for this exportable _sema_object_, and must be one of the + values specified when _sema_object_ was created. + * _handle_size_ specifies the size of memory pointed to by _handle_ptr_. + * _handle_ptr_ is a pointer to memory where the exported external handle + is returned. + If _handle_ptr_ is `NULL`, it is ignored. + * _handle_size_ret_ returns the actual size in bytes for the external + handle. + If _handle_size_ret_ is `NULL`, it is ignored. + +// refError + +{clGetSemaphoreHandleForTypeKHR} returns {CL_SUCCESS} if the semaphore +handle is queried successfully. +Otherwise, it returns one of the following errors: + + * {CL_INVALID_SEMAPHORE_KHR} + ** if _sema_object_ is not a valid semaphore +// This is redundant with the error below. + ** if _sema_object_ is not exportable + * {CL_INVALID_DEVICE} + ** if _device_ is not a valid device, or + ** if _sema_object_ belongs to a context that is not associated with + _device_, or + ** if _sema_object_ cannot be shared with _device_. + * {CL_INVALID_VALUE} if the requested external semaphore handle type was + not specified when _sema_object_ was created. + * {CL_INVALID_VALUE} if _handle_size_ is less than the size needed to + store the returned handle. +// I don't think this can happen. This would have been checked when the semaphore was created. +// ** if CL_SEMAPHORE_HANDLE_*_KHR is specified as one of the _sema_props_ and +// the property CL_SEMAPHORE_HANDLE_*_KHR does not identify a valid external +// memory handle property reported by +// {CL_PLATFORM_SEMAPHORE_IMPORT_HANDLE_TYPES_KHR} or +// {CL_DEVICE_SEMAPHORE_IMPORT_HANDLE_TYPES_KHR} queries. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources + required by the OpenCL implementation on the device. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host.
+-- + + +=== Importing Semaphore External Handles + +Applications can import a semaphore payload into an existing semaphore using +an external semaphore handle. +The effects of the import operation will be either temporary or permanent, +as specified by the application. +If the import is temporary, the implementation must restore the semaphore to +its prior permanent state after submitting the next semaphore wait +operation. +Performing a subsequent temporary import on a semaphore before performing a +semaphore wait has no effect on this requirement; the next wait submitted on +the semaphore must still restore its last permanent state. +A permanent payload import behaves as if the target semaphore was destroyed, +and a new semaphore was created with the same handle but the imported +payload. +Because importing a semaphore payload temporarily or permanently detaches +the existing payload from a semaphore, similar usage restrictions to those +applied to {clReleaseSemaphoreKHR} are applied to any command that imports a +semaphore payload. +Which of these import types is used is referred to as the import operation's +permanence. +Each handle type supports either one or both types of permanence. + +The implementation must perform the import operation by either referencing +or copying the payload referred to by the specified external semaphore +handle, depending on the handle's type. +The import method used is referred to as the handle type's transference. +When using handle types with reference transference, importing a payload to +a semaphore adds the semaphore to the set of all semaphores sharing that +payload. +This set includes the semaphore from which the payload was exported. +Semaphore signaling and waiting operations performed on any semaphore in the +set must behave as if the set were a single semaphore. 
+Importing a payload using handle types with copy transference creates a +duplicate copy of the payload at the time of import, but makes no further +reference to it. +Semaphore signaling and waiting operations performed on the target of copy +imports must not affect any other semaphore or payload. + +Export operations have the same transference as the specified handle type's +import operations. +Additionally, exporting a semaphore payload to a handle with copy +transference has the same side effects on the source semaphore's payload as +executing a semaphore wait operation. +If the semaphore was using a temporarily imported payload, the semaphore's +prior permanent payload will be restored. + +Please refer to handle specific specifications for more details on +transference and permanence requirements specific to handle type. + + +=== Descriptions of External Semaphore Handle Types + +This section describes the external semaphore handle types that are added by +related extensions. + +Applications can import the same semaphore payload into multiple OpenCL +contexts, into the same context from which it was exported, and multiple +times into a given OpenCL context. +In all cases, each import operation must create a distinct semaphore object. + + +ifdef::cl_khr_external_semaphore_opaque_fd,cl_khr_external_semaphore_sync_fd[] + +==== File Descriptor Handle Types + +ifdef::cl_khr_external_semaphore_opaque_fd[] + +The `<>` extension extends +{cl_external_semaphore_handle_type_khr_TYPE} to support the following new +types of handles, and adds as a property that may be specified when creating +a semaphore from an external handle: + + * {CL_SEMAPHORE_HANDLE_OPAQUE_FD_KHR} specifies a POSIX file descriptor + handle that has only limited valid usage outside of OpenCL and other + compatible APIs. + It must be compatible with the POSIX system calls `dup`, `dup2`, + `close`, and the non-standard system call `dup3`. 
+ Additionally, it must be transportable over a socket using an + `SCM_RIGHTS` control message. + It owns a reference to the underlying synchronization primitive + represented by its semaphore object. + +endif::cl_khr_external_semaphore_opaque_fd[] + +ifdef::cl_khr_external_semaphore_sync_fd[] + +The `<>` extension extends +{cl_external_semaphore_handle_type_khr_TYPE} to support the following new +types of handles, and adds as a property that may be specified when creating +a semaphore from an external handle: + + * {CL_SEMAPHORE_HANDLE_SYNC_FD_KHR} specifies a POSIX file descriptor + handle to a Linux Sync File or Android Fence object. + It can be used with any native API accepting a valid sync file or fence + as input. + It owns a reference to the underlying synchronization primitive + associated with the file descriptor. + Implementations which support importing this handle type must accept any + type of sync or fence FD supported by the native system they are running + on. + +The special value -1 for fd is treated like a valid sync file descriptor +referring to an object that has already signaled. +The import operation will succeed and the semaphore will have a temporarily +imported payload as if a valid file descriptor had been provided. + +Note: This special behavior for importing an invalid sync file descriptor +allows easier interoperability with other system APIs which use the +convention that an invalid sync file descriptor represents work that has +already completed and does not need to be waited for. +It is consistent with the option for implementations to return a -1 file +descriptor when exporting a {CL_SEMAPHORE_HANDLE_SYNC_FD_KHR} from a +{cl_semaphore_khr_TYPE} which is signaled. 
+ +endif::cl_khr_external_semaphore_sync_fd[] + +.Transference and Permanence Properties for File Descriptor Handles +[width="100%",cols="60%,<20%,<20%",options="header"] +|==== +| Handle Type | Transference | Permanence + +ifdef::cl_khr_external_semaphore_opaque_fd[] +| {CL_SEMAPHORE_HANDLE_OPAQUE_FD_KHR_anchor} + +include::{generated}/api/version-notes/CL_SEMAPHORE_HANDLE_OPAQUE_FD_KHR.asciidoc[] + | Reference + | Temporary, Permanent +endif::cl_khr_external_semaphore_opaque_fd[] + +ifdef::cl_khr_external_semaphore_sync_fd[] +| {CL_SEMAPHORE_HANDLE_SYNC_FD_KHR_anchor} + +include::{generated}/api/version-notes/CL_SEMAPHORE_HANDLE_SYNC_FD_KHR.asciidoc[] + | Copy + | Temporary +endif::cl_khr_external_semaphore_sync_fd[] +|==== + +Importing a semaphore payload from a file descriptor transfers ownership of +the file descriptor from the application to the OpenCL implementation. +The application must not perform any operations on the file descriptor after +a successful import. + +ifdef::cl_khr_external_semaphore_sync_fd[] +[open,refpage='clReImportSemaphoreSyncFdKHR',desc='Re-import sync fd handle into an existing semaphore',type='protos'] +-- +To re-import a handle of type {CL_SEMAPHORE_HANDLE_SYNC_FD_KHR} into an +existing semaphore, call the function: + +include::{generated}/api/protos/clReImportSemaphoreSyncFdKHR.txt[] + + * _sema_object_ specifies a valid semaphore object with importable + properties. + * _reimport_props_ must be `NULL`, and is reserved for future use. + * _fd_ specifies an external file descriptor handle to import. + +Calling {clReImportSemaphoreSyncFdKHR} is equivalent to destroying +_sema_object_ and re-creating it with the original _sema_props_ from +{clCreateSemaphoreWithPropertiesKHR}, except a handle specified by _fd_ will +be imported. +The semaphore _sema_object_ must have originally imported an external handle +of type {CL_SEMAPHORE_HANDLE_SYNC_FD_KHR}.
+ +// refError + +{clReImportSemaphoreSyncFdKHR} returns {CL_SUCCESS} if the semaphore +handle is re-imported successfully. +Otherwise, it returns one of the following errors: + + * {CL_INVALID_SEMAPHORE_KHR} + ** if _sema_object_ is not a valid semaphore + * {CL_INVALID_SEMAPHORE_KHR} if a {CL_SEMAPHORE_HANDLE_SYNC_FD_KHR} handle + was not imported when _sema_object_ was created. + * {CL_INVALID_VALUE} if _fd_ is invalid. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources + required by the OpenCL implementation on the device. +-- +endif::cl_khr_external_semaphore_sync_fd[] + +endif::cl_khr_external_semaphore_opaque_fd,cl_khr_external_semaphore_sync_fd[] + + +ifdef::cl_khr_external_semaphore_dx_fence,cl_khr_external_semaphore_win32[] + +==== NT Handle Types + +ifdef::cl_khr_external_semaphore_dx_fence[] + +The `<<cl_khr_external_semaphore_dx_fence>>` extension extends +{cl_external_semaphore_handle_type_khr_TYPE} to support the following new +types of handles, and adds as a property that may be specified when creating +a semaphore from an external handle: + + * {CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR} specifies an NT handle returned by + `ID3D12Device::CreateSharedHandle` referring to a Direct3D 12 fence, or + `ID3D11Device5::CreateFence` referring to a Direct3D 11 fence. + It owns a reference to the underlying synchronization primitive + associated with the Direct3D fence. + +When waiting on semaphores using {clEnqueueWaitSemaphoresKHR} or signaling +semaphores using {clEnqueueSignalSemaphoresKHR}, the semaphore payload must +be provided for semaphores created from +{CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR}.
+ + * If _sema_objects_ list has a mix of semaphores obtained from + {CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR} and other handle types, then the + _sema_payload_list_ should point to a list of _num_sema_objects_ payload + values for each semaphore in _sema_objects_. + However, the payload values corresponding to semaphores with type + {CL_SEMAPHORE_TYPE_BINARY_KHR} can be set to 0 or will be ignored. + +{clEnqueueWaitSemaphoresKHR} and {clEnqueueSignalSemaphoresKHR} may return +{CL_INVALID_VALUE} if _sema_objects_ list has one or more semaphores +obtained from {CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR} and _sema_payload_list_ +is NULL. + +endif::cl_khr_external_semaphore_dx_fence[] + +ifdef::cl_khr_external_semaphore_win32[] + +The `<>` extension extends +{cl_external_semaphore_handle_type_khr_TYPE} to support the following new +types of handles, and adds as a property that may be specified when creating +a semaphore from an external handle: + + * {CL_SEMAPHORE_HANDLE_OPAQUE_WIN32_KHR} specifies an NT handle that has + only limited valid usage outside of OpenCL and other compatible APIs. + It must be compatible with the functions `DuplicateHandle`, + `CloseHandle`, `CompareObjectHandles`, `GetHandleInformation`, and + `SetHandleInformation`. + It owns a reference to the underlying synchronization primitive + represented by its semaphore object. + * {CL_SEMAPHORE_HANDLE_OPAQUE_WIN32_KMT_KHR} specifies a global share + handle that has only limited valid usage outside of OpenCL and other + compatible APIs. + It is not compatible with any native APIs. + It does not own a reference to the underlying synchronization primitive + represented by its semaphore object, and will therefore become invalid + when all semaphore objects associated with it are destroyed. 
+ +endif::cl_khr_external_semaphore_win32[] + +.Transference and Permanence Properties for NT Handle Types +[width="100%",cols="60%,<20%,<20%",options="header"] +|==== +| Handle Type | Transference | Permanence + +ifdef::cl_khr_external_semaphore_dx_fence[] +| {CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR_anchor} + +include::{generated}/api/version-notes/CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR.asciidoc[] + | Reference + | Temporary, Permanent +endif::cl_khr_external_semaphore_dx_fence[] + +ifdef::cl_khr_external_semaphore_win32[] +| {CL_SEMAPHORE_HANDLE_OPAQUE_WIN32_KHR_anchor} + +include::{generated}/api/version-notes/CL_SEMAPHORE_HANDLE_OPAQUE_WIN32_KHR.asciidoc[] + | Reference + | Temporary, Permanent +| {CL_SEMAPHORE_HANDLE_OPAQUE_WIN32_KMT_KHR_anchor} + +include::{generated}/api/version-notes/CL_SEMAPHORE_HANDLE_OPAQUE_WIN32_KMT_KHR.asciidoc[] + | Reference + | Temporary, Permanent +endif::cl_khr_external_semaphore_win32[] +|==== + +// TODO Why "Windows handles" here but "NT handles" elsewhere? + +Importing a semaphore payload from Windows handles does not transfer +ownership of the handle to the OpenCL implementation. +For handle types defined as NT handles, the application must release +ownership using the `CloseHandle` system call when the handle is no longer +needed. + +endif::cl_khr_external_semaphore_dx_fence,cl_khr_external_semaphore_win32[] + +endif::cl_khr_external_semaphore[] + + +=== Waiting On and Signaling Semaphores + +[open,refpage='clEnqueueWaitSemaphoresKHR',desc='Enqueue a command to wait on a set of semaphores',type='protos'] +-- +To enqueue a command to wait on a set of semaphores, call the function + +include::{generated}/api/protos/clEnqueueWaitSemaphoresKHR.txt[] +include::{generated}/api/version-notes/clEnqueueWaitSemaphoresKHR.asciidoc[] + + * _command_queue_ specifies a valid command-queue. + * _num_sema_objects_ specifies the number of semaphore objects to wait on. + * _sema_objects_ points to the list of semaphore objects to wait on. 
+ The length of the list must be at least _num_sema_objects_. + * _sema_payload_list_ points to the list of values of type + {cl_semaphore_payload_khr_TYPE} containing valid semaphore payload + values to wait on. + This can be set to `NULL` or will be ignored when all semaphores in the + list of _sema_objects_ are of type {CL_SEMAPHORE_TYPE_BINARY_KHR}. + * _num_events_in_wait_list_ specifies the number of events in + _event_wait_list_. + * _event_wait_list_ specifies list of events that need to complete before + {clEnqueueWaitSemaphoresKHR} can be executed. + If _event_wait_list_ is `NULL`, then {clEnqueueWaitSemaphoresKHR} does + not wait on any event to complete. + If _event_wait_list_ is `NULL`, _num_events_in_wait_list_ must be 0. + If _event_wait_list_ is not `NULL`, the list of events pointed to by + _event_wait_list_ must be valid and _num_events_in_wait_list_ must be + greater than 0. + The events specified in _event_wait_list_ act as synchronization points. + The context associated with events in _event_wait_list_ and that + associated with _command_queue_ must be the same. + * _event_ returns an event object that identifies this particular command + and can be used to query or queue a wait for this particular command to + complete. + _event_ can be `NULL`, in which case it will not be possible for the + application to query the status of this command or queue a wait for this + command to complete. + +The semaphore wait command waits for a list of events to complete and a list +of semaphore objects to become signaled. +The semaphore wait command returns an _event_ which can be waited on to +ensure that all events in the _event_wait_list_ have completed and all +semaphores in _sema_objects_ have been signaled. +{clEnqueueWaitSemaphoresKHR} will not return until the binary semaphores in +_sema_objects_ are in a state that makes them safe to re-signal. 
+If necessary, implementations may block in {clEnqueueWaitSemaphoresKHR} to +ensure the correct state of semaphores when returning. +There are no implications from this behavior for the state of _event_ or the +events in _event_wait_list_ when {clEnqueueWaitSemaphoresKHR} returns. +Waiting on the same binary semaphore twice without an intervening signal +may lead to undefined behavior. + +// refError + +{clEnqueueWaitSemaphoresKHR} returns {CL_SUCCESS} if the function is +executed successfully. +Otherwise, it returns one of the following errors: + + * {CL_INVALID_COMMAND_QUEUE} + ** if _command_queue_ is not a valid command-queue, or + ** if the device associated with _command_queue_ is not the same as one of the + devices specified by {CL_SEMAPHORE_DEVICE_HANDLE_LIST_KHR} at the time + of creating one or more of _sema_objects_. + * {CL_INVALID_VALUE} if _num_sema_objects_ is 0. + * {CL_INVALID_SEMAPHORE_KHR} if any of the semaphore objects specified by + _sema_objects_ is not valid. + * {CL_INVALID_CONTEXT} if the context associated with _command_queue_ and + any of the semaphore objects in _sema_objects_ are not the same, or if + the context associated with _command_queue_ and that associated with + events in _event_wait_list_ are not the same. + * {CL_INVALID_VALUE} if any of the semaphore objects specified by + _sema_objects_ requires a semaphore payload and _sema_payload_list_ is + `NULL`. + * {CL_INVALID_EVENT_WAIT_LIST} + ** if _event_wait_list_ is `NULL` and _num_events_in_wait_list_ is not 0, + or + ** if _event_wait_list_ is not `NULL` and _num_events_in_wait_list_ is 0, + or + ** if event objects in _event_wait_list_ are not valid events. + * {CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST} if the execution status + of any of the events in _event_wait_list_ is a negative integer value. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources + required by the OpenCL implementation on the device.
+ * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. +-- + +[open,refpage='clEnqueueSignalSemaphoresKHR',desc='Enqueue a command to signal a set of semaphores',type='protos'] +-- +To enqueue a command to signal a set of semaphores, call the function + +include::{generated}/api/protos/clEnqueueSignalSemaphoresKHR.txt[] +include::{generated}/api/version-notes/clEnqueueSignalSemaphoresKHR.asciidoc[] + + * _command_queue_ specifies a valid command-queue. + * _num_sema_objects_ specifies the number of semaphore objects to signal. + * _sema_objects_ points to the list of semaphore objects to signal. + The length of the list must be at least _num_sema_objects_. + * _sema_payload_list_ points to the list of values of type + {cl_semaphore_payload_khr_TYPE} containing semaphore payload values to + signal. + This can be set to `NULL` or will be ignored when all semaphores in the + list of _sema_objects_ are of type {CL_SEMAPHORE_TYPE_BINARY_KHR}. + * _num_events_in_wait_list_ specifies the number of events in + _event_wait_list_. + * _event_wait_list_ points to the list of events that need to complete + before {clEnqueueSignalSemaphoresKHR} can be executed. + If _event_wait_list_ is `NULL`, then {clEnqueueSignalSemaphoresKHR} does + not wait on any event to complete. + If _event_wait_list_ is `NULL`, _num_events_in_wait_list_ must be 0. + If _event_wait_list_ is not `NULL`, the list of events pointed to by + _event_wait_list_ must be valid and _num_events_in_wait_list_ must be + greater than 0. + The events specified in _event_wait_list_ act as synchronization points. + The context associated with events in _event_wait_list_ and that + associated with _command_queue_ must be the same. ++ +_event_ returns an event object that identifies this particular command +and can be used to query or queue a wait for this particular command to +complete.
+_event_ can be `NULL`, in which case it will not be possible for the +application to query the status of this command or queue a wait for this +command to complete. + +The semaphore signal command waits for a list of events to complete and then +signals a list of semaphore objects. +The semaphore signal command returns an _event_ which can be waited on to +ensure that all events in the _event_wait_list_ have completed and all +semaphores in _sema_objects_ have been signaled. +The successful completion of the event generated by +{clEnqueueSignalSemaphoresKHR} called on one or more semaphore objects of +type {CL_SEMAPHORE_TYPE_BINARY_KHR} changes the state of the corresponding +semaphore objects to signaled. +{clEnqueueSignalSemaphoresKHR} will not return until the binary semaphores +in _sema_objects_ are in a state that makes them safe to wait on again. +If necessary, implementations may block in {clEnqueueSignalSemaphoresKHR} to +ensure the correct state of semaphores when returning. +There are no implications from this behavior for the state of _event_ or the +events in _event_wait_list_ when {clEnqueueSignalSemaphoresKHR} returns. +Signaling the same binary semaphore twice without an intervening wait may +lead to undefined behavior. + +// refError + +{clEnqueueSignalSemaphoresKHR} returns {CL_SUCCESS} if the function is +executed successfully. +Otherwise, it returns one of the following errors: + + * {CL_INVALID_COMMAND_QUEUE} + ** if _command_queue_ is not a valid command-queue, or + ** if the device associated with _command_queue_ is not the same as one of the + devices specified by {CL_SEMAPHORE_DEVICE_HANDLE_LIST_KHR} at the time of + creating one or more of _sema_objects_, or + ** if one or more of _sema_objects_ belong to a context that does not + contain a device associated with _command_queue_. + * {CL_INVALID_VALUE} if _num_sema_objects_ is 0. + * {CL_INVALID_SEMAPHORE_KHR} if any of the semaphore objects specified by + _sema_objects_ is not valid.
+ * {CL_INVALID_CONTEXT} if the context associated with _command_queue_ and + any of the semaphore objects in _sema_objects_ are not the same, or if + the context associated with _command_queue_ and that associated with + events in _event_wait_list_ are not the same. + * {CL_INVALID_VALUE} if any of the semaphore objects specified by + _sema_objects_ requires a semaphore payload and _sema_payload_list_ is + `NULL`. + * {CL_INVALID_EVENT_WAIT_LIST} + ** if _event_wait_list_ is `NULL` and _num_events_in_wait_list_ is not + 0, or + ** if _event_wait_list_ is not `NULL` and _num_events_in_wait_list_ is + 0, or + ** if event objects in _event_wait_list_ are not valid events. + * {CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST} if the execution status + of any of the events in _event_wait_list_ is a negative integer value. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources + required by the OpenCL implementation on the device. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. +-- + + +=== Retaining and Releasing Semaphores + +[open,refpage='clReleaseSemaphoreKHR',desc='Release a semaphore object',type='protos'] +-- +To release a semaphore object, call the function + +include::{generated}/api/protos/clReleaseSemaphoreKHR.txt[] +include::{generated}/api/version-notes/clReleaseSemaphoreKHR.asciidoc[] + + * _sema_object_ specifies the semaphore object to be released. + +The _sema_object_ reference count is decremented. + +// refError + +{clReleaseSemaphoreKHR} returns {CL_SUCCESS} if the function is executed +successfully. +Otherwise, it returns one of the following errors: + + * {CL_INVALID_SEMAPHORE_KHR} if _sema_object_ is not a valid semaphore + object. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources + required by the OpenCL implementation on the device. 
+ * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. + +After the reference count becomes zero and commands queued for execution on +a command-queue(s) that use _sema_object_ have finished, the semaphore +object is deleted. +Using this function to release a reference that was not obtained by creating +the object via {clCreateSemaphoreWithPropertiesKHR} or by calling +{clRetainSemaphoreKHR} causes undefined behavior. +-- + +[open,refpage='clRetainSemaphoreKHR',desc='Retain a semaphore object',type='protos'] +-- +To retain a semaphore object, call the function + +include::{generated}/api/protos/clRetainSemaphoreKHR.txt[] +include::{generated}/api/version-notes/clRetainSemaphoreKHR.asciidoc[] + + * _sema_object_ specifies the semaphore object to be retained. + +{clRetainSemaphoreKHR} increments the reference count of _sema_object_. + +// refError + +{clRetainSemaphoreKHR} returns {CL_SUCCESS} if the function is executed +successfully. +Otherwise, it returns one of the following errors: + + * {CL_INVALID_SEMAPHORE_KHR} if _sema_object_ is not a valid semaphore + object. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources + required by the OpenCL implementation on the device. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. +-- + + +=== Semaphore Queries + +[open,refpage='clGetSemaphoreInfoKHR',desc='Query information about a semaphore object',type='protos'] +-- +To query information about a semaphore object, call the function + +include::{generated}/api/protos/clGetSemaphoreInfoKHR.txt[] +include::{generated}/api/version-notes/clGetSemaphoreInfoKHR.asciidoc[] + + * _sema_object_ specifies the semaphore object being queried. + * _param_name_ is a constant that specifies the semaphore information to + query, and must be one of the values shown in the + <> table. 
+ * _param_value_ is a pointer to memory where the result of the query is + returned as described in the <<cl_khr_semaphore_info-table,Semaphore Info>> table. + If _param_value_ is `NULL`, it is ignored. + * _param_value_size_ specifies the size in bytes of memory pointed to by + _param_value_. + This size must be greater than or equal to the size of the return type + described in the <<cl_khr_semaphore_info-table,Semaphore Info>> + table. + * _param_value_size_ret_ returns the actual size in bytes of data being + queried by _param_name_. + If _param_value_size_ret_ is `NULL`, it is ignored. + +[[cl_khr_semaphore_info-table]] +.List of parameter names supported by {clGetSemaphoreInfoKHR} +[width="100%",cols="<33%,<17%,<50%",options="header"] +|==== +| Semaphore Info | Return Type | Description +| {CL_SEMAPHORE_CONTEXT_KHR_anchor} + | {cl_context_TYPE} + | Returns the context specified when the semaphore is created. +| {CL_SEMAPHORE_REFERENCE_COUNT_KHR_anchor} footnote:[{fn-reference-count-usage}] + | {cl_uint_TYPE} + | Returns the semaphore reference count. +| {CL_SEMAPHORE_PROPERTIES_KHR_anchor} + | {cl_semaphore_properties_khr_TYPE}[] + | Returns the properties argument specified in + {clCreateSemaphoreWithPropertiesKHR}. + + The implementation must return the values specified in the + properties argument in the same order and without including + additional properties. +| {CL_SEMAPHORE_TYPE_KHR_anchor} + | {cl_semaphore_type_khr_TYPE} + | Returns the semaphore type. +| {CL_SEMAPHORE_PAYLOAD_KHR_anchor} + | {cl_semaphore_payload_khr_TYPE} + | Returns the semaphore payload value. + For semaphores of type {CL_SEMAPHORE_TYPE_BINARY_KHR} the payload + value returned will be `0` if the semaphore is in an un-signaled + state, and `1` if it is in a signaled state. +| {CL_SEMAPHORE_DEVICE_HANDLE_LIST_KHR_anchor} + | {cl_device_id_TYPE}[] + | Returns the list of OpenCL devices the semaphore is associated with.
+ +ifdef::cl_khr_external_semaphore[] +| {CL_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR_anchor} + | {cl_external_semaphore_handle_type_khr_TYPE}[] + | Returns the list of external semaphore handle types that may be used + for exporting. + The size of this query may be 0 indicating that this semaphore does + not support any handle types for exporting. +| {CL_SEMAPHORE_EXPORTABLE_KHR_anchor} + | {cl_bool_TYPE} + | Returns {CL_TRUE} if the semaphore is exportable and {CL_FALSE} + otherwise. +endif::cl_khr_external_semaphore[] +|==== + +// refError + +{clGetSemaphoreInfoKHR} returns {CL_SUCCESS} if the information is queried +successfully. +Otherwise, it returns one of the following errors: + + * {CL_INVALID_SEMAPHORE_KHR} + ** if _sema_object_ is not a valid semaphore + * {CL_INVALID_VALUE} + ** if _param_name_ is not one of the attributes defined in the + <<cl_khr_semaphore_info-table,Semaphore Info>> table or + ** if _param_value_size_ is less than the size of Return Type of the + corresponding _param_name_ attribute as defined in the + <<cl_khr_semaphore_info-table,Semaphore Info>> table. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources + required by the OpenCL implementation on the device. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. +-- + +endif::cl_khr_semaphore[] + + +== Out-of-Order Execution of Kernels and Memory Object Commands + +The OpenCL functions that are submitted to a command-queue are enqueued in +the order the calls are made but can be configured to execute in-order or +out-of-order. +The _properties_ argument in {clCreateCommandQueueWithProperties} or +{clCreateCommandQueue} can be used to specify the execution order. + +If the {CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE} property of a command-queue is +not set, the commands enqueued to a command-queue execute in-order.
+For example, if an application calls {clEnqueueNDRangeKernel} to execute +kernel A followed by a {clEnqueueNDRangeKernel} to execute kernel B, the +application can assume that kernel A finishes first and then kernel B is +executed. +If the memory objects output by kernel A are inputs to kernel B then kernel +B will see the correct data in memory objects produced by execution of +kernel A. +If the {CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE} property of a command-queue is +set, then there is no guarantee that kernel A will finish before kernel B +starts execution. + +Applications can configure the commands enqueued to a command-queue to +execute out-of-order by setting the {CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE} +property of the command-queue. +This can be specified when the command-queue is created. +In out-of-order execution mode there is no guarantee that the enqueued +commands will finish execution in the order they were queued. +As there is no guarantee that kernels will be executed in-order, i.e. based +on when the {clEnqueueNDRangeKernel} or {clEnqueueTask} calls are made within a +command-queue, it is therefore possible that an earlier +{clEnqueueNDRangeKernel} call to execute kernel A identified by event A may +execute and/or finish later than a {clEnqueueNDRangeKernel} call to execute +kernel B which was called by the application at a later point in time. +To guarantee a specific order of execution of kernels, a wait on a +particular event (in this case event A) can be used. +The wait for event A can be specified in the _event_wait_list_ argument to +{clEnqueueNDRangeKernel} for kernel B. + +In addition, a marker ({clEnqueueMarker} or {clEnqueueMarkerWithWaitList}) or a +barrier ({clEnqueueBarrier} or {clEnqueueBarrierWithWaitList}) command can be +enqueued to the command-queue. +The marker command ensures that previously enqueued commands identified by +the list of events to wait for (or all previous commands) have finished. 
+A barrier command is similar to a marker command, but additionally +guarantees that no later-enqueued commands will execute until the waited-for +commands have executed. + +Similarly, commands to read, write, copy or map memory objects that are +enqueued after {clEnqueueNDRangeKernel}, {clEnqueueTask} or +{clEnqueueNativeKernel} commands are not guaranteed to wait for kernels +scheduled for execution to have completed (if the +{CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE} property is set). +To ensure correct ordering of commands, the event object returned by +{clEnqueueNDRangeKernel}, {clEnqueueTask} or {clEnqueueNativeKernel} can be +used to enqueue a wait for event or a barrier command can be enqueued that must +complete before reads or writes to the memory object(s) occur. + + +[[profiling-operations]] +== Profiling Operations on Memory Objects and Kernels + +This section describes the profiling of OpenCL functions that are enqueued +as commands to a command-queue. Profiling of OpenCL commands can be enabled +by using a command-queue created with the {CL_QUEUE_PROFILING_ENABLE} +flag set in the {CL_QUEUE_PROPERTIES} bitfield in the _properties_ argument to +{clCreateCommandQueueWithProperties}, or in the _properties_ argument to +{clCreateCommandQueue}. +When profiling is enabled, the event objects that are created from +enqueuing a command store a timestamp for each of their state transitions. + +[open,refpage='clGetEventProfilingInfo',desc='Returns profiling information for the command associated with event if profiling is enabled.',type='protos'] +-- +To return profiling information for a command associated with an event when +profiling is enabled, call the function + +include::{generated}/api/protos/clGetEventProfilingInfo.txt[] +include::{generated}/api/version-notes/clGetEventProfilingInfo.asciidoc[] + + * _event_ specifies the event object. + * _param_name_ specifies the profiling data to query. 
+ The list of supported _param_name_ types and the information returned in + _param_value_ by {clGetEventProfilingInfo} is described in the + <<event-profiling-info-table,Event Profiling Info>> table. + * _param_value_ is a pointer to memory where the appropriate result being + queried is returned. + If _param_value_ is `NULL`, it is ignored. + * _param_value_size_ is used to specify the size in bytes of memory pointed to + by _param_value_. + This size must be {geq} size of return type as described in the + <<event-profiling-info-table,Event Profiling Info>> table. + * _param_value_size_ret_ returns the actual size in bytes of data being + queried by _param_name_. + If _param_value_size_ret_ is `NULL`, it is ignored. + +[[event-profiling-info-table]] +.List of supported param_names by {clGetEventProfilingInfo} +[width="100%",cols="<33%,<17%,<50%",options="header"] +|==== +| Event Profiling Info | Return Type | Description +| {CL_PROFILING_COMMAND_QUEUED_anchor} + +include::{generated}/api/version-notes/CL_PROFILING_COMMAND_QUEUED.asciidoc[] + | {cl_ulong_TYPE} + | A 64-bit value that describes the current device time counter in + nanoseconds when the command identified by event is enqueued in a + command-queue by the host. + +ifdef::cl_khr_command_buffer_multi_device[] + If the `<<cl_khr_command_buffer_multi_device>>` extension is supported, + for events returned by a command-buffer enqueue operation to multiple + command-queues, the host time when the command-buffer has been enqueued + across the command-queues is used. +endif::cl_khr_command_buffer_multi_device[] + +| {CL_PROFILING_COMMAND_SUBMIT_anchor} + +include::{generated}/api/version-notes/CL_PROFILING_COMMAND_SUBMIT.asciidoc[] + | {cl_ulong_TYPE} + | A 64-bit value that describes the current device time counter in + nanoseconds when the command identified by event that has been + enqueued is submitted by the host to the device associated with the + command-queue.
+ +ifdef::cl_khr_command_buffer_multi_device[] + If the `<<cl_khr_command_buffer_multi_device>>` extension is supported, + for events returned by a command-buffer enqueue operation to multiple + command-queues, the host time is used when command-buffer commands have + been submitted to any command-queue. +endif::cl_khr_command_buffer_multi_device[] + +| {CL_PROFILING_COMMAND_START_anchor} + +include::{generated}/api/version-notes/CL_PROFILING_COMMAND_START.asciidoc[] + | {cl_ulong_TYPE} + | A 64-bit value that describes the current device time counter in + nanoseconds when the command identified by event starts execution on + the device. + +ifdef::cl_khr_command_buffer_multi_device[] + If the `<<cl_khr_command_buffer_multi_device>>` extension is supported, + for events returned by a command-buffer enqueue operation to multiple + command-queues, the host time is used when any device starts executing a + command-buffer command. +endif::cl_khr_command_buffer_multi_device[] + +| {CL_PROFILING_COMMAND_END_anchor} + +include::{generated}/api/version-notes/CL_PROFILING_COMMAND_END.asciidoc[] + | {cl_ulong_TYPE} + | A 64-bit value that describes the current device time counter in + nanoseconds when the command identified by event has finished + execution on the device. + +ifdef::cl_khr_command_buffer_multi_device[] + If the `<<cl_khr_command_buffer_multi_device>>` extension is supported, + for events returned by a command-buffer enqueue operation to multiple + command-queues, the host time is used when the last command-buffer + command finishes execution on any device. +endif::cl_khr_command_buffer_multi_device[] + +| {CL_PROFILING_COMMAND_COMPLETE_anchor} + +include::{generated}/api/version-notes/CL_PROFILING_COMMAND_COMPLETE.asciidoc[] + | {cl_ulong_TYPE} + | A 64-bit value that describes the current device time counter in + nanoseconds when the command identified by event and any child + commands enqueued by this command on the device have finished + execution.
+ +ifdef::cl_khr_command_buffer_multi_device[] + If the `<<cl_khr_command_buffer_multi_device>>` extension is supported, + for events returned by a command-buffer enqueue operation to multiple + command-queues, the host time is used when the command-buffer has + completed execution across all command-queues. +endif::cl_khr_command_buffer_multi_device[] + +|==== + +The unsigned 64-bit values returned can be used to measure the time in +nanoseconds consumed by OpenCL commands. + +OpenCL devices are required to correctly track time across changes in device +frequency and power states. +The {CL_DEVICE_PROFILING_TIMER_RESOLUTION} specifies the resolution of the +timer i.e. the number of nanoseconds elapsed before the timer is +incremented. + +ifdef::cl_khr_command_buffer_multi_device[] +[NOTE] +==== +If the `<<cl_khr_command_buffer_multi_device>>` extension is supported, and +if no reliable device timer sources are available to inform the host side, +or parallel runtime scheduling makes it impossible to identify a first/last +command, then an implementation may fall back to reporting +{CL_PROFILING_COMMAND_SUBMIT} and {CL_PROFILING_COMMAND_COMPLETE} for +{CL_PROFILING_COMMAND_START} and {CL_PROFILING_COMMAND_END} respectively. +==== +endif::cl_khr_command_buffer_multi_device[] + +// refError + +{clGetEventProfilingInfo} returns {CL_SUCCESS} if the function is executed +successfully and the profiling information has been recorded. +Otherwise, it returns one of the following errors: + + * {CL_PROFILING_INFO_NOT_AVAILABLE} if the {CL_QUEUE_PROFILING_ENABLE} flag is + not set for the command-queue, if the execution status of the command + identified by _event_ is not {CL_COMPLETE} or if _event_ is a user event + object. + Prior to OpenCL 3.0, implementations may return + {CL_PROFILING_INFO_NOT_AVAILABLE} for an event created by + {clEnqueueSVMFree}.
+ifdef::cl_khr_command_buffer_multi_device[] + {empty} + + If the `<<cl_khr_command_buffer_multi_device>>` extension is supported, + and if _event_ was created from a call to {clEnqueueCommandBufferKHR}, + {CL_PROFILING_INFO_NOT_AVAILABLE} is returned if all the queues passed + do not have {CL_QUEUE_PROFILING_ENABLE} set. +endif::cl_khr_command_buffer_multi_device[] + * {CL_INVALID_VALUE} if _param_name_ is not valid, or if size in bytes + specified by _param_value_size_ is < size of return type as described in + the <<event-profiling-info-table,Event Profiling Info>> table and + _param_value_ is not `NULL`. + * {CL_INVALID_EVENT} if _event_ is not a valid event object. * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required by the OpenCL implementation on the device. * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources required by the OpenCL implementation on the host. -- -[open,refpage='clEnqueueTask',desc='Enqueues a command to execute a kernel, using a single work-item, on a device.',type='protos'] + +== Flush and Finish + +[open,refpage='clFlush',desc='Issues all previously queued OpenCL commands in a command-queue to the device associated with the command-queue.',type='protos'] -- -To enqueue a command to execute a kernel on a device, using a single work-item, -call the function +To flush commands to a device, call the function -include::{generated}/api/protos/clEnqueueTask.txt[] -include::{generated}/api/version-notes/clEnqueueTask.asciidoc[] +include::{generated}/api/protos/clFlush.txt[] +include::{generated}/api/version-notes/clFlush.asciidoc[] - * _command_queue_ is a valid host command-queue. - The kernel will be queued for execution on the device associated with - _command_queue_. - * _kernel_ is a valid kernel object. - The OpenCL context associated with _kernel_ and _command-queue_ must be the - same. - * _event_wait_list_ and _num_events_in_wait_list_ specify events that need to - complete before this particular command can be executed.
- If _event_wait_list_ is `NULL`, then this particular command does not wait - on any event to complete. - If _event_wait_list_ is `NULL`, _num_events_in_wait_list_ must be 0. - If _event_wait_list_ is not `NULL`, the list of events pointed to by - _event_wait_list_ must be valid and _num_events_in_wait_list_ must be - greater than 0. - The events specified in _event_wait_list_ act as synchronization points. - The context associated with events in _event_wait_list_ and _command_queue_ - must be the same. - The memory associated with _event_wait_list_ can be reused or freed after - the function returns. - * _event_ returns an event object that identifies this command and - can be used to query or wait for this command to complete. - If _event_ is `NULL` or the enqueue is unsuccessful, no event will be - created and therefore it will not be possible to query the status of this - command or to wait for this command to complete. - If _event_wait_list_ and _event_ are not `NULL`, _event_ must not refer - to an element of the _event_wait_list_ array. + * _command_queue_ is the command-queue to flush. -{clEnqueueTask} is equivalent to calling {clEnqueueNDRangeKernel} with -_work_dim_ set to 1, _global_work_offset_ set to `NULL`, _global_work_size[0]_ -set to 1, and _local_work_size[0]_ set to 1. +All previously queued OpenCL commands in _command_queue_ are issued to the +device associated with _command_queue_. +{clFlush} only guarantees that all queued commands to _command_queue_ will +eventually be submitted to the appropriate device. +There is no guarantee that they will be complete after {clFlush} returns. + +Any blocking commands queued in a command-queue and {clReleaseCommandQueue} +perform an implicit flush of the command-queue. 
+These blocking commands are {clEnqueueReadBuffer}, +{clEnqueueReadBufferRect}, {clEnqueueReadImage}, with _blocking_read_ set to +{CL_TRUE}; {clEnqueueWriteBuffer}, {clEnqueueWriteBufferRect}, +{clEnqueueWriteImage} with _blocking_write_ set to {CL_TRUE}; +{clEnqueueMapBuffer}, {clEnqueueMapImage} with _blocking_map_ set to +{CL_TRUE}; {clEnqueueSVMMemcpy} with _blocking_copy_ set to {CL_TRUE}; +{clEnqueueSVMMap} with _blocking_map_ set to {CL_TRUE} or {clWaitForEvents}. + +To use event objects that refer to commands enqueued in a command-queue as +event objects to wait on by commands enqueued in a different command-queue, +the application must call a {clFlush} or any blocking commands that perform +an implicit flush of the command-queue where the commands that refer to +these event objects are enqueued. // refError -{clEnqueueTask} returns {CL_SUCCESS} if the kernel-instance was successfully -queued. +{clFlush} returns {CL_SUCCESS} if the function call was executed successfully. Otherwise, it returns one of the following errors: - * {CL_INVALID_PROGRAM_EXECUTABLE} if there is no successfully built program - executable available for device associated with _command_queue_. * {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not a valid host command-queue. - * {CL_INVALID_KERNEL} if _kernel_ is not a valid kernel object. - * {CL_INVALID_CONTEXT} if context associated with _command_queue_ and - _kernel_ are not the same or if the context associated with - _command_queue_ and events in _event_wait_list_ are not the same. - * {CL_INVALID_KERNEL_ARGS} if the kernel argument values have not been - specified. - * {CL_INVALID_WORK_GROUP_SIZE} if a work-group size is specified for _kernel_ - in the program source and it is not (1, 1, 1). -// TODO I'm not sure if the next error makes sense for a 'task'. 
- * {CL_INVALID_WORK_GROUP_SIZE} if the required number of sub-groups is - specified for _kernel_ in the program source and is not consistent with a - work-group size of (1, 1, 1). - * {CL_MISALIGNED_SUB_BUFFER_OFFSET} if a sub-buffer object is specified as - the value for an argument that is a buffer object and the _offset_ - specified when the sub-buffer object is created is not aligned to - {CL_DEVICE_MEM_BASE_ADDR_ALIGN} value for device associated with _queue_. - This error code is <> version 1.1. - * {CL_INVALID_IMAGE_SIZE} if an image object is specified as an argument - value and the image dimensions (image width, height, specified or - compute row and/or slice pitch) are not supported by device associated - with _queue_. - * {CL_IMAGE_FORMAT_NOT_SUPPORTED} if an image object is specified as an - argument value and the image format (image channel order and data type) - is not supported by device associated with _queue_. - * {CL_OUT_OF_RESOURCES} if there is a failure to queue the execution - instance of _kernel_ on the command-queue because of insufficient - resources needed to execute the kernel. See how this error code is used - with {clEnqueueNDRangeKernel} for examples. - * {CL_MEM_OBJECT_ALLOCATION_FAILURE} if there is a failure to allocate - memory for data store associated with image or buffer objects specified - as arguments to _kernel_. - * {CL_INVALID_EVENT_WAIT_LIST} if _event_wait_list_ is `NULL` and - _num_events_in_wait_list_ > 0, or _event_wait_list_ is not `NULL` and - _num_events_in_wait_list_ is 0, or if event objects in _event_wait_list_ - are not valid events. - * {CL_INVALID_OPERATION} if SVM pointers are passed as arguments to a kernel - and the device does not support SVM or if system pointers are passed as - arguments to a kernel and/or stored inside SVM allocations passed as - kernel arguments and the device does not support fine grain system SVM - allocations. 
* {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required by the OpenCL implementation on the device. * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources required by the OpenCL implementation on the host. -- -[open,refpage='clEnqueueNativeKernel',desc='Enqueues a command to execute a native C/C++ function not compiled using the OpenCL compiler.',type='protos'] +[open,refpage='clFinish',desc='Blocks until all previously queued OpenCL commands in a command-queue are issued to the associated device and have completed.',type='protos'] -- -To enqueue a command to execute a native C/{cpp} function not compiled using -the OpenCL compiler, call the function +To wait for completion of commands on a device, call the function -include::{generated}/api/protos/clEnqueueNativeKernel.txt[] -include::{generated}/api/version-notes/clEnqueueNativeKernel.asciidoc[] +include::{generated}/api/protos/clFinish.txt[] +include::{generated}/api/version-notes/clFinish.asciidoc[] - * _command_queue_ is a valid host command-queue. - A native user function can only be executed on a command-queue created on a - device that has {CL_EXEC_NATIVE_KERNEL} capability set in - {CL_DEVICE_EXECUTION_CAPABILITIES} as specified in the - <> table. - * _user_func_ is a pointer to a host-callable user function. - It is the application's responsibility to ensure that the host-callable user - function is thread-safe. - * _args_ is a pointer to the args list that _user_func_ should be called with. - * _cb_args_ is the size in bytes of the args list that _args_ points to. - * _num_mem_objects_ is the number of buffer objects that are passed in _args_. - * _mem_list_ is a list of valid buffer objects, if _num_mem_objects_ > 0. - The buffer object values specified in _mem_list_ are memory object handles - (`{cl_mem_TYPE}` values) returned by {clCreateBuffer} or {clCreateBufferWithProperties}, - or `NULL`. 
- * _args_mem_loc_ is a pointer to appropriate locations that _args_ points to - where memory object handles ({cl_mem_TYPE} values) are stored. - Before the user function is executed, the memory object handles are replaced - by pointers to global memory. - * _event_wait_list_, _num_events_in_wait_list_ and _event_ are as described in - {clEnqueueNDRangeKernel}. + * _command_queue_ is the command-queue to wait for. -The data pointed to by _args_ and _cb_args_ bytes in size will be copied and -a pointer to this copied region will be passed to _user_func_. -The copy needs to be done because the memory objects ({cl_mem_TYPE} values) that -_args_ may contain need to be modified and replaced by appropriate pointers -to global memory. -When {clEnqueueNativeKernel} returns, the memory region pointed to by _args_ -can be reused by the application. +All previously queued OpenCL commands in _command_queue_ are issued to the +associated device, and the function blocks until all previously queued +commands have completed. +{clFinish} does not return until all previously queued commands in +_command_queue_ have been processed and completed. +{clFinish} is also a synchronization point. // refError -{clEnqueueNativeKernel} returns {CL_SUCCESS} if the user function execution -instance was successfully queued. +{clFinish} returns {CL_SUCCESS} if the function call was executed +successfully. Otherwise, it returns one of the following errors: * {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not a valid host command-queue. - * {CL_INVALID_CONTEXT} if context associated with _command_queue_ and events - in _event_wait_list_ are not the same. - * {CL_INVALID_VALUE} if _user_func_ is `NULL`. - * {CL_INVALID_VALUE} if _args_ is a `NULL` value and _cb_args_ > 0, or if - _args_ is a `NULL` value and _num_mem_objects_ > 0. - * {CL_INVALID_VALUE} if _args_ is not `NULL` and _cb_args_ is 0. - * {CL_INVALID_VALUE} if _num_mem_objects_ > 0 and _mem_list_ or - _args_mem_loc_ are `NULL`. 
- * {CL_INVALID_VALUE} if _num_mem_objects_ = 0 and _mem_list_ or - _args_mem_loc_ are not `NULL`. - * {CL_INVALID_OPERATION} if the device associated with _command_queue_ - cannot execute the native kernel. - * {CL_INVALID_MEM_OBJECT} if one or more memory objects specified in - _mem_list_ are not valid or are not buffer objects. - * {CL_OUT_OF_RESOURCES} if there is a failure to queue the execution - instance of _kernel_ on the command-queue because of insufficient - resources needed to execute the kernel. - * {CL_MEM_OBJECT_ALLOCATION_FAILURE} if there is a failure to allocate - memory for data store associated with buffer objects specified as - arguments to _kernel_. - * {CL_INVALID_EVENT_WAIT_LIST} if _event_wait_list_ is `NULL` and - _num_events_in_wait_list_ > 0, or _event_wait_list_ is not `NULL` and - _num_events_in_wait_list_ is 0, or if event objects in _event_wait_list_ - are not valid events. - * {CL_INVALID_OPERATION} if SVM pointers are passed as arguments to a kernel - and the device does not support SVM or if system pointers are passed as - arguments to a kernel and/or stored inside SVM allocations passed as - kernel arguments and the device does not support fine grain system SVM - allocations. * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required by the OpenCL implementation on the device. * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources required by the OpenCL implementation on the host. +-- + +ifdef::cl_khr_command_buffer[] + +== Command-Buffers + +A _command-buffer_ object represents a series of operations to be enqueued +on one or more command-queues without any application code interaction. +Grouping the operations together allows efficient enqueuing of repetitive +operations, as well as enabling driver optimizations. + +Command-buffers are _sequential use_ by default, but may also be set to +_simultaneous use_ on creation if the device optionally supports this +capability. 
+A sequential use command-buffer must have a <<pending_count, Pending Count>> +of 0 or 1. +The simultaneous use capability removes this restriction and allows +command-buffers to have a <<pending_count, Pending Count>> greater than 1. + +[[compatible]] +Command-buffers are created using an ordered list of command-queues that +commands are recorded to and execute on by default. +These command-queues can be replaced on command-buffer enqueue with +different command-queues, provided for each element in the replacement list +the substitute command-queue is compatible with the command-queue used on +command-buffer creation. +A _compatible_ command-queue is defined as a command-queue with +identical properties targeting the same device and in the same OpenCL +context. + +While constructing a command-buffer it is valid for the user to interleave +calls to the same queue which create commands, such as +{clCommandNDRangeKernelKHR}, with queue submission calls, such as +{clEnqueueNDRangeKernel} or {clEnqueueCommandBufferKHR}. +That is, there is no effect on queue state from recording commands. +The purpose of the queue parameter is to define the device and properties of +the command, which are constant queries on the queue object. + +A command-buffer object should increment the reference count of attached +OpenCL objects such as queues, buffers, images, and kernels referenced in +commands recorded to the command-buffer. +This enables correct behavior of the command-buffer when its attached +objects have been released. +On destruction of the command-buffer it should decrement these reference +counts, allowing the attached objects to be freed if appropriate. + +[[command-buffer-kernel-argument-ref-counting]] [NOTE] ==== -The total number of read-only images specified as arguments to a kernel -cannot exceed {CL_DEVICE_MAX_READ_IMAGE_ARGS}. -Each image array argument to a kernel declared with the `read_only` -qualifier counts as one image. 
-The total number of write-only images specified as arguments to a kernel -cannot exceed {CL_DEVICE_MAX_WRITE_IMAGE_ARGS}. -Each image array argument to a kernel declared with the `write_only` -qualifier counts as one image. - -The total number of read-write images specified as arguments to a kernel -cannot exceed {CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS}. -Each image array argument to a kernel declared with the `read_write` -qualifier counts as one image. +A command-buffer object does not update the reference count of objects set +as arguments on kernels recorded into the command-buffer. +This is consistent with the reference counting behavior of {clSetKernelArg}. + +Applications should ensure that objects passed as arguments to kernels +recorded to a command-buffer are not deleted until the command-buffer has +been released. +Undefined behavior may result from the failure to follow this usage +requirement for all the command-buffers an object is used as a kernel +argument in. + +If using layered extension `<>`, +<>. ==== --- -[[event-objects]] -== Event Objects +ifdef::cl_khr_command_buffer_multi_device[] +=== Command-Buffers and Multiple Devices + +If the `<>` extension is supported, a +command-buffer can contain commands recorded to the queues of different +devices if a vendor provides support for inter-device +{cl_sync_point_khr_TYPE} synchronization. +This feature is reported either through +{CL_DEVICE_COMMAND_BUFFER_SYNC_DEVICES_KHR}, which informs the user what +devices can synchronize with each other natively on the device-side, or +through {CL_COMMAND_BUFFER_PLATFORM_UNIVERSAL_SYNC_KHR}, which allows +synchronization between all devices in a platform, falling back to host-side +synchronization when device-side synchronization is not available. +These two mechanisms are referred to as **device-side sync** and **universal +sync** respectively. 
+ +If these mechanisms do not report that more than one device can be used in a +command-buffer, it will still be possible to perform multiple queue +recording in a command-buffer if the +{CL_COMMAND_BUFFER_CAPABILITY_MULTIPLE_QUEUE_KHR} capability is reported for +a device. +However, with this capability all the queues commands are recorded to must +target the same device. + +Commands recorded to different command-queues in the same command-buffer may +be executed concurrently to each other unless synchronized explicitly with +sync-points. +Ordering of other commands submitted to the same command-queues as used to +enqueue a command-buffer is the responsibility of the programmer. +A command-buffer enqueue spanning multiple queues can return an event to use +for synchronization, which will complete once all commands in the +command-buffer have completed. +If ordering restrictions are required, this event (or command-queue +barriers) may be used by the user to synchronize the command-buffer enqueue +with regular commands, or another command-buffer enqueue. + +endif::cl_khr_command_buffer_multi_device[] + + + +=== Command-Buffer Lifecycle + +A command-buffer is always in one of the following states: + +[[recording]] +Recording:: Initial state of a command-buffer on creation, where commands can be +recorded to the command-buffer. + +[[executable]] +Executable:: State after command recording has finished with +{clFinalizeCommandBufferKHR} and the command-buffer may be enqueued. + +[[pending]] +Pending:: Once a command-buffer has been enqueued to a command-queue it enters +the Pending state until completion, at which point it moves back to the +<> state. + +// Image generated from the following mermaid diagram description using https://mermaid.live +// Ideally we'd use the asciidoctor-diagram extension to generate the rendered diagram, but +// there are issues installing the gem with ruby 2.3.3 +// +// [mermaid, "Lifecycle of a command-buffer", png] +// .... 
+// stateDiagram-v2 +// [*] --> Recording: Create +// Recording -->Executable: Finalize +// Executable --> Pending: Enqueue +// Pending --> Executable: Completion +// .... + +image::images/commandbuffer_lifecycle.png[align="center", title="Lifecycle of a command-buffer."] + +[[pending_count]] +The Pending Count is the number of copies of the command +buffer in the <> state. +By default a command-buffer's Pending Count must be 0 or 1. +If the command-buffer was created with +{CL_COMMAND_BUFFER_SIMULTANEOUS_USE_KHR} then the command-buffer may have a +Pending Count greater than 1. + + +=== Creating Command-Buffer Objects + +[open,refpage='clCreateCommandBufferKHR',desc='Create a command-buffer',type='protos'] +-- +To create a command-buffer that can record commands to the specified +queues, call the function + +include::{generated}/api/protos/clCreateCommandBufferKHR.txt[] +include::{generated}/api/version-notes/clCreateCommandBufferKHR.asciidoc[] + + * _num_queues_ is the number of command-queues listed in _queues_. + If the `<>` extension is not + supported, this **must** be one. + * _queues_ is a pointer to a list of command-queues that the + command-buffer commands will be recorded to. + _queues_ must be a non-`NULL` value and the length of the list equal to + _num_queues_. + * _properties_ specifies a list of properties for the command-buffer and + their corresponding values. + Each property name is immediately followed by the corresponding desired + value. + The list is terminated with 0. + The list of supported properties is described in the table below. + If a supported property and its value is not specified in properties, + its default value will be used. + _properties_ can be `NULL` in which case the default values for + supported command-buffer properties will be used. 
++ +[[commandbuffer-properties]] +.{clCreateCommandBufferKHR} properties +[cols=",,",options="header",] +|==== +| Recording Properties | Property Value | Description -// Check: Is this list of event object APIs really necessary? - -//Event objects can be used to refer to a kernel-instance command -//({clEnqueueNDRangeKernel}, {clEnqueueTask}, {clEnqueueNativeKernel}), read, -//write, map and copy commands on memory objects ( -//{clEnqueueReadBuffer}, {clEnqueueWriteBuffer}, -//{clEnqueueMapBuffer}, {clEnqueueUnmapMemObject}, -//{clEnqueueReadBufferRect}, {clEnqueueWriteBufferRect}, -//{clEnqueueReadImage}, {clEnqueueWriteImage}, {clEnqueueMapImage}, -//{clEnqueueCopyBuffer}, {clEnqueueCopyImage}, -//{clEnqueueCopyBufferRect}, -//{clEnqueueCopyBufferToImage}, {clEnqueueCopyImageToBuffer}), -//{clEnqueueSVMMemcpy}, {clEnqueueSVMMemFill}, -//{clEnqueueSVMMap}, {clEnqueueSVMUnmap}, {clEnqueueSVMFree}, {clEnqueueMarker}, -//{clEnqueueMarkerWithWaitList}, {clEnqueueWaitForEvents}, {clEnqueueBarrier}, -//{clEnqueueBarrierWithWaitList}, (refer to -//<>) or user events. +| {CL_COMMAND_BUFFER_FLAGS_KHR_anchor} -An event object can be used to track the execution status of a command. -The API calls that enqueue commands to a command-queue create a new event -object that is returned in the _event_ argument. -In case of an error enqueuing the command in the command-queue the event -argument does not return an event object. +include::{generated}/api/version-notes/CL_COMMAND_BUFFER_FLAGS_KHR.asciidoc[] + | {cl_command_buffer_flags_khr_TYPE} + | This is a bitfield and can be set to a combination of the following values: -The execution status of an enqueued command at any given point in time can -be one of the following: + {CL_COMMAND_BUFFER_SIMULTANEOUS_USE_KHR_anchor} - Allow multiple + instances of the command-buffer to be submitted to the device for + execution. + If set, devices must support + {CL_COMMAND_BUFFER_CAPABILITY_SIMULTANEOUS_USE_KHR}. 
- * {CL_QUEUED_anchor}: Indicates that the command has been enqueued in a - command-queue. - This is the initial state of all events except user events. - * {CL_SUBMITTED_anchor}: The initial state for all user events. - For all other events, indicates that the command has been submitted - by the host to the device. - * {CL_RUNNING_anchor}: Indicates that the device has started executing this - command. - In order for the execution status of an enqueued command to change from - {CL_SUBMITTED} to {CL_RUNNING}, all events that this command is waiting on - must have completed successfully i.e. their execution status must be - {CL_COMPLETE}. - * {CL_COMPLETE_anchor}: Indicates that the command has successfully completed. - * An Error Code: A negative integer value indicating that the command was - abnormally terminated. Abnormal termination may occur for a number of reasons, - such as a bad memory access. +include::{generated}/api/version-notes/CL_COMMAND_BUFFER_SIMULTANEOUS_USE_KHR.asciidoc[] -[NOTE] -==== -A command is considered to be complete if its execution status is -{CL_COMPLETE} or is a negative integer value. +ifdef::cl_khr_command_buffer_multi_device[] + {CL_COMMAND_BUFFER_DEVICE_SIDE_SYNC_KHR_anchor} - All commands in the + command-buffer must use native synchronization, as reported by + {CL_DEVICE_COMMAND_BUFFER_SYNC_DEVICES_KHR}. + This can be used as a safeguard for performant applications that do not + want to accidentally fall back to host synchronization when passing + multiple queues. + +include::{generated}/api/version-notes/CL_COMMAND_BUFFER_DEVICE_SIDE_SYNC_KHR.asciidoc[] + +endif::cl_khr_command_buffer_multi_device[] -If the execution of a command is terminated, the command-queue associated -with this terminated command, and the associated context (and all other -command-queues in this context) may no longer be available. 
-The behavior of OpenCL API calls that use this context (and command-queues -associated with this context) are now considered to be -implementation-defined. -The user registered callback function specified when context is created can -be used to report appropriate error information. -==== +ifdef::cl_khr_command_buffer_mutable_dispatch[] + {CL_COMMAND_BUFFER_MUTABLE_KHR} - Enables modification of the + command-buffer, by default command-buffers are immutable. + If set, commands in the command-buffer may be updated via + {clUpdateMutableCommandsKHR}. -[open,refpage='clCreateUserEvent',desc='Creates a user event object.',type='protos'] --- -To create a user event object, call the function +include::{generated}/api/version-notes/CL_COMMAND_BUFFER_MUTABLE_KHR.asciidoc[] +endif::cl_khr_command_buffer_mutable_dispatch[] -include::{generated}/api/protos/clCreateUserEvent.txt[] -include::{generated}/api/version-notes/clCreateUserEvent.asciidoc[] + The default value of this property is `0`. - * _context_ must be a valid OpenCL context. +ifdef::cl_khr_command_buffer_mutable_dispatch[] +| {CL_COMMAND_BUFFER_MUTABLE_DISPATCH_ASSERTS_KHR_anchor} + +include::{generated}/api/version-notes/CL_COMMAND_BUFFER_MUTABLE_DISPATCH_ASSERTS_KHR.asciidoc[] + | {cl_mutable_dispatch_asserts_khr_TYPE} + | This is a bitfield and can be set to a combination of the following values: + + {CL_MUTABLE_DISPATCH_ASSERT_NO_ADDITIONAL_WORK_GROUPS_KHR_anchor} - + An assertion by the user that the number of work-groups of any + ND-range kernel recorded in this command buffer will not be updated + beyond the number defined when the ND-range kernel was recorded. + If the user's update to the values of _local_work_size_ and/or + _global_work_size_ result in an increase in the number of + work-groups in the ND-range over the number specified when the + ND-range kernel was recorded, the behavior is undefined. 
+ +include::{generated}/api/version-notes/CL_MUTABLE_DISPATCH_ASSERT_NO_ADDITIONAL_WORK_GROUPS_KHR.asciidoc[] +endif::cl_khr_command_buffer_mutable_dispatch[] +|==== * _errcode_ret_ will return an appropriate error code. If _errcode_ret_ is `NULL`, no error code is returned. -User events allow applications to enqueue commands that wait on a user event -to finish before the command is executed by the device. +ifdef::cl_khr_command_buffer_multi_device[] +.Summary of command-buffer creation configurations, for the `<>` extension +[width="100%",options="header"] +|==== +| All Devices Associated With `Queues` can Device-side Sync | Platform Supports Universal Sync | Condition | Result +.3+| Yes +.3+| Yes or No +| Any device does not support the multi-queue capability, and has more than + one queue targeting it +| Error - {CL_INCOMPATIBLE_COMMAND_QUEUE_KHR} +| User sets {CL_COMMAND_BUFFER_DEVICE_SIDE_SYNC_KHR} flag +| OK +| Otherwise +| OK + +.3+| No +.3+| Yes +| Any device does not support the multi-queue capability, and has more than + one queue targeting it +| Error - {CL_INCOMPATIBLE_COMMAND_QUEUE_KHR} +| User sets {CL_COMMAND_BUFFER_DEVICE_SIDE_SYNC_KHR} flag +| Error - {CL_INCOMPATIBLE_COMMAND_QUEUE_KHR} +| Otherwise +| OK - May be performance implications when synchronizing commands between + devices without device-side sync support. + +| No +| No +| Always +| Error - {CL_INCOMPATIBLE_COMMAND_QUEUE_KHR} +|==== +endif::cl_khr_command_buffer_multi_device[] + +[NOTE] +==== +Upon creation the command-buffer is defined as being in the +<> state, in order for the command-buffer to be enqueued +it must first be finalized using {clFinalizeCommandBufferKHR} after which no +further commands can be recorded. +A command-buffer is submitted for execution on command-queues with a call to +{clEnqueueCommandBufferKHR}. 
+==== // refError -{clCreateUserEvent} returns a valid non-zero event object and _errcode_ret_ -is set to {CL_SUCCESS} if the user event object is created successfully. +{clCreateCommandBufferKHR} returns a valid non-zero command-buffer and +_errcode_ret_ is set to {CL_SUCCESS} if the command-buffer is created +successfully. Otherwise, it returns a `NULL` value with one of the following error values returned in _errcode_ret_: - * {CL_INVALID_CONTEXT} if _context_ is not a valid context. - * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required - by the OpenCL implementation on the device. + * {CL_INVALID_COMMAND_QUEUE} if any command-queue in _queues_ is not a + valid command-queue. + * {CL_INCOMPATIBLE_COMMAND_QUEUE_KHR} if any command-queue in _queues_ is + an out-of-order command-queue and the device associated with the + command-queue does not support the + {CL_COMMAND_BUFFER_CAPABILITY_OUT_OF_ORDER_KHR} capability. + * {CL_INCOMPATIBLE_COMMAND_QUEUE_KHR} if the properties of any + command-queue in _queues_ do not contain the minimum properties + specified by {CL_DEVICE_COMMAND_BUFFER_REQUIRED_QUEUE_PROPERTIES_KHR}. + * {CL_INVALID_CONTEXT} if all the command-queues in _queues_ do not have + the same OpenCL context. + * {CL_INVALID_VALUE} if _num_queues_ is zero. + * {CL_INVALID_VALUE} if _queues_ is `NULL`. + * {CL_INVALID_VALUE} if values specified in _properties_ are not valid, or + if the same property name is specified more than once. + * {CL_INVALID_PROPERTY} if values specified in _properties_ are valid but + are not supported by all the devices associated with command-queues in + _queues_. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources + required by the OpenCL implementation on the device. * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources required by the OpenCL implementation on the host. -The initial execution status for the user event object is {CL_SUBMITTED}. 
+ifdef::cl_khr_command_buffer_multi_device[] +If the `<>` extension is supported: + + * {CL_INCOMPATIBLE_COMMAND_QUEUE_KHR} if _queues_ includes more than one + command-queue associated with a device that does not support capability + {CL_COMMAND_BUFFER_CAPABILITY_MULTIPLE_QUEUE_KHR}. + * {CL_INCOMPATIBLE_COMMAND_QUEUE_KHR} if the + {CL_COMMAND_BUFFER_DEVICE_SIDE_SYNC_KHR} flag is set, and any device + associated with a command-queue in _queues_ cannot natively synchronize + with the other devices associated with _queues_ as reported by + {CL_DEVICE_COMMAND_BUFFER_SYNC_DEVICES_KHR}. + * {CL_INCOMPATIBLE_COMMAND_QUEUE_KHR} if the platform does not support the + {CL_COMMAND_BUFFER_PLATFORM_UNIVERSAL_SYNC_KHR} capability, and any + device associated with a command-queue in _queues_ cannot natively + synchronize with the other devices associated with _queues_ as reported + by {CL_DEVICE_COMMAND_BUFFER_SYNC_DEVICES_KHR}. +endif::cl_khr_command_buffer_multi_device[] -- -[open,refpage='clSetUserEventStatus',desc='Sets the execution status of a user event object.',type='protos'] +[open,refpage='clRetainCommandBufferKHR',desc='Increment a command-buffer\'s reference count',type='protos'] -- -To set the execution status of a user event object, call the function +To increment a command-buffer's reference count, call the function -include::{generated}/api/protos/clSetUserEventStatus.txt[] -include::{generated}/api/version-notes/clSetUserEventStatus.asciidoc[] +include::{generated}/api/protos/clRetainCommandBufferKHR.txt[] +include::{generated}/api/version-notes/clRetainCommandBufferKHR.asciidoc[] - * _event_ is a user event object created using {clCreateUserEvent}. - * _execution_status_ specifies the new execution status to be set and can be - {CL_COMPLETE} or a negative integer value to indicate an error. - A negative integer value causes all enqueued commands that wait on this user - event to be terminated. 
- {clSetUserEventStatus} can only be called once to change the execution - status of _event_. + * _command_buffer_ specifies the command-buffer to retain. + +// refError + +{clRetainCommandBufferKHR} returns {CL_SUCCESS} if the function is executed +successfully. +Otherwise, it returns one of the following errors: + + * {CL_INVALID_COMMAND_BUFFER_KHR} if _command_buffer_ is not a valid + command-buffer. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources + required by the OpenCL implementation on the device. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. +-- + +[open,refpage='clReleaseCommandBufferKHR',desc='Decrement a command-buffer\'s reference count',type='protos'] +-- +To decrement a command-buffer's reference count, call the function + +include::{generated}/api/protos/clReleaseCommandBufferKHR.txt[] +include::{generated}/api/version-notes/clReleaseCommandBufferKHR.asciidoc[] + + * _command_buffer_ specifies the command-buffer to release. [NOTE] ==== -If there are enqueued commands with user events in the _event_wait_list_ -argument of *+clEnqueue*+* commands, the user must ensure that the status of -these user events being waited on are set using {clSetUserEventStatus} -before any OpenCL APIs that release OpenCL objects except for event objects -are called; otherwise the behavior is undefined. +After the _command_buffer_ reference count becomes zero and has finished +execution, the command-buffer is deleted. +==== -For example, the following code sequence will result in undefined behavior -of {clReleaseMemObject}. +// refError -[source,opencl] ----- -ev1 = clCreateUserEvent(ctx, NULL); -clEnqueueWriteBuffer(cq, buf1, CL_FALSE, ..., 1, &ev1, NULL); -clEnqueueWriteBuffer(cq, buf2, CL_FALSE, ...); -clReleaseMemObject(buf2); -clSetUserEventStatus(ev1, CL_COMPLETE); ----- +{clReleaseCommandBufferKHR} returns {CL_SUCCESS} if the function is executed +successfully. 
+Otherwise, it returns one of the following errors: -The following code sequence, however, works correctly. + * {CL_INVALID_COMMAND_BUFFER_KHR} if _command_buffer_ is not a valid + command-buffer. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources + required by the OpenCL implementation on the device. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. +-- -[source,opencl] ----- -ev1 = clCreateUserEvent(ctx, NULL); -clEnqueueWriteBuffer(cq, buf1, CL_FALSE, ..., 1, &ev1, NULL); -clEnqueueWriteBuffer(cq, buf2, CL_FALSE, ...); -clSetUserEventStatus(ev1, CL_COMPLETE); -clReleaseMemObject(buf2); ----- + +=== Enqueuing a Command-Buffer + +[open,refpage='clFinalizeCommandBufferKHR',desc='Finalize command recording for a command-buffer',type='protos'] +-- +To finalize command recording ready for enqueuing a command-buffer on a +command-queue, call the function + +include::{generated}/api/protos/clFinalizeCommandBufferKHR.txt[] +include::{generated}/api/version-notes/clFinalizeCommandBufferKHR.asciidoc[] + + * _command_buffer_ refers to a valid command-buffer object. + +[NOTE] +==== +{clFinalizeCommandBufferKHR} places the command-buffer in the +<> state where commands can no longer be recorded, at +this point the command-buffer is ready to be enqueued. ==== // refError -{clSetUserEventStatus} returns {CL_SUCCESS} if the function was executed +{clFinalizeCommandBufferKHR} returns {CL_SUCCESS} if the function is executed successfully. Otherwise, it returns one of the following errors: - * {CL_INVALID_EVENT} if _event_ is not a valid user event object. - * {CL_INVALID_VALUE} if the _execution_status_ is not {CL_COMPLETE} or a - negative integer value. - * {CL_INVALID_OPERATION} if the _execution_status_ for _event_ has already - been changed by a previous call to {clSetUserEventStatus}. 
- * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required - by the OpenCL implementation on the device. + * {CL_INVALID_COMMAND_BUFFER_KHR} if _command_buffer_ is not a valid + command-buffer. + * {CL_INVALID_OPERATION} if _command_buffer_ is not in the <> state. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources + required by the OpenCL implementation on the device. * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources required by the OpenCL implementation on the host. -- -[open,refpage='clWaitForEvents',desc='Waits on the host thread for commands identified by event objects to complete.',type='protos'] +[open,refpage='clEnqueueCommandBufferKHR',desc='Enqueue a command-buffer to execute on command-queues',type='protos'] -- -To wait for events to complete, call the function +To enqueue a command-buffer to execute on command-queues, call the function -include::{generated}/api/protos/clWaitForEvents.txt[] -include::{generated}/api/version-notes/clWaitForEvents.asciidoc[] +include::{generated}/api/protos/clEnqueueCommandBufferKHR.txt[] +include::{generated}/api/version-notes/clEnqueueCommandBufferKHR.asciidoc[] - * _num_events_ is the number of events in _event_list_. - * _event_list_ is a pointer to a list of event object handles. + * _num_queues_ is the number of command-queues listed in _queues_. + * _queues_ is a pointer to an ordered list of command-queues <> with the command-queues used on recording. + _queues_ can be `NULL`, in which case the default command-queues used on + command-buffer creation are used and _num_queues_ must be 0. + * _command_buffer_ refers to a valid command-buffer object. + * _event_wait_list_, _num_events_in_wait_list_ specify events that need to + complete before this particular command can be executed. + If _event_wait_list_ is `NULL`, then this particular command does not + wait on any event to complete. + If _event_wait_list_ is `NULL`, _num_events_in_wait_list_ must be 0. 
+ If _event_wait_list_ is not `NULL`, the list of events pointed to by + _event_wait_list_ must be valid and _num_events_in_wait_list_ must be + greater than 0. + The events specified in _event_wait_list_ act as synchronization points. + The context associated with events in _event_wait_list_ and + command_queue must be the same. + The memory associated with _event_wait_list_ can be reused or freed + after the function returns. + * _event_ will return an event object that identifies this command and can be + used to query for profiling information or queue a wait for this + particular command to complete. + _event_ can be `NULL` in which case it will not be possible for the + application to wait on this command or query it for profiling + information. -This function waits on the host thread for commands identified by event -objects in _event_list_ to complete. -A command is considered complete if its execution status is {CL_COMPLETE} or a -negative value. -The events specified in _event_list_ act as synchronization points. +[NOTE] +==== +To enqueue a command-buffer it must be in a <> state, +see {clFinalizeCommandBufferKHR}. +==== // refError -{clWaitForEvents} returns {CL_SUCCESS} if the execution status of all events -in _event_list_ is {CL_COMPLETE}. +{clEnqueueCommandBufferKHR} returns {CL_SUCCESS} if the command-buffer +execution was successfully queued, or one of the errors below: + + * {CL_INVALID_COMMAND_BUFFER_KHR} if _command_buffer_ is not a valid + command-buffer. + * {CL_INVALID_OPERATION} if _command_buffer_ has not been finalized. + * {CL_INVALID_OPERATION} if _command_buffer_ was not created with the + {CL_COMMAND_BUFFER_SIMULTANEOUS_USE_KHR} flag and is in the <> state. + * {CL_INVALID_VALUE} if _queues_ is `NULL` and _num_queues_ is > 0, or + _queues_ is not `NULL` and _num_queues_ is 0. + * {CL_INVALID_VALUE} if _num_queues_ is > 0 and not the same value as + _num_queues_ set on _command_buffer_ creation. 
+ * {CL_INVALID_COMMAND_QUEUE} if any element of _queues_ is not a valid + command-queue. + * {CL_INCOMPATIBLE_COMMAND_QUEUE_KHR} if any element of _queues_ is not + <> with the command-queue set on + _command_buffer_ creation at the same list index. + * {CL_INVALID_CONTEXT} if any element of _queues_ does not have the same + context as the command-queue set on _command_buffer_ creation at the + same list index. + * {CL_INVALID_CONTEXT} if context associated with _command_buffer_ and + events in _event_wait_list_ are not the same. + * {CL_OUT_OF_RESOURCES} if there is a failure to queue the execution + instance of _command_buffer_ on the command-queues because of + insufficient resources needed to execute _command_buffer_. + * {CL_INVALID_EVENT_WAIT_LIST} if _event_wait_list_ is `NULL` and + _num_events_in_wait_list_ > 0, or _event_wait_list_ is not `NULL` and + _num_events_in_wait_list_ is 0, or if event objects in _event_wait_list_ + are not valid events. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources + required by the OpenCL implementation on the device. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. +-- + + +=== Recording Commands to a Command-Buffer + +[open,refpage='clCommandBarrierWithWaitListKHR',desc='Record a barrier operation to a command-queue',type='protos'] +-- +To record a barrier operation used as a synchronization point, call the +function + +include::{generated}/api/protos/clCommandBarrierWithWaitListKHR.txt[] +include::{generated}/api/version-notes/clCommandBarrierWithWaitListKHR.asciidoc[] + + * _command_buffer_ refers to a valid command-buffer object. + * _command_queue_ specifies the command-queue the command will be recorded + to. + This parameter is unused, as only a single + command-queue is supported, and **must** be `NULL`. 
+ * _sync_point_wait_list_, _num_sync_points_in_wait_list_ specify + synchronization-points that need to complete before this particular + command can be executed. ++ +If _sync_point_wait_list_ is `NULL`, _num_sync_points_in_wait_list_ +must be 0. +If _sync_point_wait_list_ is not `NULL`, the list of +synchronization-points pointed to by _sync_point_wait_list_ must be +valid and _num_sync_points_in_wait_list_ must be greater than 0. +The synchronization-points specified in _sync_point_wait_list_ are +*device-side* synchronization-points. +The command-buffer associated with synchronization-points in +_sync_point_wait_list_ must be the same as _command_buffer_. +The memory associated with _sync_point_wait_list_ can be reused or freed +after the function returns. ++ +If _sync_point_wait_list_ is `NULL`, then this particular command +waits until all previous recorded commands to _command_queue_ have +completed. + * _sync_point_ returns a synchronization-point ID that identifies this + particular command. + Synchronization-point objects are unique and can be used to identify + this barrier command later on. + _sync_point_ can be `NULL` in which case it will not be possible for the + application to record a wait for this command to complete. + If the _sync_point_wait_list_ and the _sync_point_ arguments are not + `NULL`, the _sync_point_ argument should not refer to an element of the + _sync_point_wait_list_ array. + * _mutable_handle_ returns a handle to the command. + This parameter is unused, and **must** be `NULL`. + +[NOTE] +==== +{clCommandBarrierWithWaitListKHR} waits for either a list of +synchronization-points to complete, or if the list is empty it waits for all +commands previously recorded in _command_buffer_ to complete before it +completes. +This command blocks command execution, that is, any following commands +recorded after it do not execute until it completes. 
+==== + +// refError + +{clCommandBarrierWithWaitListKHR} returns {CL_SUCCESS} if the function is +executed successfully. Otherwise, it returns one of the following errors: - * {CL_INVALID_VALUE} if _num_events_ is zero or _event_list_ is `NULL`. - * {CL_INVALID_CONTEXT} if events specified in _event_list_ do not belong to - the same context. - * {CL_INVALID_EVENT} if event objects specified in _event_list_ are not - valid event objects. - * {CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST} if the execution status of - any of the events in _event_list_ is a negative integer value. - This error code is <> version 1.1. - * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required - by the OpenCL implementation on the device. + * {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not `NULL`. + * {CL_INVALID_COMMAND_BUFFER_KHR} if _command_buffer_ is not a valid + command-buffer. + * {CL_INVALID_CONTEXT} if the context associated with _command_queue_ and + _command_buffer_ is not the same. + * {CL_INVALID_OPERATION} if _command_buffer_ has been finalized. + * {CL_INVALID_VALUE} if _mutable_handle_ is not `NULL`. + * {CL_INVALID_SYNC_POINT_WAIT_LIST_KHR} if _sync_point_wait_list_ is + `NULL` and _num_sync_points_in_wait_list_ is > 0, or + _sync_point_wait_list_ is not `NULL` and _num_sync_points_in_wait_list_ + is 0, or if synchronization-point objects in _sync_point_wait_list_ are + not valid synchronization-points. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources + required by the OpenCL implementation on the device. * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources required by the OpenCL implementation on the host. 
-- -[open,refpage='clGetEventInfo',desc='Returns information about the event object.',type='protos'] +[open,refpage='clCommandCopyBufferKHR',desc='Record a command to copy between two buffer objects',type='protos'] -- -To return information about an event object, call the function +To record a command to copy from one buffer object to another, call the +function -include::{generated}/api/protos/clGetEventInfo.txt[] -include::{generated}/api/version-notes/clGetEventInfo.asciidoc[] +include::{generated}/api/protos/clCommandCopyBufferKHR.txt[] +include::{generated}/api/version-notes/clCommandCopyBufferKHR.asciidoc[] - * _event_ specifies the event object being queried. - * _param_name_ specifies the information to query. - The list of supported _param_name_ types and the information returned in - _param_value_ by {clGetEventInfo} is described in the - <<event-info-table>> table. - * _param_value_ is a pointer to memory where the appropriate result being - queried is returned. - If _param_value_ is `NULL`, it is ignored. - * _param_value_size_ is used to specify the size in bytes of memory pointed to - by _param_value_. - This size must be {geq} size of return type as described in the - <<event-info-table>> table. - * _param_value_size_ret_ returns the actual size in bytes of data being - queried by _param_name_. - If _param_value_size_ret_ is `NULL`, it is ignored. + * _command_buffer_ refers to a valid command-buffer object. + * _command_queue_ specifies the command-queue the command will be recorded to. + {empty} + + If the `<<cl_khr_command_buffer_multi_device>>` extension is not supported, + only a single command-queue is supported, and _command_queue_ must be + `NULL`. +ifdef::cl_khr_command_buffer_multi_device[] + {empty} + + If the `<<cl_khr_command_buffer_multi_device>>` extension is supported and + _command_queue_ is `NULL`, then only one command-queue must have been set on + _command_buffer_ creation; otherwise, _command_queue_ must not be `NULL`.
+endif::cl_khr_command_buffer_multi_device[] + * _src_buffer_, _dst_buffer_, _src_offset_, _dst_offset_, _size_ refer + to {clEnqueueCopyBuffer}. + * _sync_point_wait_list_, _num_sync_points_in_wait_list_ specify + synchronization-points that need to complete before this particular + command can be executed. ++ +If _sync_point_wait_list_ is `NULL`, _num_sync_points_in_wait_list_ +must be 0. +If _sync_point_wait_list_ is not `NULL`, the list of +synchronization-points pointed to by _sync_point_wait_list_ must be +valid and _num_sync_points_in_wait_list_ must be greater than 0. +The synchronization-points specified in _sync_point_wait_list_ are +*device-side* synchronization-points. +The command-buffer associated with synchronization-points in +_sync_point_wait_list_ must be the same as _command_buffer_. +The memory associated with _sync_point_wait_list_ can be reused or freed +after the function returns. + * _sync_point_ returns a synchronization-point ID that identifies this + particular command. + Synchronization-point objects are unique and can be used to identify + this command later on. + _sync_point_ can be `NULL` in which case it will not be possible for the + application to record a wait for this command to complete. + If the _sync_point_wait_list_ and the _sync_point_ arguments are not + `NULL`, the _sync_point_ argument should not refer to an element of the + _sync_point_wait_list_ array. + * _mutable_handle_ returns a handle to the command. + This parameter is unused, and **must** be `NULL`. -[[event-info-table]] -.List of supported param_names by {clGetEventInfo} -[width="100%",cols="<33%,<17%,<50%",options="header"] -|==== -| Event Info | Return Type | Description -| {CL_EVENT_COMMAND_QUEUE_anchor} +// refError -include::{generated}/api/version-notes/CL_EVENT_COMMAND_QUEUE.asciidoc[] - | {cl_command_queue_TYPE} - | Return the command-queue associated with _event_. - For user event objects, a `NULL` value is returned. 
-| {CL_EVENT_CONTEXT_anchor} +{clCommandCopyBufferKHR} returns {CL_SUCCESS} if the function is executed +successfully. +Otherwise, it returns the errors defined by {clEnqueueCopyBuffer} except: -include::{generated}/api/version-notes/CL_EVENT_CONTEXT.asciidoc[] - | {cl_context_TYPE} - | Return the context associated with _event_. -| {CL_EVENT_COMMAND_TYPE_anchor} +{CL_INVALID_COMMAND_QUEUE} is replaced with: -include::{generated}/api/version-notes/CL_EVENT_COMMAND_TYPE.asciidoc[] - | {cl_command_type_TYPE} - | Return the command type associated with _event_ as described in the - <> table. + * {CL_INVALID_COMMAND_QUEUE} if the + `<>` extension is not supported and + _command_queue_ is not `NULL`. + * {CL_INVALID_COMMAND_QUEUE} if the + `<>` extension is supported; and + either _command_queue_ is `NULL` and _command_buffer_ was created with + more than one queue, or _command_queue_ is not `NULL` and not a + command-queue listed on _command_buffer_ creation. -| {CL_EVENT_COMMAND_EXECUTION_STATUS_anchor} footnote:[{fn-event-status-order}] +{CL_INVALID_CONTEXT} is replaced with: -include::{generated}/api/version-notes/CL_EVENT_COMMAND_EXECUTION_STATUS.asciidoc[] - | {cl_int_TYPE} - | Return the execution status of the command identified by event. - Valid values are: + * {CL_INVALID_CONTEXT} if the context associated with _command_queue_, + _command_buffer_, _src_buffer_, and _dst_buffer_ are not the same. - {CL_QUEUED} (command has been enqueued in the command-queue), +{CL_INVALID_EVENT_WAIT_LIST} is replaced with: - {CL_SUBMITTED} (enqueued command has been submitted by the host to the - device associated with the command-queue), + * {CL_INVALID_SYNC_POINT_WAIT_LIST_KHR} if _sync_point_wait_list_ is + `NULL` and _num_sync_points_in_wait_list_ is > 0, or + _sync_point_wait_list_ is not `NULL` and _num_sync_points_in_wait_list_ + is 0, or if synchronization-point objects in _sync_point_wait_list_ are + not valid synchronization-points. 
- {CL_RUNNING} (device is currently executing this command), +New errors: - {CL_COMPLETE} (the command has completed), or + * {CL_INVALID_COMMAND_BUFFER_KHR} if _command_buffer_ is not a valid + command-buffer. + * {CL_INVALID_OPERATION} if _command_buffer_ has been finalized. + * {CL_INVALID_VALUE} if _mutable_handle_ is not `NULL`. +-- - Error code given by a negative integer value. (command was - abnormally terminated - this may be caused by a bad memory access - etc.). - These error codes come from the same set of error codes that are - returned from the platform or runtime API calls as return values or - errcode_ret values. -| {CL_EVENT_REFERENCE_COUNT_anchor} footnote:[{fn-reference-count-usage}] +[open,refpage='clCommandCopyBufferRectKHR',desc='Record a command to copy a rectangular region from one buffer object to another',type='protos'] +-- +To record a command to copy a rectangular region from a buffer object to +another buffer object, call the function -include::{generated}/api/version-notes/CL_EVENT_REFERENCE_COUNT.asciidoc[] - | {cl_uint_TYPE} - | Return the _event_ reference count. -|==== +include::{generated}/api/protos/clCommandCopyBufferRectKHR.txt[] +include::{generated}/api/version-notes/clCommandCopyBufferRectKHR.asciidoc[] -[[event-command-type-table]] -.List of supported event command types -[width="100%",cols="2,3",options="header"] -|==== -| *Events Created By* -| *Event Command Type* + * _command_buffer_ refers to a valid command-buffer object. + * _command_queue_ specifies the command-queue the command will be recorded to. + {empty} + + If the `<>` extension is not supported, + only a single command-queue is supported, and _command_queue_ must be + `NULL`. +ifdef::cl_khr_command_buffer_multi_device[] + {empty} + + If the `<>` extension is supported and + _command_queue_ is `NULL`, then only one command-queue must have been set on + _command_buffer_ creation; otherwise, _command_queue_ must not be `NULL`. 
+endif::cl_khr_command_buffer_multi_device[] + * _src_origin_, _dst_origin_, _region_, _src_row_pitch_, + _src_slice_pitch_, _dst_row_pitch_, _dst_slice_pitch_ refer to + {clEnqueueCopyBufferRect}. + * _sync_point_wait_list_, _num_sync_points_in_wait_list_ specify + synchronization-points that need to complete before this particular + command can be executed. ++ +If _sync_point_wait_list_ is `NULL`, _num_sync_points_in_wait_list_ must +be 0. +If _sync_point_wait_list_ is not `NULL`, the list of +synchronization-points pointed to by _sync_point_wait_list_ must be +valid and _num_sync_points_in_wait_list_ must be greater than 0. +The synchronization-points specified in _sync_point_wait_list_ are +*device-side* synchronization-points. +The command-buffer associated with synchronization-points in +_sync_point_wait_list_ must be the same as _command_buffer_. +The memory associated with _sync_point_wait_list_ can be reused or freed +after the function returns. + * _sync_point_ returns a synchronization-point ID that identifies this + particular command. + Synchronization-point objects are unique and can be used to identify + this command later on. + _sync_point_ can be `NULL` in which case it will not be possible for the + application to record a wait for this command to complete. + If the _sync_point_wait_list_ and the _sync_point_ arguments are not + `NULL`, the _sync_point_ argument should not refer to an element of the + _sync_point_wait_list_ array. + * _mutable_handle_ returns a handle to the command. + This parameter is unused, and **must** be `NULL`. + +[NOTE] +==== +{clCommandCopyBufferRectKHR} records a command to copy a 2D or 3D rectangular +region from the buffer object identified by _src_buffer_ to a 2D or 3D region +in the buffer object identified by _dst_buffer_. +Copying begins at the source offset and destination offset which are +computed as described in the description for _src_origin_ and _dst_origin_. 
+ +Each byte of the region's width is copied from the source offset to the +destination offset. +After copying each width, the source and destination offsets are incremented +by their respective source and destination row pitches. +After copying each 2D rectangle, the source and destination offsets are +incremented by their respective source and destination slice pitches. +==== + +// refError + +{clCommandCopyBufferRectKHR} returns {CL_SUCCESS} if the function is +executed successfully. +Otherwise, it returns the errors defined by {clEnqueueCopyBufferRect} +except: + +{CL_INVALID_COMMAND_QUEUE} is replaced with: + + * {CL_INVALID_COMMAND_QUEUE} if the + `<>` extension is not supported and + _command_queue_ is not `NULL`. + * {CL_INVALID_COMMAND_QUEUE} if the + `<>` extension is supported; and + either _command_queue_ is `NULL` and _command_buffer_ was created with + more than one queue, or _command_queue_ is not `NULL` and not a + command-queue listed on _command_buffer_ creation. + +{CL_INVALID_CONTEXT} is replaced with: + + * {CL_INVALID_CONTEXT} if the context associated with _command_queue_, + _command_buffer_, _src_buffer_, and _dst_buffer_ are not the same. -| {clEnqueueNDRangeKernel} -| {CL_COMMAND_NDRANGE_KERNEL_anchor} +{CL_INVALID_EVENT_WAIT_LIST} is replaced with: -include::{generated}/api/version-notes/CL_COMMAND_NDRANGE_KERNEL.asciidoc[] + * {CL_INVALID_SYNC_POINT_WAIT_LIST_KHR} if _sync_point_wait_list_ is + `NULL` and _num_sync_points_in_wait_list_ is > 0, or + _sync_point_wait_list_ is not `NULL` and _num_sync_points_in_wait_list_ + is 0, or if synchronization-point objects in _sync_point_wait_list_ are + not valid synchronization-points. -| {clEnqueueTask} -| {CL_COMMAND_TASK_anchor} +New errors: -include::{generated}/api/version-notes/CL_COMMAND_TASK.asciidoc[] + * {CL_INVALID_COMMAND_BUFFER_KHR} if _command_buffer_ is not a valid + command-buffer. + * {CL_INVALID_OPERATION} if _command_buffer_ has been finalized. 
+ * {CL_INVALID_VALUE} if _mutable_handle_ is not `NULL`. +-- -| {clEnqueueNativeKernel} -| {CL_COMMAND_NATIVE_KERNEL_anchor} +[open,refpage='clCommandCopyBufferToImageKHR',desc='Record a command to copy a buffer object to an image object',type='protos'] +-- +To record a command to copy a buffer object to an image object, call the +function -include::{generated}/api/version-notes/CL_COMMAND_NATIVE_KERNEL.asciidoc[] +include::{generated}/api/protos/clCommandCopyBufferToImageKHR.txt[] +include::{generated}/api/version-notes/clCommandCopyBufferToImageKHR.asciidoc[] -| {clEnqueueReadBuffer} -| {CL_COMMAND_READ_BUFFER_anchor} + * _command_buffer_ refers to a valid command-buffer object. + * _command_queue_ specifies the command-queue the command will be recorded to. + {empty} + + If the `<<cl_khr_command_buffer_multi_device>>` extension is not supported, + only a single command-queue is supported, and _command_queue_ must be + `NULL`. +ifdef::cl_khr_command_buffer_multi_device[] + {empty} + + If the `<<cl_khr_command_buffer_multi_device>>` extension is supported and + _command_queue_ is `NULL`, then only one command-queue must have been set on + _command_buffer_ creation; otherwise, _command_queue_ must not be `NULL`. +endif::cl_khr_command_buffer_multi_device[] + * _src_buffer_, _dst_image_, _src_offset_, _dst_origin_, _region_ refer to + {clEnqueueCopyBufferToImage}. + * _sync_point_wait_list_, _num_sync_points_in_wait_list_ specify + synchronization-points that need to complete before this particular + command can be executed. ++ +If _sync_point_wait_list_ is `NULL`, _num_sync_points_in_wait_list_ must +be 0. +If _sync_point_wait_list_ is not `NULL`, the list of +synchronization-points pointed to by _sync_point_wait_list_ must be +valid and _num_sync_points_in_wait_list_ must be greater than 0. +The synchronization-points specified in _sync_point_wait_list_ are +*device-side* synchronization-points. +The command-buffer associated with synchronization-points in +_sync_point_wait_list_ must be the same as _command_buffer_.
+The memory associated with _sync_point_wait_list_ can be reused or freed +after the function returns. + * _sync_point_ returns a synchronization-point ID that identifies this + particular command. + Synchronization-point objects are unique and can be used to identify + this command later on. + _sync_point_ can be `NULL` in which case it will not be possible for the + application to record a wait for this command to complete. + If the _sync_point_wait_list_ and the _sync_point_ arguments are not + `NULL`, the _sync_point_ argument should not refer to an element of the + _sync_point_wait_list_ array. + * _mutable_handle_ returns a handle to the command. + This parameter is unused, and **must** be `NULL`. -include::{generated}/api/version-notes/CL_COMMAND_READ_BUFFER.asciidoc[] +// refError -| {clEnqueueWriteBuffer} -| {CL_COMMAND_WRITE_BUFFER_anchor} +{clCommandCopyBufferToImageKHR} returns {CL_SUCCESS} if the function is executed +successfully. +Otherwise, it returns the errors defined by {clEnqueueCopyBufferToImage} +except: -include::{generated}/api/version-notes/CL_COMMAND_WRITE_BUFFER.asciidoc[] +{CL_INVALID_COMMAND_QUEUE} is replaced with: -| {clEnqueueCopyBuffer} -| {CL_COMMAND_COPY_BUFFER_anchor} + * {CL_INVALID_COMMAND_QUEUE} if the + `<<cl_khr_command_buffer_multi_device>>` extension is not supported and + _command_queue_ is not `NULL`. + * {CL_INVALID_COMMAND_QUEUE} if the + `<<cl_khr_command_buffer_multi_device>>` extension is supported; and + either _command_queue_ is `NULL` and _command_buffer_ was created with + more than one queue, or _command_queue_ is not `NULL` and not a + command-queue listed on _command_buffer_ creation. -include::{generated}/api/version-notes/CL_COMMAND_COPY_BUFFER.asciidoc[] +{CL_INVALID_CONTEXT} is replaced with: -| {clEnqueueReadImage} -| {CL_COMMAND_READ_IMAGE_anchor} + * {CL_INVALID_CONTEXT} if the context associated with _command_queue_, + _command_buffer_, _src_buffer_, and _dst_image_ are not the same.
-include::{generated}/api/version-notes/CL_COMMAND_READ_IMAGE.asciidoc[] +{CL_INVALID_EVENT_WAIT_LIST} is replaced with: -| {clEnqueueWriteImage} -| {CL_COMMAND_WRITE_IMAGE_anchor} + * {CL_INVALID_SYNC_POINT_WAIT_LIST_KHR} if _sync_point_wait_list_ is + `NULL` and _num_sync_points_in_wait_list_ is > 0, or + _sync_point_wait_list_ is not `NULL` and _num_sync_points_in_wait_list_ + is 0, or if synchronization-point objects in _sync_point_wait_list_ are + not valid synchronization-points. -include::{generated}/api/version-notes/CL_COMMAND_WRITE_IMAGE.asciidoc[] +New errors: -| {clEnqueueCopyImage} -| {CL_COMMAND_COPY_IMAGE_anchor} + * {CL_INVALID_COMMAND_BUFFER_KHR} if _command_buffer_ is not a valid + command-buffer. + * {CL_INVALID_OPERATION} if _command_buffer_ has been finalized. + * {CL_INVALID_VALUE} if _mutable_handle_ is not `NULL`. +-- -include::{generated}/api/version-notes/CL_COMMAND_COPY_IMAGE.asciidoc[] +[open,refpage='clCommandCopyImageKHR',desc='Record a command to copy between two image objects',type='protos'] +-- +To record a command to copy between two image objects, call the function -| {clEnqueueCopyBufferToImage} -| {CL_COMMAND_COPY_BUFFER_TO_IMAGE_anchor} +include::{generated}/api/protos/clCommandCopyImageKHR.txt[] +include::{generated}/api/version-notes/clCommandCopyImageKHR.asciidoc[] -include::{generated}/api/version-notes/CL_COMMAND_COPY_BUFFER_TO_IMAGE.asciidoc[] + * _command_buffer_ refers to a valid command-buffer object. + * _command_queue_ specifies the command-queue the command will be recorded to. + {empty} + + If the `<>` extension is not supported, + only a single command-queue is supported, and _command_queue_ must be + `NULL`. +ifdef::cl_khr_command_buffer_multi_device[] + {empty} + + If the `<>` extension is supported and + _command_queue_ is `NULL`, then only one command-queue must have been set on + _command_buffer_ creation; otherwise, _command_queue_ must not be `NULL`. 
+endif::cl_khr_command_buffer_multi_device[] + * _src_image_, _dst_image_, _src_origin_, _dst_origin_, _region_ refer to + {clEnqueueCopyImage}. + * _sync_point_wait_list_, _num_sync_points_in_wait_list_ specify + synchronization-points that need to complete before this particular + command can be executed. ++ +If _sync_point_wait_list_ is `NULL`, _num_sync_points_in_wait_list_ must +be 0. +If _sync_point_wait_list_ is not `NULL`, the list of +synchronization-points pointed to by _sync_point_wait_list_ must be +valid and _num_sync_points_in_wait_list_ must be greater than 0. +The synchronization-points specified in _sync_point_wait_list_ are +*device-side* synchronization-points. +The command-buffer associated with synchronization-points in +_sync_point_wait_list_ must be the same as _command_buffer_. +The memory associated with _sync_point_wait_list_ can be reused or freed +after the function returns. + * _sync_point_ returns a synchronization-point ID that identifies this + particular command. + Synchronization-point objects are unique and can be used to identify + this command later on. + _sync_point_ can be `NULL` in which case it will not be possible for the + application to record a wait for this command to complete. + If the _sync_point_wait_list_ and the _sync_point_ arguments are not + `NULL`, the _sync_point_ argument should not refer to an element of the + _sync_point_wait_list_ array. + * _mutable_handle_ returns a handle to the command. + This parameter is unused, and **must** be `NULL`. -| {clEnqueueCopyImageToBuffer} -| {CL_COMMAND_COPY_IMAGE_TO_BUFFER_anchor} +[NOTE] +==== +It is currently a requirement that the _src_image_ and _dst_image_ image +memory objects for {clCommandCopyImageKHR} must have the exact same image +format, i.e. the {cl_image_format_TYPE} descriptor specified when +_src_image_ and _dst_image_ are created must match. 
+==== -include::{generated}/api/version-notes/CL_COMMAND_COPY_IMAGE_TO_BUFFER.asciidoc[] +// refError -| {clEnqueueMapBuffer} -| {CL_COMMAND_MAP_BUFFER_anchor} +{clCommandCopyImageKHR} returns {CL_SUCCESS} if the function is executed +successfully. +Otherwise, it returns the errors defined by {clEnqueueCopyImage} except: -include::{generated}/api/version-notes/CL_COMMAND_MAP_BUFFER.asciidoc[] +{CL_INVALID_COMMAND_QUEUE} is replaced with: -| {clEnqueueMapImage} -| {CL_COMMAND_MAP_IMAGE_anchor} + * {CL_INVALID_COMMAND_QUEUE} if the + `<>` extension is not supported and + _command_queue_ is not `NULL`. + * {CL_INVALID_COMMAND_QUEUE} if the + `<>` extension is supported; and + either _command_queue_ is `NULL` and _command_buffer_ was created with + more than one queue, or _command_queue_ is not `NULL` and not a + command-queue listed on _command_buffer_ creation. -include::{generated}/api/version-notes/CL_COMMAND_MAP_IMAGE.asciidoc[] +{CL_INVALID_CONTEXT} is replaced with: -| {clEnqueueUnmapMemObject} -| {CL_COMMAND_UNMAP_MEM_OBJECT_anchor} + * {CL_INVALID_CONTEXT} if the context associated with _command_queue_, + _command_buffer_, _src_image_, and _dst_image_ are not the same. -include::{generated}/api/version-notes/CL_COMMAND_UNMAP_MEM_OBJECT.asciidoc[] +{CL_INVALID_EVENT_WAIT_LIST} is replaced with: -| {clEnqueueMarker}, + - {clEnqueueMarkerWithWaitList} -| {CL_COMMAND_MARKER_anchor} + * {CL_INVALID_SYNC_POINT_WAIT_LIST_KHR} if _sync_point_wait_list_ is + `NULL` and _num_sync_points_in_wait_list_ is > 0, or + _sync_point_wait_list_ is not `NULL` and _num_sync_points_in_wait_list_ + is 0, or if synchronization-point objects in _sync_point_wait_list_ are + not valid synchronization-points. -include::{generated}/api/version-notes/CL_COMMAND_MARKER.asciidoc[] +New errors: -| {clEnqueueReadBufferRect} -| {CL_COMMAND_READ_BUFFER_RECT_anchor} + * {CL_INVALID_COMMAND_BUFFER_KHR} if _command_buffer_ is not a valid + command-buffer. 
+ * {CL_INVALID_OPERATION} if _command_buffer_ has been finalized. + * {CL_INVALID_VALUE} if _mutable_handle_ is not `NULL`. +-- -include::{generated}/api/version-notes/CL_COMMAND_READ_BUFFER_RECT.asciidoc[] +[open,refpage='clCommandCopyImageToBufferKHR',desc='Record a command to copy an image object to a buffer object',type='protos'] +-- +To record a command to copy an image object to a buffer object, call the +function -| {clEnqueueWriteBufferRect} -| {CL_COMMAND_WRITE_BUFFER_RECT_anchor} +include::{generated}/api/protos/clCommandCopyImageToBufferKHR.txt[] +include::{generated}/api/version-notes/clCommandCopyImageToBufferKHR.asciidoc[] -include::{generated}/api/version-notes/CL_COMMAND_WRITE_BUFFER_RECT.asciidoc[] + * _command_buffer_ refers to a valid command-buffer object. + * _command_queue_ specifies the command-queue the command will be recorded to. + {empty} + + If the `<>` extension is not supported, + only a single command-queue is supported, and _command_queue_ must be + `NULL`. +ifdef::cl_khr_command_buffer_multi_device[] + {empty} + + If the `<>` extension is supported and + _command_queue_ is `NULL`, then only one command-queue must have been set on + _command_buffer_ creation; otherwise, _command_queue_ must not be `NULL`. +endif::cl_khr_command_buffer_multi_device[] + * _src_image_, _dst_buffer_, _src_origin_, _region_, _dst_offset_ refer to + {clEnqueueCopyImageToBuffer}. + * _sync_point_wait_list_, _num_sync_points_in_wait_list_ specify + synchronization-points that need to complete before this particular + command can be executed. ++ +If _sync_point_wait_list_ is `NULL`, _num_sync_points_in_wait_list_ +must be 0. +If _sync_point_wait_list_ is not `NULL`, the list of +synchronization-points pointed to by _sync_point_wait_list_ must be +valid and _num_sync_points_in_wait_list_ must be greater than 0. +The synchronization-points specified in _sync_point_wait_list_ are +*device-side* synchronization-points. 
+The command-buffer associated with synchronization-points in +_sync_point_wait_list_ must be the same as _command_buffer_. +The memory associated with _sync_point_wait_list_ can be reused or freed +after the function returns. + * _sync_point_ returns a synchronization-point ID that identifies this + particular command. + Synchronization-point objects are unique and can be used to identify + this command later on. + _sync_point_ can be `NULL` in which case it will not be possible for the + application to record a wait for this command to complete. + If the _sync_point_wait_list_ and the _sync_point_ arguments are not + `NULL`, the _sync_point_ argument should not refer to an element of the + _sync_point_wait_list_ array. + * _mutable_handle_ returns a handle to the command. + This parameter is unused, and **must** be `NULL`. -| {clEnqueueCopyBufferRect} -| {CL_COMMAND_COPY_BUFFER_RECT_anchor} +// refError -include::{generated}/api/version-notes/CL_COMMAND_COPY_BUFFER_RECT.asciidoc[] +{clCommandCopyImageToBufferKHR} returns {CL_SUCCESS} if the function is +executed successfully. +Otherwise, it returns the errors defined by {clEnqueueCopyImageToBuffer} +except: -| {clCreateUserEvent} -| {CL_COMMAND_USER_anchor} +{CL_INVALID_COMMAND_QUEUE} is replaced with: -include::{generated}/api/version-notes/CL_COMMAND_USER.asciidoc[] + * {CL_INVALID_COMMAND_QUEUE} if the + `<>` extension is not supported and + _command_queue_ is not `NULL`. + * {CL_INVALID_COMMAND_QUEUE} if the + `<>` extension is supported; and + either _command_queue_ is `NULL` and _command_buffer_ was created with + more than one queue, or _command_queue_ is not `NULL` and not a + command-queue listed on _command_buffer_ creation. 
-| {clEnqueueBarrier}, + - {clEnqueueBarrierWithWaitList} -| {CL_COMMAND_BARRIER_anchor} +{CL_INVALID_CONTEXT} is replaced with: -include::{generated}/api/version-notes/CL_COMMAND_BARRIER.asciidoc[] + * {CL_INVALID_CONTEXT} if the context associated with _command_queue_, + _command_buffer_, _src_image_, and _dst_buffer_ are not the same. -| {clEnqueueMigrateMemObjects} -| {CL_COMMAND_MIGRATE_MEM_OBJECTS_anchor} +{CL_INVALID_EVENT_WAIT_LIST} is replaced with: -include::{generated}/api/version-notes/CL_COMMAND_MIGRATE_MEM_OBJECTS.asciidoc[] + * {CL_INVALID_SYNC_POINT_WAIT_LIST_KHR} if _sync_point_wait_list_ is + `NULL` and _num_sync_points_in_wait_list_ is > 0, or + _sync_point_wait_list_ is not `NULL` and _num_sync_points_in_wait_list_ + is 0, or if synchronization-point objects in _sync_point_wait_list_ are + not valid synchronization-points. -| {clEnqueueFillBuffer} -| {CL_COMMAND_FILL_BUFFER_anchor} +New errors: -include::{generated}/api/version-notes/CL_COMMAND_FILL_BUFFER.asciidoc[] + * {CL_INVALID_COMMAND_BUFFER_KHR} if _command_buffer_ is not a valid + command-buffer. + * {CL_INVALID_OPERATION} if _command_buffer_ has been finalized. + * {CL_INVALID_VALUE} if _mutable_handle_ is not `NULL`. 
+-- -| {clEnqueueFillImage} -| {CL_COMMAND_FILL_IMAGE_anchor} +[open,refpage='clCommandFillBufferKHR',desc='Record a command to fill a buffer object with a pattern',type='protos'] +-- +To record a command to fill a buffer object with a pattern of a given +pattern size, call the function -include::{generated}/api/version-notes/CL_COMMAND_FILL_IMAGE.asciidoc[] +include::{generated}/api/protos/clCommandFillBufferKHR.txt[] +include::{generated}/api/version-notes/clCommandFillBufferKHR.asciidoc[] -| {clEnqueueSVMFree} -| {CL_COMMAND_SVM_FREE_anchor} +[NOTE] +==== +The usage information which indicates whether the memory object can be read or +written by a kernel and/or the host and is given by the {cl_mem_flags_TYPE} argument +value specified when _buffer_ is created is ignored by +{clCommandFillBufferKHR}. +==== -include::{generated}/api/version-notes/CL_COMMAND_SVM_FREE.asciidoc[] + * _command_buffer_ refers to a valid command-buffer object. + * _command_queue_ specifies the command-queue the command will be recorded to. + {empty} + + If the `<>` extension is not supported, + only a single command-queue is supported, and _command_queue_ must be + `NULL`. +ifdef::cl_khr_command_buffer_multi_device[] + {empty} + + If the `<>` extension is supported and + _command_queue_ is `NULL`, then only one command-queue must have been set on + _command_buffer_ creation; otherwise, _command_queue_ must not be `NULL`. +endif::cl_khr_command_buffer_multi_device[] + * _buffer_, _pattern_, _pattern_size_, _offset_, _size_ refer to + {clEnqueueFillBuffer}. + * _sync_point_wait_list_, _num_sync_points_in_wait_list_ specify + synchronization-points that need to complete before this particular + command can be executed. ++ +If _sync_point_wait_list_ is `NULL`, _num_sync_points_in_wait_list_ +must be 0. +If _sync_point_wait_list_ is not `NULL`, the list of +synchronization-points pointed to by _sync_point_wait_list_ must be +valid and _num_sync_points_in_wait_list_ must be greater than 0. 
+The synchronization-points specified in _sync_point_wait_list_ are +*device-side* synchronization-points. +The command-buffer associated with synchronization-points in +_sync_point_wait_list_ must be the same as _command_buffer_. +The memory associated with _sync_point_wait_list_ can be reused or freed +after the function returns. + * _sync_point_ returns a synchronization-point ID that identifies this + particular command. + Synchronization-point objects are unique and can be used to identify + this command later on. + _sync_point_ can be `NULL` in which case it will not be possible for the + application to record a wait for this command to complete. + If the _sync_point_wait_list_ and the _sync_point_ arguments are not + `NULL`, the _sync_point_ argument should not refer to an element of the + _sync_point_wait_list_ array. + * _mutable_handle_ returns a handle to the command. + This parameter is unused, and **must** be `NULL`. -| {clEnqueueSVMMemcpy} -| {CL_COMMAND_SVM_MEMCPY_anchor} +// refError -include::{generated}/api/version-notes/CL_COMMAND_SVM_MEMCPY.asciidoc[] +{clCommandFillBufferKHR} returns {CL_SUCCESS} if the function is executed +successfully. +Otherwise, it returns the errors defined by {clEnqueueFillBuffer} except: -| {clEnqueueSVMMemFill} -| {CL_COMMAND_SVM_MEMFILL_anchor} +{CL_INVALID_COMMAND_QUEUE} is replaced with: -include::{generated}/api/version-notes/CL_COMMAND_SVM_MEMFILL.asciidoc[] + * {CL_INVALID_COMMAND_QUEUE} if the + `<>` extension is not supported and + _command_queue_ is not `NULL`. + * {CL_INVALID_COMMAND_QUEUE} if the + `<>` extension is supported; and + either _command_queue_ is `NULL` and _command_buffer_ was created with + more than one queue, or _command_queue_ is not `NULL` and not a + command-queue listed on _command_buffer_ creation. 
-| {clEnqueueSVMMap} -| {CL_COMMAND_SVM_MAP_anchor} +{CL_INVALID_CONTEXT} is replaced with: -include::{generated}/api/version-notes/CL_COMMAND_SVM_MAP.asciidoc[] + * {CL_INVALID_CONTEXT} if the context associated with _command_queue_, + _command_buffer_, and _buffer_ are not the same. -| {clEnqueueSVMUnmap} -| {CL_COMMAND_SVM_UNMAP_anchor} +{CL_INVALID_EVENT_WAIT_LIST} is replaced with: -include::{generated}/api/version-notes/CL_COMMAND_SVM_UNMAP.asciidoc[] + * {CL_INVALID_SYNC_POINT_WAIT_LIST_KHR} if _sync_point_wait_list_ is + `NULL` and _num_sync_points_in_wait_list_ is > 0, or + _sync_point_wait_list_ is not `NULL` and _num_sync_points_in_wait_list_ + is 0, or if synchronization-point objects in _sync_point_wait_list_ are + not valid synchronization-points. -| {clEnqueueSVMMigrateMem} -| {CL_COMMAND_SVM_MIGRATE_MEM_anchor} +New errors: -include::{generated}/api/version-notes/CL_COMMAND_SVM_MIGRATE_MEM.asciidoc[] + * {CL_INVALID_COMMAND_BUFFER_KHR} if _command_buffer_ is not a valid + command-buffer. + * {CL_INVALID_OPERATION} if _command_buffer_ has been finalized. + * {CL_INVALID_VALUE} if _mutable_handle_ is not `NULL`. +-- -Prior to OpenCL 3.0, implementations should return -{CL_COMMAND_MIGRATE_MEM_OBJECTS}, but may return an implementation-defined -event command type for {clEnqueueSVMMigrateMem}. +[open,refpage='clCommandFillImageKHR',desc='Record a command to fill an image object with a specified color',type='protos'] +-- +To record a command to fill an image object with a specified color, call the +function -|==== +include::{generated}/api/protos/clCommandFillImageKHR.txt[] +include::{generated}/api/version-notes/clCommandFillImageKHR.asciidoc[] -Using {clGetEventInfo} to determine if a command identified by _event_ has -finished execution (i.e. {CL_EVENT_COMMAND_EXECUTION_STATUS} returns -{CL_COMPLETE}) is not a synchronization point. 
-There are no guarantees that the memory objects being modified by command -associated with _event_ will be visible to other enqueued commands. +[NOTE] +==== +The usage information which indicates whether the memory object can be read or +written by a kernel and/or the host and is given by the {cl_mem_flags_TYPE} argument +value specified when _image_ is created is ignored by {clCommandFillImageKHR}. +==== + + * _command_buffer_ refers to a valid command-buffer object. + * _command_queue_ specifies the command-queue the command will be recorded to. + {empty} + + If the `<<cl_khr_command_buffer_multi_device>>` extension is not supported, + only a single command-queue is supported, and _command_queue_ must be + `NULL`. +ifdef::cl_khr_command_buffer_multi_device[] + {empty} + + If the `<<cl_khr_command_buffer_multi_device>>` extension is supported and + _command_queue_ is `NULL`, then only one command-queue must have been set on + _command_buffer_ creation; otherwise, _command_queue_ must not be `NULL`. +endif::cl_khr_command_buffer_multi_device[] + * _image_, _fill_color_, _origin_, _region_ refer to {clEnqueueFillImage}. + * _sync_point_wait_list_, _num_sync_points_in_wait_list_ specify + synchronization-points that need to complete before this particular + command can be executed. ++ +If _sync_point_wait_list_ is `NULL`, _num_sync_points_in_wait_list_ +must be 0. +If _sync_point_wait_list_ is not `NULL`, the list of +synchronization-points pointed to by _sync_point_wait_list_ must be +valid and _num_sync_points_in_wait_list_ must be greater than 0. +The synchronization-points specified in _sync_point_wait_list_ are +*device-side* synchronization-points. +The command-buffer associated with synchronization-points in +_sync_point_wait_list_ must be the same as _command_buffer_. +The memory associated with _sync_point_wait_list_ can be reused or freed +after the function returns. + * _sync_point_ returns a synchronization-point ID that identifies this + particular command.
+ Synchronization-point objects are unique and can be used to identify + this command later on. + _sync_point_ can be `NULL` in which case it will not be possible for the + application to record a wait for this command to complete. + If the _sync_point_wait_list_ and the _sync_point_ arguments are not + `NULL`, the _sync_point_ argument should not refer to an element of the + _sync_point_wait_list_ array. + * _mutable_handle_ returns a handle to the command. + This parameter is unused, and **must** be `NULL`. // refError -{clGetEventInfo} returns {CL_SUCCESS} if the function is executed +{clCommandFillImageKHR} returns {CL_SUCCESS} if the function is executed successfully. -Otherwise, it returns one of the following errors: +Otherwise, it returns the errors defined by {clEnqueueFillImage} except: - * {CL_INVALID_VALUE} if _param_name_ is not valid, or if size in bytes - specified by _param_value_size_ is < size of return type as described in - the <> table and - _param_value_ is not `NULL`. - * {CL_INVALID_VALUE} if information to query given in _param_name_ cannot be - queried for _event_. - * {CL_INVALID_EVENT} if _event_ is a not a valid event object. - * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required - by the OpenCL implementation on the device. - * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources - required by the OpenCL implementation on the host. --- +{CL_INVALID_COMMAND_QUEUE} is replaced with: -[open,refpage='clSetEventCallback',desc='Registers a user callback function for a specific command execution status.',type='protos'] --- -To register a user callback function for a specific command execution -status, call the function + * {CL_INVALID_COMMAND_QUEUE} if the + `<>` extension is not supported and + _command_queue_ is not `NULL`. 
+ * {CL_INVALID_COMMAND_QUEUE} if the + `<>` extension is supported; and + either _command_queue_ is `NULL` and _command_buffer_ was created with + more than one queue, or _command_queue_ is not `NULL` and not a + command-queue listed on _command_buffer_ creation. -include::{generated}/api/protos/clSetEventCallback.txt[] -include::{generated}/api/version-notes/clSetEventCallback.asciidoc[] +{CL_INVALID_CONTEXT} is replaced with: - * _event_ is a valid event object. - * _command_exec_callback_type_ specifies the command execution status for - which the callback is registered. - The command execution status types for which a callback can be registered - are {CL_SUBMITTED}, {CL_RUNNING}, or {CL_COMPLETE}. - The callback function registered for a _command_exec_callback_type_ value of - {CL_COMPLETE} will be called when the command has completed successfully or - is abnormally terminated. - * _pfn_event_notify_ is the event callback function that can be registered by - the application. - This callback function may be called asynchronously by the OpenCL - implementation. - It is the application's responsibility to ensure that the callback function - is thread-safe. - The parameters to this callback function are: - ** _event_ is the event object for which the callback function is invoked. - ** _event_command_status_ is equal to the _command_exec_callback_type_ - used while registering the callback. - Refer to the <> - table for the command execution status values. - If the callback is called as the result of the command associated with - event being abnormally terminated, an appropriate error code for the - error that caused the termination will be passed to - _event_command_status_ instead. - ** _user_data_ is a pointer to user supplied data. - * _user_data_ will be passed as the _user_data_ argument when _pfn_notify_ is - called. - _user_data_ can be `NULL`. 
+ * {CL_INVALID_CONTEXT} if the context associated with _command_queue_, + _command_buffer_, and _image_ are not the same. -Each call to {clSetEventCallback} registers the specified user callback -function on a callback stack associated with _event_. -The order in which the registered user callback functions are called is -undefined. +{CL_INVALID_EVENT_WAIT_LIST} is replaced with: -The registered callback function will be called when the execution status of the -command associated with _event_ changes to an execution status equal to or past -the status specified by _command_exec_status_, or for the execution status -{CL_COMPLETE}, if the command is abnormally terminated. -There is no guarantee that the callback functions registered for various command -execution status values for an event will be called in the exact order that the -execution status of a command changes. -Furthermore, it should be noted that calling a callback for an event execution -status other than {CL_COMPLETE} in no way implies that the memory model or -execution model as defined by the OpenCL specification has changed. For example, -it is not valid to assume that a corresponding memory transfer has completed -unless the event is in the state {CL_COMPLETE}. + * {CL_INVALID_SYNC_POINT_WAIT_LIST_KHR} if _sync_point_wait_list_ is + `NULL` and _num_sync_points_in_wait_list_ is > 0, or + _sync_point_wait_list_ is not `NULL` and _num_sync_points_in_wait_list_ + is 0, or if synchronization-point objects in _sync_point_wait_list_ are + not valid synchronization-points. -All callbacks registered for an event object must be called before the event -object is destroyed. +New errors: -Callbacks should return promptly. -Behavior is undefined when calling expensive system routines, OpenCL APIs to -create contexts or command-queues, or blocking OpenCL APIs in an event callback. 
-Rather than calling a blocking OpenCL API in an event callback, applications -may call a non-blocking OpenCL API, then register a completion callback -for the non-blocking OpenCL API with the remainder of the work. + * {CL_INVALID_COMMAND_BUFFER_KHR} if _command_buffer_ is not a valid + command-buffer. + * {CL_INVALID_OPERATION} if _command_buffer_ has been finalized. + * {CL_INVALID_VALUE} if _mutable_handle_ is not `NULL`. +-- -Because commands in a command-queue are not required to begin execution -until the command-queue is flushed, callbacks that enqueue commands on a -command-queue should either call {clFlush} on the queue before returning, -or arrange for the command-queue to be flushed later. +[open,refpage='clCommandNDRangeKernelKHR',desc='Record a command to execute a kernel on a device',type='protos'] +-- +To record a command to execute a kernel on a device, call the function + +include::{generated}/api/protos/clCommandNDRangeKernelKHR.txt[] +include::{generated}/api/version-notes/clCommandNDRangeKernelKHR.asciidoc[] + + * _command_buffer_ refers to a valid command-buffer object. + * _command_queue_ specifies the command-queue the command will be recorded to. + {empty} + + If the `<>` extension is not supported, + only a single command-queue is supported, and _command_queue_ must be + `NULL`. +ifdef::cl_khr_command_buffer_multi_device[] + {empty} + + If the `<>` extension is supported and + _command_queue_ is `NULL`, then only one command-queue must have been set on + _command_buffer_ creation; otherwise, _command_queue_ must not be `NULL`. +endif::cl_khr_command_buffer_multi_device[] + * _properties_ specifies a list of properties for the kernel command and + their corresponding values. + Each property name is immediately followed by the corresponding desired + value. + The list is terminated with 0. 
+ifdef::cl_khr_command_buffer_mutable_dispatch[] + If a supported property and its value is not specified in _properties_, its + default value will be used. + _properties_ may be `NULL`, in which case the default values for supported + properties will be used. + The `<>` extension does not define any + properties, but supported properties defined by extensions are defined + in the <> table. +endif::cl_khr_command_buffer_mutable_dispatch[] + * _kernel_ is a valid kernel object which **must** have its arguments set. + Any changes to _kernel_ after calling {clCommandNDRangeKernelKHR}, such + as with {clSetKernelArg} or {clSetKernelExecInfo}, have no effect on the + recorded command. + If _kernel_ is recorded to a following {clCommandNDRangeKernelKHR} + command however, then that command will capture the updated state of + _kernel_. + * _work_dim_, _global_work_offset_, _global_work_size_, _local_work_size_ + Refer to {clEnqueueNDRangeKernel}. + * _sync_point_wait_list_, _num_sync_points_in_wait_list_ specify + synchronization-points that need to complete before this particular + command can be executed. ++ +If _sync_point_wait_list_ is `NULL`, _num_sync_points_in_wait_list_ must +be 0. +If _sync_point_wait_list_ is not `NULL`, the list of +synchronization-points pointed to by _sync_point_wait_list_ must be +valid and _num_sync_points_in_wait_list_ must be greater than 0. +The synchronization-points specified in _sync_point_wait_list_ are +*device-side* synchronization-points. +The command-buffer associated with synchronization-points in +_sync_point_wait_list_ must be the same as _command_buffer_. +The memory associated with _sync_point_wait_list_ can be reused or freed +after the function returns. + * _sync_point_ returns a synchronization-point ID that identifies this + particular command. + Synchronization-point objects are unique and can be used to identify + this command later on. 
+ _sync_point_ can be `NULL` in which case it will not be possible for the + application to record a wait for this command to complete. + If the _sync_point_wait_list_ and the _sync_point_ arguments are not + `NULL`, the _sync_point_ argument should not refer to an element of the + _sync_point_wait_list_ array. + * _mutable_handle_ returns a handle to the command. +ifdef::cl_khr_command_buffer_mutable_dispatch[] + If the `<>` extension is + supported, and _mutable_handle_ is not `NULL`, it can be used in the + {cl_mutable_dispatch_config_khr_TYPE} struct to update the command + configuration between recordings. + The lifetime of this handle is tied to the parent command-buffer, such + that freeing the command-buffer will also free this handle. +endif::cl_khr_command_buffer_mutable_dispatch[] +ifndef::cl_khr_command_buffer_mutable_dispatch[] + If the `<>` extension is not + supported, this parameter is unused, and **must** be `NULL`. +endif::cl_khr_command_buffer_mutable_dispatch[] + +[[ndrange-kernel-properties-table]] +.List of supported properties by {clCommandNDRangeKernelKHR} +[cols=",,",options="header",] +|==== +| Recording Properties | Property Value | Description + +ifdef::cl_khr_command_buffer_mutable_dispatch[] +| {CL_MUTABLE_DISPATCH_ASSERTS_KHR_anchor} + +include::{generated}/api/version-notes/CL_MUTABLE_DISPATCH_ASSERTS_KHR.asciidoc[] + | {cl_mutable_dispatch_asserts_khr_TYPE} + | This is a bitfield and can be set to a combination of the following + values: + + {CL_MUTABLE_DISPATCH_ASSERT_NO_ADDITIONAL_WORK_GROUPS_KHR_anchor} + + An assertion by the user that the number of work-groups of this + ND-range kernel will not be updated beyond the number defined when the + ND-range kernel was recorded. + The number of work-groups is defined as the product for each _i_ from + _0_ to _work_dim - 1_ of + _ceil(global_work_size[i]/local_work_size[i])_. 
+ +include::{generated}/api/version-notes/CL_MUTABLE_DISPATCH_ASSERT_NO_ADDITIONAL_WORK_GROUPS_KHR.asciidoc[] + +| {CL_MUTABLE_DISPATCH_UPDATABLE_FIELDS_KHR_anchor} + +include::{generated}/api/version-notes/CL_MUTABLE_DISPATCH_UPDATABLE_FIELDS_KHR.asciidoc[] + | {cl_mutable_dispatch_fields_khr_TYPE} + | This is a bitfield and can be set to a combination of the following + values: + + {CL_MUTABLE_DISPATCH_GLOBAL_OFFSET_KHR_anchor} determines whether the + _global_work_offset_ of kernel execution can be modified after + recording. + If set, the _global_work_offset_ of the kernel execution can be + changed with {clUpdateMutableCommandsKHR} using the + {cl_mutable_dispatch_config_khr_TYPE} field of the _mutable_config_ + parameter. + Otherwise, the _global_work_offset_ cannot be modified. + +include::{generated}/api/version-notes/CL_MUTABLE_DISPATCH_GLOBAL_OFFSET_KHR.asciidoc[] + + {CL_MUTABLE_DISPATCH_GLOBAL_SIZE_KHR_anchor} determines whether the + _global_work_size_ of kernel execution can be modified after + recording. + If set, the _global_work_size_ of the kernel execution can be changed + with {clUpdateMutableCommandsKHR} using the + {cl_mutable_dispatch_config_khr_TYPE} field of the _mutable_config_ + parameter. + Otherwise, the _global_work_size_ cannot be modified. + +include::{generated}/api/version-notes/CL_MUTABLE_DISPATCH_GLOBAL_SIZE_KHR.asciidoc[] + + {CL_MUTABLE_DISPATCH_LOCAL_SIZE_KHR_anchor} determines whether the + _local_work_size_ of kernel execution can be modified after recording. + If set, the _local_work_size_ of the kernel execution can be changed + with {clUpdateMutableCommandsKHR} using the + {cl_mutable_dispatch_config_khr_TYPE} field of the _mutable_config_ + parameter. + Otherwise, the _local_work_size_ cannot be modified. 
+ +include::{generated}/api/version-notes/CL_MUTABLE_DISPATCH_LOCAL_SIZE_KHR.asciidoc[] + + {CL_MUTABLE_DISPATCH_ARGUMENTS_KHR_anchor} determines whether the + kernel arguments set on _kernel_ can be updated between executions. + If set, the kernel arguments normally set with {clSetKernelArg} and + {clSetKernelArgSVMPointer} can be changed with + {clUpdateMutableCommandsKHR} using the + {cl_mutable_dispatch_config_khr_TYPE} field of the _mutable_config_ + parameter. + Otherwise, the kernel arguments cannot be modified between executions. + +include::{generated}/api/version-notes/CL_MUTABLE_DISPATCH_ARGUMENTS_KHR.asciidoc[] + + {CL_MUTABLE_DISPATCH_EXEC_INFO_KHR_anchor} determines whether the + information passed to _kernel_ can be updated between executions. + If set, the execution information of the kernel can be changed with + {clUpdateMutableCommandsKHR} using the + {cl_mutable_dispatch_config_khr_TYPE} field of the _mutable_config_ + parameter. + Otherwise, the kernel execution information cannot be modified. + +include::{generated}/api/version-notes/CL_MUTABLE_DISPATCH_EXEC_INFO_KHR.asciidoc[] + + If {CL_MUTABLE_DISPATCH_UPDATABLE_FIELDS_KHR_anchor} is not specified + then it defaults to the value returned by the + {CL_DEVICE_MUTABLE_DISPATCH_CAPABILITIES_KHR} device query. +endif::cl_khr_command_buffer_mutable_dispatch[] +|==== + +[NOTE] +==== +The work-group size to be used for _kernel_ can also be specified in the +program source using the +`+__attribute__((reqd_work_group_size(X, Y, Z)))+` qualifier. +In this case the size of work-group specified by _local_work_size_ must +match the value specified by the `reqd_work_group_size` `+__attribute__+` +qualifier. + +These work-group instances are executed in parallel across multiple compute +units or concurrently on the same compute unit. + +Each work-item is uniquely identified by a global identifier. 
+The global ID, which can be read inside the kernel, is computed using the +value given by _global_work_size_ and _global_work_offset_. +In addition, a work-item is also identified within a work-group by a unique +local ID. +The local ID, which can also be read by the kernel, is computed using the +value given by _local_work_size_. +The starting local ID is always (0, 0, ... 0). +==== // refError -{clSetEventCallback} returns {CL_SUCCESS} if the function is executed +{clCommandNDRangeKernelKHR} returns {CL_SUCCESS} if the function is executed successfully. -Otherwise, it returns one of the following errors: - - * {CL_INVALID_EVENT} if _event_ is not a valid event object. - * {CL_INVALID_VALUE} if _pfn_event_notify_ is `NULL` or if - _command_exec_callback_type_ is not {CL_SUBMITTED}, {CL_RUNNING}, or - {CL_COMPLETE}. - * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required - by the OpenCL implementation on the device. - * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources - required by the OpenCL implementation on the host. --- +Otherwise, it returns the errors defined by {clEnqueueNDRangeKernel} except: +{CL_INVALID_COMMAND_QUEUE} is replaced with: -[open,refpage='clRetainEvent',desc='Increments the event reference count.',type='protos'] --- -To retain an event object, call the function + * {CL_INVALID_COMMAND_QUEUE} if the + `<>` extension is not supported and + _command_queue_ is not `NULL`. + * {CL_INVALID_COMMAND_QUEUE} if the + `<>` extension is supported; and + either _command_queue_ is `NULL` and _command_buffer_ was created with + more than one queue, or _command_queue_ is not `NULL` and not a + command-queue listed on _command_buffer_ creation. -include::{generated}/api/protos/clRetainEvent.txt[] -include::{generated}/api/version-notes/clRetainEvent.asciidoc[] +{CL_INVALID_CONTEXT} is replaced with: - * _event_ is the event object to be retained. 
+ * {CL_INVALID_CONTEXT} if the context associated with _command_queue_, + _command_buffer_, and _kernel_ are not the same. -The _event_ reference count is incremented. -The OpenCL commands that return an event perform an implicit retain. +{CL_INVALID_EVENT_WAIT_LIST} is replaced with: -// refError + * {CL_INVALID_SYNC_POINT_WAIT_LIST_KHR} if _sync_point_wait_list_ is + `NULL` and _num_sync_points_in_wait_list_ is > 0, or + _sync_point_wait_list_ is not `NULL` and _num_sync_points_in_wait_list_ + is 0, or if synchronization-point objects in _sync_point_wait_list_ are + not valid synchronization-points. -{clRetainEvent} returns {CL_SUCCESS} if the function is executed successfully. -Otherwise, it returns one of the following errors: +New errors: - * {CL_INVALID_EVENT} if _event_ is not a valid event object. - * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required - by the OpenCL implementation on the device. - * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources - required by the OpenCL implementation on the host. --- + * {CL_INVALID_COMMAND_BUFFER_KHR} if _command_buffer_ is not a valid + command-buffer. + * {CL_INVALID_VALUE} if values specified in _properties_ are not valid + * {CL_INVALID_OPERATION} if _command_buffer_ has been finalized. + * {CL_INVALID_VALUE} if the `<>` + extension is not supported and _mutable_handle_ is not `NULL`. + * {CL_INVALID_OPERATION} if the device associated with _command_queue_ + does not support {CL_COMMAND_BUFFER_CAPABILITY_KERNEL_PRINTF_KHR} and + _kernel_ contains a printf call. + * {CL_INVALID_OPERATION} if the device associated with _command_queue_ + does not support {CL_COMMAND_BUFFER_CAPABILITY_DEVICE_SIDE_ENQUEUE_KHR} + and _kernel_ contains a kernel-enqueue call. 
-[open,refpage='clReleaseEvent',desc='Decrements the event reference count.',type='protos'] --- -To release an event object, call the function +ifdef::cl_khr_command_buffer_mutable_dispatch[] +If the `<>` extension is supported: -include::{generated}/api/protos/clReleaseEvent.txt[] -include::{generated}/api/version-notes/clReleaseEvent.asciidoc[] + * {CL_INVALID_OPERATION} if the requested + {CL_MUTABLE_DISPATCH_UPDATABLE_FIELDS_KHR} properties are not reported + by {CL_DEVICE_MUTABLE_DISPATCH_CAPABILITIES_KHR} for the device + associated with _command_queue_. + If _command_queue_ is `NULL`, the device associated with + _command_buffer_ must report support for these properties. + * {CL_INVALID_VALUE} if _command_buffer_ was created with the + {CL_COMMAND_BUFFER_MUTABLE_DISPATCH_ASSERTS_KHR} property with + {CL_MUTABLE_DISPATCH_ASSERT_NO_ADDITIONAL_WORK_GROUPS_KHR} and + _local_work_size_ is `NULL`, or if _properties_ includes the + {CL_MUTABLE_DISPATCH_ASSERTS_KHR} property with + {CL_MUTABLE_DISPATCH_ASSERT_NO_ADDITIONAL_WORK_GROUPS_KHR} and + _local_work_size_ is `NULL`. +endif::cl_khr_command_buffer_mutable_dispatch[] - * _event_ is the event object to be released. +-- -The _event_ reference count is decremented. +[open,refpage='clCommandSVMMemcpyKHR',desc='Record a command to do an SVM memcpy operation',type='protos'] +-- +To record a command to do an SVM memcpy operation, call the function -The event object is deleted once the reference count becomes zero, the -specific command identified by this event has completed (or terminated) and -there are no commands in the command-queues of a context that require a wait -for this event to complete. -Using this function to release a reference that was not obtained by creating -the object or by calling {clRetainEvent} causes undefined behavior. 
+include::{generated}/api/protos/clCommandSVMMemcpyKHR.txt[] +include::{generated}/api/version-notes/clCommandSVMMemcpyKHR.asciidoc[] -[NOTE] -==== -Developers should be careful when releasing their last reference count on -events created by {clCreateUserEvent} that have not yet been set to status -of {CL_COMPLETE} or an error. -If the user event was used in the event_wait_list argument passed to a -*+clEnqueue*+* API or another application host thread is waiting for it in -{clWaitForEvents}, those commands and host threads will continue to wait for -the event status to reach {CL_COMPLETE} or error, even after the application -has released the object. -Since in this scenario the application has released its last reference count -to the user event, it would be in principle no longer valid for the -application to change the status of the event to unblock all the other -machinery. -As a result the waiting tasks will wait forever, and associated events, -{cl_mem_TYPE} objects, command-queues and contexts are likely to leak. -In-order command-queues caught up in this deadlock may cease to do any work. -==== + * _command_buffer_ refers to a valid command-buffer object. + * _command_queue_ specifies the command-queue the command will be recorded to. + {empty} + + If the `<>` extension is not supported, + only a single command-queue is supported, and _command_queue_ must be + `NULL`. +ifdef::cl_khr_command_buffer_multi_device[] + {empty} + + If the `<>` extension is supported and + _command_queue_ is `NULL`, then only one command-queue must have been set on + _command_buffer_ creation; otherwise, _command_queue_ must not be `NULL`. +endif::cl_khr_command_buffer_multi_device[] + * _dst_ptr_ is the pointer to a host (if the device supports system SVM) + or SVM memory allocation where data is copied to. + * _src_ptr_ is the pointer to a host (if the device supports system SVM) + or SVM memory allocation where data is copied from. 
+ * _size_ is the size in bytes of data being copied. + * _sync_point_wait_list_, _num_sync_points_in_wait_list_ specify + synchronization-points that need to complete before this particular + command can be executed. ++ +If _sync_point_wait_list_ is `NULL`, _num_sync_points_in_wait_list_ must be +0. +If _sync_point_wait_list_ is not `NULL`, the list of synchronization-points +pointed to by _sync_point_wait_list_ must be valid and +_num_sync_points_in_wait_list_ must be greater than 0. +The synchronization-points specified in _sync_point_wait_list_ are +*device-side* synchronization-points. +The command-buffer associated with synchronization-points in +_sync_point_wait_list_ must be the same as _command_buffer_. +The memory associated with _sync_point_wait_list_ can be reused or freed +after the function returns. + * _sync_point_ returns a synchronization-point ID that identifies this + particular command. + Synchronization-point objects are unique and can be used to identify + this command later on. + _sync_point_ can be `NULL` in which case it will not be possible for the + application to record a wait for this command to complete. + If the _sync_point_wait_list_ and the _sync_point_ arguments are not + `NULL`, the _sync_point_ argument should not refer to an element of the + _sync_point_wait_list_ array. + * _mutable_handle_ returns a handle to the command. + This parameter is unused, and **must** be `NULL`. // refError -{clReleaseEvent} returns {CL_SUCCESS} if the function is executed +{clCommandSVMMemcpyKHR} returns {CL_SUCCESS} if the function is executed successfully. -Otherwise, it returns one of the following errors: +Otherwise, it returns the errors defined by {clEnqueueSVMMemcpy} except: - * {CL_INVALID_EVENT} if _event_ is not a valid event object. - * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required - by the OpenCL implementation on the device. 
- * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources - required by the OpenCL implementation on the host. --- +{CL_INVALID_COMMAND_QUEUE} is replaced with: + * {CL_INVALID_COMMAND_QUEUE} if the + `<<cl_khr_command_buffer_multi_device>>` extension is not supported and + _command_queue_ is not `NULL`. + * {CL_INVALID_COMMAND_QUEUE} if the + `<<cl_khr_command_buffer_multi_device>>` extension is supported; and + either _command_queue_ is `NULL` and _command_buffer_ was created with + more than one queue, or _command_queue_ is not `NULL` and not a + command-queue listed on _command_buffer_ creation. -[[markers-barriers-waiting-for-events]] -== Markers, Barriers and Waiting for Events +{CL_INVALID_CONTEXT} is replaced with: -[open,refpage='clEnqueueMarkerWithWaitList',desc='Enqueues a marker command which waits for either a list of events to complete, or all previously enqueued commands to complete.',type='protos'] --- -To enqueue a marker command which waits for events or commands to complete, -call the function + * {CL_INVALID_CONTEXT} if the context associated with _command_queue_ and + _command_buffer_ are not the same. -include::{generated}/api/protos/clEnqueueMarkerWithWaitList.txt[] -include::{generated}/api/version-notes/clEnqueueMarkerWithWaitList.asciidoc[] +{CL_INVALID_EVENT_WAIT_LIST} is replaced with: - * _command_queue_ is a valid host command-queue. - * _event_wait_list_ and _num_events_in_wait_list_ specify events that need to - complete before this particular command can be executed. - * _event_ returns an event object that identifies this command and - can be used to query or wait for this command to complete. - If _event_ is `NULL` or the enqueue is unsuccessful, no event will be - created and therefore it will not be possible to query the status of this - command or to wait for this command to complete. - If _event_wait_list_ and _event_ are not `NULL`, _event_ must not refer - to an element of the _event_wait_list_ array.
+ * {CL_INVALID_SYNC_POINT_WAIT_LIST_KHR} if _sync_point_wait_list_ is + `NULL` and _num_sync_points_in_wait_list_ is > 0, or + _sync_point_wait_list_ is not `NULL` and _num_sync_points_in_wait_list_ + is 0, or if synchronization-point objects in _sync_point_wait_list_ are + not valid synchronization-points. -If _event_wait_list_ is `NULL`, _num_events_in_wait_list_ must be 0. -If _event_wait_list_ is not `NULL`, the list of events pointed to by -_event_wait_list_ must be valid and _num_events_in_wait_list_ must be -greater than 0. -The events specified in _event_wait_list_ act as synchronization points. -The context associated with events in _event_wait_list_ and _command_queue_ -must be the same. -The memory associated with _event_wait_list_ can be reused or freed after -the function returns. +New errors: -If _event_wait_list_ is `NULL`, then this particular command waits until all -previous enqueued commands to _command_queue_ have completed. + * {CL_INVALID_COMMAND_BUFFER_KHR} if _command_buffer_ is not a valid + command-buffer. + * {CL_INVALID_OPERATION} if _command_buffer_ has been finalized. + * {CL_INVALID_VALUE} if _mutable_handle_ is not `NULL`. +-- -The marker command either waits for a list of events to complete, or if the -list is empty it waits for all commands previously enqueued in -_command_queue_ to complete before it completes. -This command returns an _event_ which can be waited on, i.e. this event can -be waited on to insure that all events either in the _event_wait_list_ or -all previously enqueued commands, queued before this command to -_command_queue_, have completed. 
+[open,refpage='clCommandSVMMemFillKHR',desc='Record a command to fill a region in SVM with a pattern of a given pattern size',type='protos'] +-- +To record a command to fill a region in SVM with a pattern of a given +pattern size, call the function + +include::{generated}/api/protos/clCommandSVMMemFillKHR.txt[] +include::{generated}/api/version-notes/clCommandSVMMemFillKHR.asciidoc[] + + * _command_buffer_ refers to a valid command-buffer object. + * _command_queue_ specifies the command-queue the command will be recorded to. + {empty} + + If the `<>` extension is not supported, + only a single command-queue is supported, and _command_queue_ must be + `NULL`. +ifdef::cl_khr_command_buffer_multi_device[] + {empty} + + If the `<>` extension is supported and + _command_queue_ is `NULL`, then only one command-queue must have been set on + _command_buffer_ creation; otherwise, _command_queue_ must not be `NULL`. +endif::cl_khr_command_buffer_multi_device[] + * _svm_ptr_ is a pointer to a (if the device supports system SVM) + or SVM memory region that will be filled with _pattern_. + It must be aligned to _pattern_size_ bytes. + If _svm_ptr_ is allocated using {clSVMAlloc}, then it must be allocated + from the same context from which _command_queue_ was created. + Otherwise the behavior is undefined. + * _pattern_ is a pointer to the data pattern of size _pattern_size_ in + bytes. + _pattern_ will be used to fill a region in _buffer_ starting at + _svm_ptr_ and is _size_ bytes in size. + The data pattern must be a scalar or vector integer or floating-point + data type supported by OpenCL. + For example, if the region pointed to by _svm_ptr_ is to be filled with + a pattern of `float4` values, then _pattern_ will be a pointer to a + `cl_float4` value and _pattern_size_ will be `sizeof(cl_float4)`. + The maximum value of _pattern_size_ is the size of the largest integer + or floating-point vector data type supported by the OpenCL device. 
+ The memory associated with _pattern_ can be reused or freed after the + function returns. + * _size_ is the size in bytes of region being filled starting with + _svm_ptr_ and must be a multiple of _pattern_size_. + * _sync_point_wait_list_, _num_sync_points_in_wait_list_ specify + synchronization-points that need to complete before this + particular command can be executed. ++ +If _sync_point_wait_list_ is `NULL`, _num_sync_points_in_wait_list_ must be +0. +If _sync_point_wait_list_ is not `NULL`, the list of synchronization-points +pointed to by _sync_point_wait_list_ must be valid and +_num_sync_points_in_wait_list_ must be greater than 0. +The synchronization-points specified in _sync_point_wait_list_ are +*device-side* synchronization-points. +The command-buffer associated with synchronization-points in +_sync_point_wait_list_ must be the same as _command_buffer_. +The memory associated with _sync_point_wait_list_ can be reused or freed +after the function returns. + * _sync_point_ returns a synchronization-point ID that identifies this + particular command. + Synchronization-point objects are unique and can be used to identify + this command later on. + _sync_point_ can be `NULL` in which case it will not be possible for the + application to record a wait for this command to complete. + If the _sync_point_wait_list_ and the _sync_point_ arguments are not + `NULL`, the _sync_point_ argument should not refer to an element of the + _sync_point_wait_list_ array. + * _mutable_handle_ returns a handle to the command. + This parameter is unused, and **must** be `NULL`. // refError -{clEnqueueMarkerWithWaitList} returns {CL_SUCCESS} if the function is -successfully executed. -Otherwise, it returns one of the following errors: +{clCommandSVMMemFillKHR} returns {CL_SUCCESS} if the function is executed +successfully. 
Otherwise, it returns the errors defined by +{clEnqueueSVMMemFill} except: - * {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not a valid host - command-queue. - * {CL_INVALID_CONTEXT} if context associated with _command_queue_ and events - in _event_wait_list_ are not the same. - * {CL_INVALID_EVENT_WAIT_LIST} if _event_wait_list_ is `NULL` and - _num_events_in_wait_list_ > 0, or _event_wait_list_ is not `NULL` and - _num_events_in_wait_list_ is 0, or if event objects in _event_wait_list_ - are not valid events. - * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required - by the OpenCL implementation on the device. - * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources - required by the OpenCL implementation on the host. --- +{CL_INVALID_COMMAND_QUEUE} is replaced with: -[open,refpage='clEnqueueMarker',desc='Enqueues a marker command which waits for all previously enqueued commands to complete.',type='protos'] --- -To enqueue a marker command which waits for previous commands to complete, call -the function + * {CL_INVALID_COMMAND_QUEUE} if the + `<>` extension is not supported and + _command_queue_ is not `NULL`. + * {CL_INVALID_COMMAND_QUEUE} if the + `<>` extension is supported; and + either _command_queue_ is `NULL` and _command_buffer_ was created with + more than one queue, or _command_queue_ is not `NULL` and not a + command-queue listed on _command_buffer_ creation. -include::{generated}/api/protos/clEnqueueMarker.txt[] -include::{generated}/api/version-notes/clEnqueueMarker.asciidoc[] +{CL_INVALID_CONTEXT} is replaced with: - * _command_queue_ is a valid host command-queue. - * _event_ returns an event object that identifies this command and - can be used to query or wait for this command to complete. - If _event_ is `NULL` or the enqueue is unsuccessful, no event will be - created and therefore it will not be possible to query the status of this - command or to wait for this command to complete. 
- If _event_wait_list_ and _event_ are not `NULL`, _event_ must not refer - to an element of the _event_wait_list_ array. + * {CL_INVALID_CONTEXT} if the context associated with _command_queue_, + _command_buffer_, and _kernel_ are not the same. + +{CL_INVALID_EVENT_WAIT_LIST} is replaced with: + + * {CL_INVALID_SYNC_POINT_WAIT_LIST_KHR} if _sync_point_wait_list_ is + `NULL` and _num_sync_points_in_wait_list_ is > 0, or + _sync_point_wait_list_ is not `NULL` and _num_sync_points_in_wait_list_ + is 0, or if synchronization-point objects in _sync_point_wait_list_ are + not valid synchronization-points. + +New errors: + + * {CL_INVALID_COMMAND_BUFFER_KHR} if _command_buffer_ is not a valid + command-buffer. + * {CL_INVALID_OPERATION} if _command_buffer_ has been finalized. + * {CL_INVALID_VALUE} if _mutable_handle_ is not `NULL`. +-- + + +ifdef::cl_khr_command_buffer_multi_device[] +=== Remapping Command-Buffers + +If the `<>` extension is supported, +platforms reporting the {CL_COMMAND_BUFFER_PLATFORM_REMAP_QUEUES_KHR} +capability support generating a deep copy of a command-buffer with its +commands remapped to a list of command-queues that are potentially +<> with the queues used to create the +command-buffer. +That is, the remapped command-buffer can execute on queues that differ in +terms of properties and/or associated device from the original +command-buffer queues. + +This functionality is invoked through a new synchronous entry-point +{clRemapCommandBufferKHR} which takes a list of queues to which the commands +should now target. +It then returns a command-buffer containing the same commands as the +original, with the same command dependencies, but targeting different +queues. +A list of command handles may also be passed to the entry-point, which +allows handles to the equivalent commands in the remapped command-buffer to +be returned by an output parameter. 
+ +Device properties restrict remapping possibilities, as existing commands can +have a configuration which is not supported by another device, and so +remapping may fail with an error relating to this incompatibility. +Examples of command configurations which can introduce incompatibilities +when trying to map to a new device are: + + * Program language features used in a kernel not supported by the new + device. + * ND-Range configuration, e.g. exceeds the new device's max work-group size. + * Misalignment of sub-buffers based on minimum alignment of new device. + +In addition to this functionality, platforms reporting +{CL_COMMAND_BUFFER_PLATFORM_AUTOMATIC_REMAP_KHR} allow the user to create a +remapped command-buffer where the mapping of queues to commands is +determined by the OpenCL runtime in a way it determines as optimal. +This is particularly useful in hot plugging environments where devices may +appear and disappear during runtime. + +[open,refpage='clRemapCommandBufferKHR',desc='Create copy of a command-buffer remapped to specified command-queues',type='protos'] +-- +To create a deep copy of the input command-buffer with the copied commands +remapped to target the passed command-queues, call the function + +include::{generated}/api/protos/clRemapCommandBufferKHR.txt[] +include::{generated}/api/version-notes/clRemapCommandBufferKHR.asciidoc[] + + * _command_buffer_ specifies the command-buffer to create a remapped deep + copy of. + * _automatic_ indicates if the remapping is done explicitly by the user, + or automatically by the OpenCL runtime. + If _automatic_ is {CL_FALSE}, then each element of _queues_ will replace + the queue used on _command_buffer_ creation at the same index. + If {CL_TRUE} and {CL_COMMAND_BUFFER_PLATFORM_AUTOMATIC_REMAP_KHR} is + supported, then the OpenCL runtime will decide in a way it determines + optimal which of the elements in _queues_ each command in the returned + command-buffer will be associated with.
+ * _num_queues_ is the number of command-queues listed in _queues_, must + not be 0. + * _queues_ is a pointer to an ordered list of command-queues for the + returned command-buffer to target, must be a non-`NULL` value. + * _num_handles_ is the number of command handles passed in both _handles_ + and _handles_ret_ lists, may be 0. + * _handles_ is an ordered list of handles belonging to _command_buffer_ to + create remapped copies of, may be `NULL`. + * _handles_ret_ returns an ordered list of handles where each handle is + equivalent to the handle at the same index in _handles_, but belonging + to the returned command-buffer. + * _errcode_ret_ returns an appropriate error code. + If _errcode_ret_ is `NULL`, no error code is returned. -The marker command waits for all commands previously enqueued in _command_queue_ to complete before it completes. -This command returns an _event_ which can be waited on, i.e. this event can be -waited on to insure that all previously enqueued commands, queued before this -command to _command_queue_, have completed. +The returned command-buffer has the same state as the input command-buffer, +unless the input command-buffer is in the <> state, in +which case the returned command-buffer has state <>. // refError -{clEnqueueMarker} returns {CL_SUCCESS} if the function is successfully -executed. -Otherwise, it returns one of the following errors: - - * {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not a valid host - command-queue. - * {CL_INVALID_VALUE} if _event_ is `NULL`. - * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required - by the OpenCL implementation on the device. +{clRemapCommandBufferKHR} returns a valid command-buffer with _errcode_ret_ +set to {CL_SUCCESS} if the command-buffer is created successfully. 
+Otherwise, it returns a `NULL` value without setting _handles_ret_, and with +one of the following error values returned in _errcode_ret_: + + * {CL_INVALID_COMMAND_BUFFER_KHR} if _command_buffer_ is not a valid + command-buffer. + * {CL_INVALID_VALUE} if _num_queues_ is 0, or if _queues_ is `NULL`. + * {CL_INVALID_VALUE} if _automatic_ is {CL_FALSE} and _num_queues_ is not + equal to the number of queues used on creation of _command_buffer_. + * {CL_INVALID_VALUE} if _handles_ or _handles_ret_ is `NULL` and + _num_handles_ is > 0, or either _handles_ or _handles_ret_ is not `NULL` + and _num_handles_ is 0. + * {CL_INVALID_VALUE} if any handle in _handles_ is not a valid command + handle belonging to _command_buffer_. + * {CL_INVALID_COMMAND_QUEUE} if any command-queue in _queues_ is not a + valid command-queue. + * {CL_INVALID_CONTEXT} if _command_buffer_ and all the command-queues in + _queues_ do not have the same OpenCL context. + * {CL_INVALID_OPERATION} if the platform does not support the + {CL_COMMAND_BUFFER_PLATFORM_REMAP_QUEUES_KHR} flag. + * {CL_INVALID_OPERATION} if the platform does not support the + {CL_COMMAND_BUFFER_PLATFORM_AUTOMATIC_REMAP_KHR} flag and _automatic_ is + {CL_TRUE}. + * {CL_INCOMPATIBLE_COMMAND_QUEUE_KHR} if such an error would be returned + by passing _queues_ to {clCreateCommandBufferKHR}. + * Any error relating to device support that can be returned by a command + recording entry-point may also be returned. + This is because a command in _command_buffer_ can have a configuration that is not + supported by the device associated with the queue in _queues_ that the + command is being remapped to. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources + required by the OpenCL implementation on the device. * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources required by the OpenCL implementation on the host.
-- +endif::cl_khr_command_buffer_multi_device[] + +ifdef::cl_khr_command_buffer_mutable_dispatch[] +[[mutable-commands]] +=== Mutable Commands: + +A generic {cl_mutable_command_khr_TYPE} handle is called a _mutable-command_ +object as it can be returned from any command recording entry-point in the +`<>` family of extensions. +The mutable-command handles returned by {clCommandNDRangeKernelKHR} in +particular are referred to as _mutable-dispatch_ objects, and can be +modified through the fields of {cl_mutable_dispatch_config_khr_TYPE}. + +Mutable-command handles are updated between enqueues using entry-point +{clUpdateMutableCommandsKHR}. +To enable performant usage, all aspects of mutation are encapsulated inside +a single {cl_mutable_base_config_khr_TYPE} parameter. +This means that the runtime has access to all the information about how the +command-buffer will change, allowing the command-buffer to be rebuilt as +efficiently as possible. +Any modifications to the arguments or execution info of a mutable-dispatch +handle using {cl_mutable_dispatch_arg_khr_TYPE} or +{cl_mutable_dispatch_exec_info_khr_TYPE} have no effect on the original +kernel object used when the command was recorded, and only influence the +{clCommandNDRangeKernelKHR} command associated with the mutable-dispatch. + +[[mutable-dispatch-kernel-argument-safe-usage]] +[NOTE] +==== +The base `<>` extension +<> that a command-buffer +does not update the reference count of objects set as arguments on kernels +recorded into the command-buffer. + +The implication for applications using {clUpdateMutableCommandsKHR} is that +it is safe to delete objects used as kernel command arguments, if all the +kernel commands using that object as an argument have had their arguments +replaced with a different object.
+==== -[open,refpage='clEnqueueWaitForEvents',desc='Enqueues a wait on a list of events to complete.',type='protos'] --- -To enqueue a wait for a specific event or a list of events to complete before any future commands queued in a command-queue are executed, call the function - -include::{generated}/api/protos/clEnqueueWaitForEvents.txt[] -include::{generated}/api/version-notes/clEnqueueWaitForEvents.asciidoc[] +To facilitate performant usage for pipelined work flows, where applications +repeatedly call command-buffer update then enqueue, implementations may +defer some of the work to allow {clUpdateMutableCommandsKHR} to return +immediately. +Deferring any recompilation until {clEnqueueCommandBufferKHR} avoids +blocking in host code and keeps device occupancy high. +This is only possible with a command-buffer created with the +{CL_COMMAND_BUFFER_SIMULTANEOUS_USE_KHR} flag, as without this the enqueued +command-buffer must complete before any modification occurs. - * _command_queue_ is a valid host command-queue. - * _event_list_ and _num_events_ specify events that need to complete before - this particular command can be executed. +[open,refpage='clUpdateMutableCommandsKHR',desc='Modify configuration of mutable-command handles to update behavior for future enqueues',type='protos'] +-- +To modify the configuration of mutable-command handles returned during +_command_buffer_ recording, updating the behavior of those commands in +future enqueues of _command_buffer_, call the function -// Note, this parameter is called event_list (like clWaitForEvents) rather than -// event_wait_list(like clEnqueueMarkerWithWaitList etc.) because the function -// predates wait lists (and CL_INVALID_EVENT_WAIT_LIST). +include::{generated}/api/protos/clUpdateMutableCommandsKHR.txt[] +include::{generated}/api/version-notes/clUpdateMutableCommandsKHR.asciidoc[] -The events specified in _event_list_ act as synchronization points. 
-The context associated with events in _event_list_ and _command_queue_ must be -the same. -The memory associated with _event_list_ can be reused or freed after the -function returns. + * _command_buffer_ refers to a valid command-buffer object. + * _mutable_config_ is a pointer to a {cl_mutable_base_config_khr_TYPE} + structure defining updates to make to mutable-commands. // refError -{clEnqueueWaitForEvents} returns {CL_SUCCESS} if the function is successfully -executed. -Otherwise, it returns one of the following errors: - - * {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not a valid host - command-queue. - * {CL_INVALID_CONTEXT} if context associated with _command_queue_ and events - in _event_list_ are not the same. - * {CL_INVALID_VALUE} if _num_events_ is 0 or _event_list_ is `NULL`. - * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required - by the OpenCL implementation on the device. +{clUpdateMutableCommandsKHR} returns {CL_SUCCESS} if all the mutable-command +objects were updated successfully. +Otherwise, none of the updates to mutable-command objects are preserved and +one of the errors below is returned: + + * {CL_INVALID_COMMAND_BUFFER_KHR} if _command_buffer_ is not a valid + command-buffer. + * {CL_INVALID_OPERATION} if _command_buffer_ has not been finalized. + * {CL_INVALID_OPERATION} if _command_buffer_ was not created with the + {CL_COMMAND_BUFFER_MUTABLE_KHR} flag. + * {CL_INVALID_VALUE} if the _type_ member of _mutable_config_ is not + {CL_STRUCTURE_TYPE_MUTABLE_BASE_CONFIG_KHR}. + * {CL_INVALID_VALUE} if the _mutable_dispatch_list_ member of + _mutable_config_ is `NULL` and _num_mutable_dispatch_ > 0, or + _mutable_dispatch_list_ is not `NULL` and _num_mutable_dispatch_ is 0. + * {CL_INVALID_VALUE} if the _next_ member of _mutable_config_ is not + `NULL` and any iteration of the structure pointer chain does not contain + valid _type_ and _next_ members. 
+ * {CL_INVALID_VALUE} if _mutable_config_ is `NULL`, or if both _next_ and + _mutable_dispatch_list_ members of _mutable_config_ are `NULL`. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources + required by the OpenCL implementation on the device. * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources required by the OpenCL implementation on the host. + +Using this function when _command_buffer_ is in the <> +state and not created with the {CL_COMMAND_BUFFER_SIMULTANEOUS_USE_KHR} flag +causes undefined behavior. + +[NOTE] +==== +Performant usage is to call {clUpdateMutableCommandsKHR} only when the +desired state of all commands is known, rather than iteratively updating +each command individually. +==== + +[NOTE] +==== +If the command buffer has been created with +{CL_MUTABLE_DISPATCH_ASSERT_NO_ADDITIONAL_WORK_GROUPS_KHR}, or the updated +ND-range command has been recorded with this flag, and the ND-range +parameters are updated so that the new number of work-groups exceeds the +number when the ND-range command was recorded, the behavior is undefined. +==== + +If the _mutable_dispatch_list_ member of _mutable_config_ is non-`NULL`, +then errors defined by {clEnqueueNDRangeKernel}, {clSetKernelExecInfo}, +{clSetKernelArg}, and {clSetKernelArgSVMPointer} are returned by +{clUpdateMutableCommandsKHR} if any of the array elements are set to an +invalid value. +Additionally, the following errors are returned if any +{cl_mutable_dispatch_config_khr_TYPE} element of the array violates the +defined conditions: + + * {CL_INVALID_MUTABLE_COMMAND_KHR} if _command_ is not a valid mutable + command object, or created from _command_buffer_. + * {CL_INVALID_VALUE} if _type_ is not + {CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR}. + * {CL_INVALID_OPERATION} if the values of _local_work_size_ and/or + _global_work_size_ result in a change to work-group uniformity. 
+ * {CL_INVALID_OPERATION} if the _work_dim_ is different from the + _work_dim_ set on _command_ recording. + * {CL_INVALID_OPERATION} if the {CL_MUTABLE_DISPATCH_GLOBAL_OFFSET_KHR} + property was not set on _command_ recording and _global_work_offset_ is + not `NULL`. + * {CL_INVALID_OPERATION} if the {CL_MUTABLE_DISPATCH_GLOBAL_SIZE_KHR} + property was not set on _command_ recording and _global_work_size_ is + not `NULL`. + * {CL_INVALID_OPERATION} if the {CL_MUTABLE_DISPATCH_LOCAL_SIZE_KHR} + property was not set on _command_ recording and _local_work_size_ is not + `NULL`. + * {CL_INVALID_OPERATION} if the {CL_MUTABLE_DISPATCH_ARGUMENTS_KHR} + property was not set on _command_ recording and _num_args_ or + _num_svm_args_ is non-zero. + * {CL_INVALID_OPERATION} if the {CL_MUTABLE_DISPATCH_EXEC_INFO_KHR} + property was not set on _command_ recording and _num_exec_infos_ is + non-zero. + * {CL_INVALID_VALUE} if _arg_list_ is `NULL` and _num_args_ > 0, or + _arg_list_ is not `NULL` and _num_args_ is 0. + * {CL_INVALID_VALUE} if _arg_svm_list_ is `NULL` and _num_svm_args_ > 0, + or _arg_svm_list_ is not `NULL` and _num_svm_args_ is 0. + * {CL_INVALID_VALUE} if _exec_info_list_ is `NULL` and _num_exec_infos_ > + 0, or _exec_info_list_ is not `NULL` and _num_exec_infos_ is 0. +-- + +[open,refpage='cl_mutable_base_config_khr',desc='Encapsulates all mutable-command updates passed to clUpdateMutableCommandsKHR',type='structs'] +-- +The {cl_mutable_base_config_khr_TYPE} structure encapsulates all updates to +make to mutable-commands in a single call to {clUpdateMutableCommandsKHR}, +and is defined as: + +include::{generated}/api/structs/cl_mutable_base_config_khr.txt[] + + * _type_ is the type of this structure, and must be + {CL_STRUCTURE_TYPE_MUTABLE_BASE_CONFIG_KHR}. + * _next_ is `NULL` or a pointer to an extending structure. + * _num_mutable_dispatch_ is the number of mutable-dispatch objects to + configure in this enqueue of the command-buffer.
+ * _mutable_dispatch_list_ is an array containing _num_mutable_dispatch_ + elements describing the configurations of mutable kernel execution + commands in the command-buffer. + For a description of struct members making up each array element see + {cl_mutable_dispatch_config_khr_TYPE}. +-- + +[open,refpage='cl_mutable_dispatch_config_khr',desc='Set kernel configuration of a mutable clCommandNDRangeKernelKHR command',type='structs'] +-- +The {cl_mutable_dispatch_config_khr_TYPE} structure is passed to +{clUpdateMutableCommandsKHR} to set the kernel configuration of a mutable +{clCommandNDRangeKernelKHR} command, and is defined as: + +include::{generated}/api/structs/cl_mutable_dispatch_config_khr.txt[] + + * _type_ is the type of this structure, and must be + {CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR}. + * _next_ is `NULL` or a pointer to an extending structure. + * _command_ is a mutable-command object returned by + {clCommandNDRangeKernelKHR} representing a kernel execution as part of a + command-buffer. + * _num_args_ is the number of kernel arguments being changed. + * _num_svm_args_ is the number of SVM kernel arguments being changed. + * _num_exec_infos_ is the number of kernel execution info objects to set + for this dispatch. + * _work_dim_ is the number of dimensions used to specify the global + work-items and work-items in the work-group. + See {clEnqueueNDRangeKernel} for valid usage. + * _arg_list_ is an array describing the new kernel arguments for this + enqueue. + It must contain _num_args_ array elements, each of which encapsulates + parameters passed to {clSetKernelArg}. + See {clSetKernelArg} for usage of {cl_mutable_dispatch_arg_khr_TYPE} + members. + * _arg_svm_list_ is an array describing the new SVM kernel arguments for + this enqueue. + It must contain _num_svm_args_ array elements, each of which + encapsulates parameters passed to {clSetKernelArgSVMPointer}.
+ See {clSetKernelArgSVMPointer} for usage of + {cl_mutable_dispatch_arg_khr_TYPE} members; `arg_size` is ignored. + * _exec_info_list_ is an array containing _num_exec_infos_ elements + specifying the list of execution info objects to use for this + command-buffer enqueue. + See {clSetKernelExecInfo} for usage of + {cl_mutable_dispatch_exec_info_khr_TYPE} members. + * _global_work_offset_ can be used to specify an array of _work_dim_ + unsigned values that describe the offset used to calculate the global ID + of a work-item. + If _global_work_offset_ is `NULL` then the global offset of the dispatch + is not changed. + See {clEnqueueNDRangeKernel} for valid usage. + * _global_work_size_ points to an array of _work_dim_ unsigned values that + describe the number of global work-items in _work_dim_ dimensions that + will execute the kernel function. + If _global_work_size_ is `NULL` then the number of global work-items in + the dispatch is not changed. + See {clEnqueueNDRangeKernel} for valid usage. + * _local_work_size_ points to an array of _work_dim_ unsigned values that + describe the number of work-items that make up a work-group that will + execute the kernel. + If _local_work_size_ is `NULL` then the number of local work-items in + the dispatch is not changed. + See {clEnqueueNDRangeKernel} for valid usage.
-- -[open,refpage='clEnqueueBarrierWithWaitList',desc='A synchronization point that enqueues a barrier operation.',type='protos'] +[open,refpage='cl_mutable_dispatch_arg_khr',desc='Set kernel arguments normally passed using clSetKernelArg and clSetKernelArgSVMPointer',type='structs'] -- -To enqueue a barrier command which waits for events or commands to complete, -call the function +The {cl_mutable_dispatch_arg_khr_TYPE} structure sets kernel arguments +normally passed using {clSetKernelArg} and {clSetKernelArgSVMPointer}, and +is defined as: -include::{generated}/api/protos/clEnqueueBarrierWithWaitList.txt[] -include::{generated}/api/version-notes/clEnqueueBarrierWithWaitList.asciidoc[] +include::{generated}/api/structs/cl_mutable_dispatch_arg_khr.txt[] +-- - * _command_queue_ is a valid host command-queue. - * _event_wait_list_ and _num_events_in_wait_list_ specify events that need to - complete before this particular command can be executed. - * If _event_wait_list_ is `NULL`, _num_events_in_wait_list_ must be 0. - If _event_wait_list_ is not `NULL`, the list of events pointed to by - _event_wait_list_ must be valid and _num_events_in_wait_list_ must be - greater than 0. - The events specified in _event_wait_list_ act as synchronization points. - The context associated with events in _event_wait_list_ and _command_queue_ - must be the same. - The memory associated with _event_wait_list_ can be reused or freed after - the function returns. - * _event_ returns an event object that identifies this command and - can be used to query or wait for this command to complete. - If _event_ is `NULL` or the enqueue is unsuccessful, no event will be - created and therefore it will not be possible to query the status of this - command or to wait for this command to complete. - If _event_wait_list_ and _event_ are not `NULL`, _event_ must not refer - to an element of the _event_wait_list_ array.
+[open,refpage='cl_mutable_dispatch_exec_info_khr',desc='Specify kernel execution info',type='structs'] +-- +The {cl_mutable_dispatch_exec_info_khr_TYPE} structure sets kernel execution +info normally passed using {clSetKernelExecInfo}, and is defined as: -If _event_wait_list_ is `NULL`, then this particular command waits until all -previous enqueued commands to _command_queue_ have completed. +include::{generated}/api/structs/cl_mutable_dispatch_exec_info_khr.txt[] -The barrier command either waits for a list of events to complete, or if the -list is empty it waits for all commands previously enqueued in -_command_queue_ to complete before it completes. -This command blocks command execution, that is, any following commands -enqueued after it do not execute until it completes. -This command returns an _event_ which can be waited on, i.e. this event can -be waited on to insure that all events either in the _event_wait_list_ or -all previously enqueued commands, queued before this command to -_command_queue_, have completed. +[NOTE] +==== +_param_name_ is of type {cl_uint_TYPE} rather than +{cl_kernel_exec_info_TYPE} so that the extension can be implemented on +OpenCL 1.2 where the {cl_kernel_exec_info_TYPE} typedef is unavailable. +==== +-- +endif::cl_khr_command_buffer_mutable_dispatch[] -// refError -{clEnqueueBarrierWithWaitList} returns {CL_SUCCESS} if the function is -successfully executed. -Otherwise, it returns one of the following errors: +=== Command-Buffer Queries - * {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not a valid host - command-queue. - * {CL_INVALID_CONTEXT} if context associated with _command_queue_ and events - in _event_wait_list_ are not the same. - * {CL_INVALID_EVENT_WAIT_LIST} if _event_wait_list_ is `NULL` and - _num_events_in_wait_list_ > 0, or _event_wait_list_ is not `NULL` and - _num_events_in_wait_list_ is 0, or if event objects in _event_wait_list_ - are not valid events. 
- * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required - by the OpenCL implementation on the device. - * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources - required by the OpenCL implementation on the host. +[open,refpage='clGetCommandBufferInfoKHR',desc='Query information about a command-buffer',type='protos'] -- +To query information about a command-buffer, call the function + +include::{generated}/api/protos/clGetCommandBufferInfoKHR.txt[] +include::{generated}/api/version-notes/clGetCommandBufferInfoKHR.asciidoc[] + + * _command_buffer_ specifies the command-buffer being queried. + * _param_name_ specifies the information to query. + * _param_value_size_ specifies the size in bytes of memory pointed to by + _param_value_. + This size must be {geq} size of return type as described in the table + below. + If _param_value_ is `NULL`, it is ignored. + * _param_value_ is a pointer to a memory location where the appropriate + result being queried is returned. + If _param_value_ is `NULL`, it is ignored. + * _param_value_size_ret_ returns the actual size in bytes of data being + queried by _param_value_. + If _param_value_size_ret_ is `NULL`, it is ignored. + +The list of supported _param_name_ values and the information returned in +_param_value_ by {clGetCommandBufferInfoKHR} is described in the table below. + +.{clGetCommandBufferInfoKHR} values +[cols=",,",options="header",] +|==== +| Command Buffer Info | Return Type | Description +| {CL_COMMAND_BUFFER_NUM_QUEUES_KHR_anchor} -[open,refpage='clEnqueueBarrier',desc='A synchronization point that enqueues a barrier operation.',type='protos'] --- -To enqueue a barrier command which waits for commands to complete, call the -function +include::{generated}/api/version-notes/CL_COMMAND_BUFFER_NUM_QUEUES_KHR.asciidoc[] + | {cl_uint_TYPE} + | The number of command-queues specified when _command_buffer_ was created. 
-include::{generated}/api/protos/clEnqueueBarrier.txt[] -include::{generated}/api/version-notes/clEnqueueBarrier.asciidoc[] +| {CL_COMMAND_BUFFER_QUEUES_KHR_anchor} - * _command_queue_ is a valid host command-queue. +include::{generated}/api/version-notes/CL_COMMAND_BUFFER_QUEUES_KHR.asciidoc[] + | {cl_command_queue_TYPE}[] + | Return the list of command-queues specified when the + _command_buffer_ was created. -The barrier command waits for all commands previously enqueued in -_command_queue_ to complete before it completes. -This command blocks command execution, that is, any following commands -enqueued after it do not execute until it completes. -// TODO clEnqueueBarrierWithWaitList doesn't say synchronization point, should -// it, or should the next line be removed? The main difference is that -// clEnqueueBarrierWithWaitList returns an event, which is the synchronization -// point. -The barrier command is a synchronization point. +| {CL_COMMAND_BUFFER_REFERENCE_COUNT_KHR_anchor} footnote:[{fn-reference-count-usage}] -// refError +include::{generated}/api/version-notes/CL_COMMAND_BUFFER_REFERENCE_COUNT_KHR.asciidoc[] + | {cl_uint_TYPE} + | Return the _command_buffer_ reference count. -{clEnqueueBarrier} returns {CL_SUCCESS} if the function is successfully -executed. -Otherwise, it returns one of the following errors: +| {CL_COMMAND_BUFFER_STATE_KHR_anchor} - * {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not a valid host - command-queue. - * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required - by the OpenCL implementation on the device. - * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources - required by the OpenCL implementation on the host. --- +include::{generated}/api/version-notes/CL_COMMAND_BUFFER_STATE_KHR.asciidoc[] + | {cl_command_buffer_state_khr_TYPE} + | Return the state of _command_buffer_. 
-== Out-of-order Execution of Kernels and Memory Object Commands + {CL_COMMAND_BUFFER_STATE_RECORDING_KHR_anchor} is returned when + _command_buffer_ has not been finalized. -The OpenCL functions that are submitted to a command-queue are enqueued in -the order the calls are made but can be configured to execute in-order or -out-of-order. -The _properties_ argument in {clCreateCommandQueueWithProperties} or -{clCreateCommandQueue} can be used to specify the execution order. +include::{generated}/api/version-notes/CL_COMMAND_BUFFER_STATE_RECORDING_KHR.asciidoc[] -If the {CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE} property of a command-queue is -not set, the commands enqueued to a command-queue execute in-order. -For example, if an application calls {clEnqueueNDRangeKernel} to execute -kernel A followed by a {clEnqueueNDRangeKernel} to execute kernel B, the -application can assume that kernel A finishes first and then kernel B is -executed. -If the memory objects output by kernel A are inputs to kernel B then kernel -B will see the correct data in memory objects produced by execution of -kernel A. -If the {CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE} property of a command-queue is -set, then there is no guarantee that kernel A will finish before kernel B -starts execution. + {CL_COMMAND_BUFFER_STATE_EXECUTABLE_KHR_anchor} is returned when + _command_buffer_ has been finalized and there is not a <> instance of _command_buffer_ awaiting completion on a + command_queue. -Applications can configure the commands enqueued to a command-queue to -execute out-of-order by setting the {CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE} -property of the command-queue. -This can be specified when the command-queue is created. -In out-of-order execution mode there is no guarantee that the enqueued -commands will finish execution in the order they were queued. -As there is no guarantee that kernels will be executed in-order, i.e. 
based -on when the {clEnqueueNDRangeKernel} or {clEnqueueTask} calls are made within a -command-queue, it is therefore possible that an earlier -{clEnqueueNDRangeKernel} call to execute kernel A identified by event A may -execute and/or finish later than a {clEnqueueNDRangeKernel} call to execute -kernel B which was called by the application at a later point in time. -To guarantee a specific order of execution of kernels, a wait on a -particular event (in this case event A) can be used. -The wait for event A can be specified in the _event_wait_list_ argument to -{clEnqueueNDRangeKernel} for kernel B. +include::{generated}/api/version-notes/CL_COMMAND_BUFFER_STATE_EXECUTABLE_KHR.asciidoc[] -In addition, a marker ({clEnqueueMarker} or {clEnqueueMarkerWithWaitList}) or a -barrier ({clEnqueueBarrier} or {clEnqueueBarrierWithWaitList}) command can be -enqueued to the command-queue. -The marker command ensures that previously enqueued commands identified by -the list of events to wait for (or all previous commands) have finished. -A barrier command is similar to a marker command, but additionally -guarantees that no later-enqueued commands will execute until the waited-for -commands have executed. + {CL_COMMAND_BUFFER_STATE_PENDING_KHR_anchor} is returned when an + instance of _command_buffer_ has been enqueued for execution but not + yet completed. -Similarly, commands to read, write, copy or map memory objects that are -enqueued after {clEnqueueNDRangeKernel}, {clEnqueueTask} or -{clEnqueueNativeKernel} commands are not guaranteed to wait for kernels -scheduled for execution to have completed (if the -{CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE} property is set). -To ensure correct ordering of commands, the event object returned by -{clEnqueueNDRangeKernel}, {clEnqueueTask} or {clEnqueueNativeKernel} can be -used to enqueue a wait for event or a barrier command can be enqueued that must -complete before reads or writes to the memory object(s) occur. 
+include::{generated}/api/version-notes/CL_COMMAND_BUFFER_STATE_PENDING_KHR.asciidoc[] +| {CL_COMMAND_BUFFER_PROPERTIES_ARRAY_KHR_anchor} -[[profiling-operations]] -== Profiling Operations on Memory Objects and Kernels +include::{generated}/api/version-notes/CL_COMMAND_BUFFER_PROPERTIES_ARRAY_KHR.asciidoc[] + | {cl_command_buffer_properties_khr_TYPE}[] + | Return the _properties_ argument specified in + {clCreateCommandBufferKHR}. -This section describes the profiling of OpenCL functions that are enqueued -as commands to a command-queue. Profiling of OpenCL commands can be enabled -by using a command-queue created with the {CL_QUEUE_PROFILING_ENABLE} -flag set in the {CL_QUEUE_PROPERTIES} bitfield in the _properties_ argument to -{clCreateCommandQueueWithProperties}, or in the _properties_ argument to -{clCreateCommandQueue}. -When profiling is enabled, the event objects that are created from -enqueuing a command store a timestamp for each of their state transitions. + If the _properties_ argument specified in {clCreateCommandBufferKHR} + used to create _command_buffer_ was not `NULL`, the implementation + must return the values specified in the properties argument. -[open,refpage='clGetEventProfilingInfo',desc='Returns profiling information for the command associated with event if profiling is enabled.',type='protos'] + If the _properties_ argument specified in {clCreateCommandBufferKHR} + used to create _command_buffer_ was `NULL`, the implementation may + return either a _param_value_size_ret_ of 0 (i.e. there are no + properties to be returned), or the implementation may return a + property value of 0 (where 0 is used to terminate the properties + list). + +| {CL_COMMAND_BUFFER_CONTEXT_KHR_anchor} + +include::{generated}/api/version-notes/CL_COMMAND_BUFFER_CONTEXT_KHR.asciidoc[] + | {cl_context_TYPE} + | Return the context associated with _command_buffer_.
+ +|==== + +// refError + +{clGetCommandBufferInfoKHR} returns {CL_SUCCESS} if the function is executed +successfully. +Otherwise, it returns one of the following errors: + + * {CL_INVALID_COMMAND_BUFFER_KHR} if _command_buffer_ is not a valid + command-buffer. + * {CL_INVALID_VALUE} if _param_name_ is not one of the supported values or + if size in bytes specified by _param_value_size_ is less than size of + return type and _param_value_ is not a `NULL` value. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources + required by the OpenCL implementation on the device. + * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources + required by the OpenCL implementation on the host. -- -To return profiling information for a command associated with an event when -profiling is enabled, call the function -include::{generated}/api/protos/clGetEventProfilingInfo.txt[] -include::{generated}/api/version-notes/clGetEventProfilingInfo.asciidoc[] +ifdef::cl_khr_command_buffer_mutable_dispatch[] +[open,refpage='clGetMutableCommandInfoKHR',desc='Query information about a mutable command object',type='protos'] +-- +To query information about a mutable command object, call the function - * _event_ specifies the event object. - * _param_name_ specifies the profiling data to query. +include::{generated}/api/protos/clGetMutableCommandInfoKHR.txt[] +include::{generated}/api/version-notes/clGetMutableCommandInfoKHR.asciidoc[] + + * _command_ specifies the mutable-command object being queried. + * _param_name_ specifies the information to query. The list of supported _param_name_ types and the information returned in - _param_value_ by {clGetEventProfilingInfo} is described in the - <> table. + _param_value_ by {clGetMutableCommandInfoKHR} is described in the + <> + table. + * _param_value_size_ is used to specify the size in bytes of memory + pointed to by _param_value_. + This size must be {geq} size of return type as described in the + <> + table. 
* _param_value_ is a pointer to memory where the appropriate result being queried is returned. If _param_value_ is `NULL`, it is ignored. - * _param_value_size_ is used to specify the size in bytes of memory pointed to - by _param_value_. - This size must be {geq} size of return type as described in the - <> table. * _param_value_size_ret_ returns the actual size in bytes of data being queried by _param_name_. If _param_value_size_ret_ is `NULL`, it is ignored. -[[event-profiling-info-table]] -.List of supported param_names by {clGetEventProfilingInfo} +[[mutable-command-object-queries]] +._Mutable Command Object Queries_ [width="100%",cols="<33%,<17%,<50%",options="header"] |==== -| Event Profiling Info | Return Type | Description -| {CL_PROFILING_COMMAND_QUEUED_anchor} - -include::{generated}/api/version-notes/CL_PROFILING_COMMAND_QUEUED.asciidoc[] - | {cl_ulong_TYPE} - | A 64-bit value that describes the current device time counter in - nanoseconds when the command identified by event is enqueued in a - command-queue by the host. -| {CL_PROFILING_COMMAND_SUBMIT_anchor} - -include::{generated}/api/version-notes/CL_PROFILING_COMMAND_SUBMIT.asciidoc[] - | {cl_ulong_TYPE} - | A 64-bit value that describes the current device time counter in - nanoseconds when the command identified by event that has been - enqueued is submitted by the host to the device associated with the - command-queue. -| {CL_PROFILING_COMMAND_START_anchor} +| Mutable Command Info | Return Type | Description +| {CL_MUTABLE_COMMAND_COMMAND_QUEUE_KHR_anchor} -include::{generated}/api/version-notes/CL_PROFILING_COMMAND_START.asciidoc[] - | {cl_ulong_TYPE} - | A 64-bit value that describes the current device time counter in - nanoseconds when the command identified by event starts execution on - the device. 
-| {CL_PROFILING_COMMAND_END_anchor} +include::{generated}/api/version-notes/CL_MUTABLE_COMMAND_COMMAND_QUEUE_KHR.asciidoc[] + | {cl_command_queue_TYPE} + | Return the command-queue associated with _command_. + If `NULL` was passed as the queue when _command_ was recorded, then + the queue associated with the command-buffer that _command_ belongs to + is returned. +| {CL_MUTABLE_COMMAND_COMMAND_BUFFER_KHR_anchor} + +include::{generated}/api/version-notes/CL_MUTABLE_COMMAND_COMMAND_BUFFER_KHR.asciidoc[] + | {cl_command_buffer_khr_TYPE} + | Return the command-buffer associated with _command_. +| {CL_MUTABLE_COMMAND_COMMAND_TYPE_KHR_anchor} + +include::{generated}/api/version-notes/CL_MUTABLE_COMMAND_COMMAND_TYPE_KHR.asciidoc[] + | {cl_command_type_TYPE} + | Return the command-type associated with _command_. + + The list of supported event command types defined by {clGetEventInfo} + is used with the matching command. +| {CL_MUTABLE_DISPATCH_PROPERTIES_ARRAY_KHR_anchor} + +include::{generated}/api/version-notes/CL_MUTABLE_DISPATCH_PROPERTIES_ARRAY_KHR.asciidoc[] + | {cl_ndrange_kernel_command_properties_khr_TYPE}[] + | Return the properties argument specified on _command_ recording with + {clCommandNDRangeKernelKHR}. + + If the properties argument specified on creation of _command_ was not + `NULL`, the implementation must return the values specified in the + properties argument in the same order and without including additional + properties. + + If the properties argument specified on creation of _command_ was + `NULL`, or _command_ was not recorded from a + {clCommandNDRangeKernelKHR} command, the implementation must return + _param_value_size_ret_ equal to 0, indicating that there are no + properties to be returned. +| {CL_MUTABLE_DISPATCH_KERNEL_KHR_anchor} + +include::{generated}/api/version-notes/CL_MUTABLE_DISPATCH_KERNEL_KHR.asciidoc[] + | {cl_kernel_TYPE} + | Return the kernel associated with _command_ when recorded with + {clCommandNDRangeKernelKHR}. 
+ + If _command_ was not recorded from a {clCommandNDRangeKernelKHR} + command, the implementation must return _param_value_size_ret_ equal + to 0, indicating that the value returned in _param_value_ is not + valid. +| {CL_MUTABLE_DISPATCH_DIMENSIONS_KHR_anchor} + +include::{generated}/api/version-notes/CL_MUTABLE_DISPATCH_DIMENSIONS_KHR.asciidoc[] + | {cl_uint_TYPE} + | Return the number of work-item dimensions specified when _command_ was + created. -include::{generated}/api/version-notes/CL_PROFILING_COMMAND_END.asciidoc[] - | {cl_ulong_TYPE} - | A 64-bit value that describes the current device time counter in - nanoseconds when the command identified by event has finished - execution on the device. -| {CL_PROFILING_COMMAND_COMPLETE_anchor} + If _command_ was not recorded from a {clCommandNDRangeKernelKHR} + command, the implementation must return _param_value_size_ret_ equal + to 0, indicating that the value returned in _param_value_ is not + valid. +| {CL_MUTABLE_DISPATCH_GLOBAL_WORK_OFFSET_KHR_anchor} -include::{generated}/api/version-notes/CL_PROFILING_COMMAND_COMPLETE.asciidoc[] - | {cl_ulong_TYPE} - | A 64-bit value that describes the current device time counter in - nanoseconds when the command identified by event and any child - commands enqueued by this command on the device have finished - execution. +include::{generated}/api/version-notes/CL_MUTABLE_DISPATCH_GLOBAL_WORK_OFFSET_KHR.asciidoc[] + | {size_t_TYPE}[] + | Return the global work-item offset set on _command_ creation, or from + the most recent update via {clUpdateMutableCommandsKHR} where this + value was modified. + The output array contains _work_dim_ values, where _work_dim_ is + returned by the query {CL_MUTABLE_DISPATCH_DIMENSIONS_KHR}. + If a global work-item offset was not set, zero is returned for each + element in the array. 
+ + If _command_ was not recorded from a {clCommandNDRangeKernelKHR} + command, the implementation must return _param_value_size_ret_ equal + to 0, indicating that the value returned in _param_value_ is not + valid. +| {CL_MUTABLE_DISPATCH_GLOBAL_WORK_SIZE_KHR_anchor} + +include::{generated}/api/version-notes/CL_MUTABLE_DISPATCH_GLOBAL_WORK_SIZE_KHR.asciidoc[] + | {size_t_TYPE}[] + | Return the global work-item size set on _command_ creation, or from + the most recent update via {clUpdateMutableCommandsKHR} where this + value was modified. + The output array contains _work_dim_ values, where _work_dim_ is + returned by the query {CL_MUTABLE_DISPATCH_DIMENSIONS_KHR}. + If a global work-item size was not set, zero is returned for each + element in the array. + + If _command_ was not recorded from a {clCommandNDRangeKernelKHR} + command, the implementation must return _param_value_size_ret_ equal + to 0, indicating that the value returned in _param_value_ is not + valid. +| {CL_MUTABLE_DISPATCH_LOCAL_WORK_SIZE_KHR_anchor} + +include::{generated}/api/version-notes/CL_MUTABLE_DISPATCH_LOCAL_WORK_SIZE_KHR.asciidoc[] + | {size_t_TYPE}[] + | Return the local work-item size set on _command_ creation, or from the + most recent update via {clUpdateMutableCommandsKHR} where this value + was modified. + The output array contains _work_dim_ values, where _work_dim_ is + returned by the query {CL_MUTABLE_DISPATCH_DIMENSIONS_KHR}. + If a local work-item size was not set, zero is returned for each + element in the array. + + If _command_ was not recorded from a {clCommandNDRangeKernelKHR} + command, the implementation must return _param_value_size_ret_ equal + to 0, indicating that the value returned in _param_value_ is not + valid. |==== -The unsigned 64-bit values returned can be used to measure the time in -nano-seconds consumed by OpenCL commands. - -OpenCL devices are required to correctly track time across changes in device -frequency and power states. 
-The {CL_DEVICE_PROFILING_TIMER_RESOLUTION} specifies the resolution of the -timer i.e. the number of nanoseconds elapsed before the timer is -incremented. - // refError -{clGetEventProfilingInfo} returns {CL_SUCCESS} if the function is executed -successfully and the profiling information has been recorded. +{clGetMutableCommandInfoKHR} returns {CL_SUCCESS} if the function is +executed successfully. Otherwise, it returns one of the following errors: - * {CL_PROFILING_INFO_NOT_AVAILABLE} if the {CL_QUEUE_PROFILING_ENABLE} flag is - not set for the command-queue, if the execution status of the command - identified by _event_ is not {CL_COMPLETE} or if _event_ is a user event - object. - Prior to OpenCL 3.0, implementations may return - {CL_PROFILING_INFO_NOT_AVAILABLE} for an event created by - {clEnqueueSvmFree}. * {CL_INVALID_VALUE} if _param_name_ is not valid, or if size in bytes specified by _param_value_size_ is < size of return type as described in - the <> table and - _param_value_ is not `NULL`. - * {CL_INVALID_EVENT} if _event_ is a not a valid event object. - * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required - by the OpenCL implementation on the device. + the <> + table and _param_value_ is not `NULL`. + * {CL_INVALID_MUTABLE_COMMAND_KHR} if _command_ is not a valid mutable + command object. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources + required by the OpenCL implementation on the device. * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources required by the OpenCL implementation on the host. 
-- +endif::cl_khr_command_buffer_mutable_dispatch[] +endif::cl_khr_command_buffer[] -== Flush and Finish - -[open,refpage='clFlush',desc='Issues all previously queued OpenCL commands in a command-queue to the device associated with the command-queue.',type='protos'] --- -To flush commands to a device, call the function - -include::{generated}/api/protos/clFlush.txt[] -include::{generated}/api/version-notes/clFlush.asciidoc[] - - * _command_queue_ is the command-queue to flush. - -All previously queued OpenCL commands in _command_queue_ are issued to the -device associated with _command_queue_. -{clFlush} only guarantees that all queued commands to _command_queue_ will -eventually be submitted to the appropriate device. -There is no guarantee that they will be complete after {clFlush} returns. - -Any blocking commands queued in a command-queue and {clReleaseCommandQueue} -perform an implicit flush of the command-queue. -These blocking commands are {clEnqueueReadBuffer}, -{clEnqueueReadBufferRect}, {clEnqueueReadImage}, with _blocking_read_ set to -{CL_TRUE}; {clEnqueueWriteBuffer}, {clEnqueueWriteBufferRect}, -{clEnqueueWriteImage} with _blocking_write_ set to {CL_TRUE}; -{clEnqueueMapBuffer}, {clEnqueueMapImage} with _blocking_map_ set to -{CL_TRUE}; {clEnqueueSVMMemcpy} with _blocking_copy_ set to {CL_TRUE}; -{clEnqueueSVMMap} with _blocking_map_ set to {CL_TRUE} or {clWaitForEvents}. - -To use event objects that refer to commands enqueued in a command-queue as -event objects to wait on by commands enqueued in a different command-queue, -the application must call a {clFlush} or any blocking commands that perform -an implicit flush of the command-queue where the commands that refer to -these event objects are enqueued. -// refError +ifdef::cl_khr_gl_sharing[] +[[querying-devices-that-support-sharing-with-opengl]] +== Querying Devices That Support Sharing With OpenGL -{clFlush} returns {CL_SUCCESS} if the function call was executed successfully. 
-Otherwise, it returns one of the following errors: +OpenCL device(s) corresponding to an OpenGL context may be queried. +Such a device may not always exist (for example, if an OpenGL context is +specified on a GPU not supporting OpenCL command-queues, but which does +support shared OpenCL/OpenGL memory objects), and if it does exist, may +change over time. +When such a device does exist, acquiring and releasing shared OpenCL/OpenGL +memory objects may be faster on a command-queue corresponding to this device +than on command-queues corresponding to other devices available to an OpenCL +context. - * {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not a valid host - command-queue. - * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required - by the OpenCL implementation on the device. - * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources - required by the OpenCL implementation on the host. +[open,refpage='clGetGLContextInfoKHR',desc='Query OpenCL device corresponding to an OpenGL context',type='protos'] -- +To query the OpenCL device corresponding to an OpenGL context, call the +function -[open,refpage='clFinish',desc='Blocks until all previously queued OpenCL commands in a command-queue are issued to the associated device and have completed.',type='protos'] --- -To wait for completion of commands on a device, call the function +include::{generated}/api/protos/clGetGLContextInfoKHR.txt[] -include::{generated}/api/protos/clFinish.txt[] -include::{generated}/api/version-notes/clFinish.asciidoc[] + * _properties_ points to a property list whose format and valid contents + are identical to the _properties_ argument of {clCreateContext}. + _properties_ must identify a single valid GL context or GL share group + object. + * _param_name_ is a constant that specifies the device types to query, and + must be one of the values shown in the <<gl-context-info-table>> table below.
+ * _param_value_ is a pointer to memory where the result of the query is + returned, as described in the <<gl-context-info-table>> table. + If _param_value_ is `NULL`, it is ignored. + * _param_value_size_ specifies the size in bytes of memory pointed to by + _param_value_. + This size must be greater than or equal to the size of the return type + described in the table below. + * _param_value_size_ret_ returns the actual size in bytes of data being + queried by _param_value_. + If _param_value_size_ret_ is `NULL`, it is ignored. - * _command_queue_ is the command-queue to wait for. +[[gl-context-info-table]] +.Supported Device Types for {clGetGLContextInfoKHR} +[cols="2,1,2",options="header",] +|==== +| param_name | Return Type | Information returned in param_value +| {CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR_anchor} -All previously queued OpenCL commands in _command_queue_ are issued to the -associated device, and the function blocks until all previously queued -commands have completed. -{clFinish} does not return until all previously queued commands in -_command_queue_ have been processed and completed. -{clFinish} is also a synchronization point. +include::{generated}/api/version-notes/CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR.asciidoc[] + | {cl_device_id_TYPE} + | Return the OpenCL device currently associated with the specified + OpenGL context. +| {CL_DEVICES_FOR_GL_CONTEXT_KHR_anchor} + +include::{generated}/api/version-notes/CL_DEVICES_FOR_GL_CONTEXT_KHR.asciidoc[] + | {cl_device_id_TYPE}[] + | Return all OpenCL devices which may be associated with the specified + OpenGL context. +|==== // refError -{clFinish} returns {CL_SUCCESS} if the function call was executed +{clGetGLContextInfoKHR} returns {CL_SUCCESS} if the function is executed successfully. +If no device(s) exist corresponding to _param_name_, the call will not fail, +but the value of _param_value_size_ret_ will be zero.
Otherwise, it returns one of the following errors: - * {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not a valid host - command-queue. - * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required - by the OpenCL implementation on the device. + * {CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR} if a context was specified for + an OpenGL or OpenGL ES implementation using the EGL, GLX, or WGL binding + APIs, as <>; and + any of the following conditions hold: + ** The specified display and context properties do not identify a valid + OpenGL or OpenGL ES context. + ** The specified context does not support buffer and renderbuffer objects. + ** The specified context is not compatible with the OpenCL context being + created (for example, it exists in a physically distinct address space, + such as another hardware device; or it does not support sharing data + with OpenCL due to implementation restrictions). + * {CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR} if a share group was specified + for a CGL-based OpenGL implementation by setting the property + {CL_CGL_SHAREGROUP_KHR}, and the specified share group does not identify + a valid CGL share group object. + * {CL_INVALID_OPERATION} if a context was specified as described above and + any of the following conditions hold: + ** A context or share group object was specified for one of CGL, EGL, GLX, + or WGL and the OpenGL implementation does not support that + window-system binding API. + ** More than one of the properties {CL_CGL_SHAREGROUP_KHR}, + {CL_EGL_DISPLAY_KHR}, {CL_GLX_DISPLAY_KHR}, and {CL_WGL_HDC_KHR} is set + to a non-default value. + ** Both of the properties {CL_CGL_SHAREGROUP_KHR} and {CL_GL_CONTEXT_KHR} + are set to non-default values. + ** Any of the devices specified in the argument cannot support + OpenCL objects which share the data store of an OpenGL object. + * {CL_INVALID_VALUE} if a property name other than those specified in + _table 4.5_ is specified in _properties_.
+ * {CL_INVALID_VALUE} if _param_name_ is not one of the values listed in + the <<gl-context-info-table>> table, or if the size in bytes + specified by _param_value_size_ is less than the size of the return type + shown in the table and _param_value_ is not a `NULL` value. + * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources + required by the OpenCL implementation on the device. * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources required by the OpenCL implementation on the host. -- +endif::cl_khr_gl_sharing[] diff --git a/api/provisional_notice.asciidoc b/api/provisional_notice.asciidoc new file mode 100644 index 00000000..7f0720c5 --- /dev/null +++ b/api/provisional_notice.asciidoc @@ -0,0 +1,11 @@ +// Copyright 2023-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +[NOTE] +==== +This is a provisional OpenCL extension specification that has been Ratified under the Khronos Intellectual Property Framework. +It is being made publicly available as a provisional extension to enable review and feedback from the community. +While it is a provisional extension, features may be added, removed, or changed in non-backward compatible ways.
+ +If you have feedback please create an issue on: https://github.com/KhronosGroup/OpenCL-Docs/ +==== diff --git a/c/feature-dictionary.asciidoc b/c/feature-dictionary.asciidoc index ce776703..4943b36b 100644 --- a/c/feature-dictionary.asciidoc +++ b/c/feature-dictionary.asciidoc @@ -129,3 +129,19 @@ endif::[] ifndef::backend-html5[] :opencl_c_work_group_collective_functions: pass:q[`\__opencl_c_​work_​group_​collective_​functions`] endif::[] + +// opencl_c_integer_dot_product_input_4x8bit +ifdef::backend-html5[] +:opencl_c_integer_dot_product_input_4x8bit: pass:q[`\__opencl_c_integer_dot_product_input_4x8bit`] +endif::[] +ifndef::backend-html5[] +:opencl_c_integer_dot_product_input_4x8bit: pass:q[`\__opencl_c_​integer_​dot_​product_​input_​4x8bit`] +endif::[] + +// opencl_c_integer_dot_product_input_4x8bit_packed +ifdef::backend-html5[] +:opencl_c_integer_dot_product_input_4x8bit_packed: pass:q[`\__opencl_c_integer_dot_product_input_4x8bit_packed`] +endif::[] +ifndef::backend-html5[] +:opencl_c_integer_dot_product_input_4x8bit_packed: pass:q[`\__opencl_c_​integer_​dot_​product_​input_​4x8bit_​packed`] +endif::[] diff --git a/c/footnotes.asciidoc b/c/footnotes.asciidoc index 6047fd0a..4045e8e6 100644 --- a/c/footnotes.asciidoc +++ b/c/footnotes.asciidoc @@ -177,7 +177,10 @@ An OpenCL C 3.0 compiler must also define the `+__opencl_c_int64+` feature macro ] :fn-mad-caution: pass:n[ \ -The user is cautioned that for some usages, e.g. *mad*(a, b, -a*b), the definition of *mad*() is loose enough in the embedded profile that almost any result is allowed from *mad*() for some values of a and b. \ +The user is cautioned that for some usages, e.g. *mad*(a, b, -a*b), the \ +definition of *mad*() is loose enough in the embedded profile \ +or with half-precision arguments \ +that almost any result is allowed from *mad*() for some values of a and b. 
\ ] :fn-memory-scope-restrictions: pass:n[ \ diff --git a/config/opencl.asciidoc b/config/opencl.asciidoc index db190b49..62f166b4 100644 --- a/config/opencl.asciidoc +++ b/config/opencl.asciidoc @@ -6,6 +6,7 @@ :khronos-opencl-repo: https://github.com/KhronosGroup/OpenCL-Docs :khronos-opencl-pr: {khronos-opencl-repo}/pull +:OpenCLCSpecURL: OpenCL_C.html :blank: pass:[ +] :pp: ++ diff --git a/config/rouge_opencl.rb b/config/rouge_opencl.rb index e7c26d47..d30913eb 100644 --- a/config/rouge_opencl.rb +++ b/config/rouge_opencl.rb @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # # frozen_string_literal: true -# Copyright (c) 2011-2024 The Khronos Group, Inc. +# Copyright 2011-2024 The Khronos Group Inc. # SPDX-License-Identifier: Apache-2.0 #puts "Loading rouge_opencl extensions for source code highlighting..." diff --git a/ext/cl_khr_3d_image_writes.asciidoc b/ext/cl_khr_3d_image_writes.asciidoc deleted file mode 100644 index 2ebfa10e..00000000 --- a/ext/cl_khr_3d_image_writes.asciidoc +++ /dev/null @@ -1,67 +0,0 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ - -[[cl_khr_3d_image_writes]] -== Writing to 3D Image Objects - -This section describes the *cl_khr_3d_image_writes* extension. - -This extension adds built-in functions that allow a kernel to write to 3D image objects in addition to 2D image objects. - -This extension became a core feature in OpenCL 2.0. - -=== General Information - -==== Version History - -[cols="1,1,3",options="header",] -|==== -| *Date* | *Version* | *Description* -| 2020-04-21 | 1.0.0 | First assigned version. -|==== - -The new built-in functions are described in the table below: - -// Editors note: There are no access qualifiers on these built-in -// functions, because read-write images did not exist pre-OpenCL 2.0. 
- -._3D Image Built-in Image Write Functions_ -[cols=",",options="header",] -|======================================================================= -|*Function* -|*Description* - -|void *write_imagef* ( + -image3d_t _image_, + -int4 _coord_, + -float4 _color_) + -{blank} -void *write_imagei* ( + -image3d_t _image_, + -int4 _coord_, + -int4 _color_) + -{blank} -void *write_imageui* ( + -image3d_t _image_, + -int4 _coord_, + -uint4 _color_) -|Write _color_ value to the location specified by coordinate (_x_, _y_, _z_) in the 3D image specified by _image_. -The appropriate data format conversion to the specified image format is done before writing the color value. -_coord.x_, _coord.y_, and _coord.z_ are considered to be unnormalized coordinates and must be in the range 0 ... image width - 1, 0 ... image height - 1, and 0 ... image depth - 1. + -{blank} -*write_imagef* can only be used with image objects created with _image_channel_data_type_ set to one of the pre-defined packed formats or set to {CL_SNORM_INT8}, {CL_UNORM_INT8}, {CL_SNORM_INT16}, {CL_UNORM_INT16}, {CL_HALF_FLOAT}, or {CL_FLOAT}. Appropriate data format conversion will be done to convert the channel data from a floating-point value to the actual data format in which the channels are stored. + -{blank} -*write_imagei* can only be used with image objects created with _image_channel_data_type_ set to one of the following values: + -{CL_SIGNED_INT8}, + -{CL_SIGNED_INT16}, or + -{CL_SIGNED_INT32}. + -{blank} -*write_imageui* can only be used with image objects created with _image_channel_data_type_ set to one of the following values: + -{CL_UNSIGNED_INT8}, + -{CL_UNSIGNED_INT16}, or + -{CL_UNSIGNED_INT32}. + -{blank} -The behavior of *write_imagef*, *write_imagei*, and *write_imageui* for image objects created with _image_channel_data_type_ values not specified in the description above, or with (_x_, _y_, _z_) coordinate values that are not in the range (0 ... image width - 1, 0 ... 
image height - 1, 0 ... image depth - 1) respectively, is undefined. - -|======================================================================= diff --git a/ext/cl_khr_async_work_group_copy_fence.asciidoc b/ext/cl_khr_async_work_group_copy_fence.asciidoc deleted file mode 100644 index 5a2656c6..00000000 --- a/ext/cl_khr_async_work_group_copy_fence.asciidoc +++ /dev/null @@ -1,57 +0,0 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ - -[[cl_khr_async_work_group_copy_fence]] -== Async Work-group Copy Fence - -This section describes the *cl_khr_async_work_group_copy_fence* extension. -The extension adds a new built-in function to OpenCL C to establish a memory synchronization ordering of asynchronous copies. - -=== General Information - -==== Version History - -[cols="1,1,3",options="header",] -|==== -| *Date* | *Version* | *Description* -| 2020-04-21 | 0.9.0 | First assigned version (provisional). -| 2021-11-10 | 1.0.0 | First non-provisional version. -|==== - -[[cl_khr_async_work_group_copy_fence-additions-to-chapter-6-of-the-opencl-specification]] -=== Additions to Chapter 6 of the OpenCL C Specification - -The following new built-in function is added to the _Async Copies from Global to -Local Memory, Local to Global Memory, and Prefetch_ functions described in _section 6.12.10_ -and _section 6.13.10_ of the OpenCL 1.2 and OpenCL 2.0 C specifications: - -[cols="1a,1",options="header",] -|======================================================================= -|*Function* |*Description* -|[source,opencl_c] ----- -void async_work_group_copy_fence( - cl_mem_fence_flags flags) ----- -| Orders async copies produced by the work-items of a work-group executing -a kernel. 
Async copies preceding the *async_work_group_copy_fence* must -complete their access to the designated memory or memories, -including both reads-from and writes-to it, before async copies -following the fence are allowed to start accessing these memories. -In other words, every async copy preceding the *async_work_group_copy_fence* -must happen-before every async copy following the fence, with respect to -the designated memory or memories. - -The _flags_ argument specifies the memory address space and can be set to a -combination of the following literal values: - -`CLK_LOCAL_MEM_FENCE` + -`CLK_GLOBAL_MEM_FENCE` - -The async fence is performed by all work-items in a work-group and this -built-in function must therefore be encountered by all work-items in a -work-group executing the kernel with the same argument values; -otherwise the results are undefined. This rule applies to ND-ranges -implemented with uniform and non-uniform work-groups. -|======================================================================= diff --git a/ext/cl_khr_byte_addressable_store.asciidoc b/ext/cl_khr_byte_addressable_store.asciidoc deleted file mode 100644 index 0386a981..00000000 --- a/ext/cl_khr_byte_addressable_store.asciidoc +++ /dev/null @@ -1,22 +0,0 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ - -[[cl_khr_byte_addressable_store]] -== Byte Addressable Stores - -This section describes the *cl_khr_byte_addressable_store* extension. -This extension relaxes restrictions on pointers to `char`, `uchar`, `char2`, `uchar2`, `short`, `ushort` and `half` that were present in _Section 6.8m: Restrictions_ of the OpenCL 1.0 specification. -With this extension, applications are able to read from and write to pointers to these types. - -This extension became a core feature in OpenCL 1.1. 
- -=== General Information - -==== Version History - -[cols="1,1,3",options="header",] -|==== -| *Date* | *Version* | *Description* -| 2020-04-21 | 1.0.0 | First assigned version. -|==== diff --git a/ext/cl_khr_command_buffer.asciidoc b/ext/cl_khr_command_buffer.asciidoc deleted file mode 100644 index 2c543eea..00000000 --- a/ext/cl_khr_command_buffer.asciidoc +++ /dev/null @@ -1,2020 +0,0 @@ -// Copyright 2018-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ - -[[cl_khr_command_buffer]] -== Command Buffers (Provisional) - -This extension adds the ability to record and replay buffers of OpenCL commands. - -=== General Information - -==== Name Strings - -`cl_khr_command_buffer` - -==== Version History - -[cols="1,1,3",options="header",] -|==== -| *Date* | *Version* | *Description* -| 2021-11-10 | 0.9.0 | First assigned version (provisional). -| 2022-08-24 | 0.9.1 | Specify an error if a command-buffer is finalized multiple times (provisional). -| 2023-03-31 | 0.9.2 | Introduce context query {CL_COMMAND_BUFFER_CONTEXT_KHR} (provisional). -| 2023-04-04 | 0.9.3 | Remove Invalid command-buffer state (provisional). -| 2023-05-11 | 0.9.4 | Add clCommandSVMMemcpyKHR and clCommandSVMMemFillKHR command entries (provisional). -|==== - -include::provisional_notice.asciidoc[] - -==== Dependencies - -This extension is written against the OpenCL Specification version 3.0.6. - -This extension requires OpenCL 1.2 or later. Buffering of SVM commands -requires OpenCL 2.0 or later. - -==== Contributors - -Ewan Crawford, Codeplay Software Ltd. + -Gordon Brown, Codeplay Software Ltd. + -Kenneth Benzie, Codeplay Software Ltd. + -Alastair Murray, Codeplay Software Ltd. + -Jack Frankland, Codeplay Software Ltd. + -Balaji Calidas, Qualcomm Technologies Inc. + -Joshua Kelly, Qualcomm Technologies, Inc. + -Kevin Petit, Arm Ltd. + -Aharon Abramson, Intel. + -Ben Ashbaugh, Intel. 
+ -Boaz Ouriel, Intel. + -Chris Gearing, Intel. + -Pekka Jääskeläinen, Tampere University and Intel. + -Jan Solanti, Tampere University + -Nikhil Joshi, NVIDIA + -James Price, Google + -Brice Videau, Argonne National Laboratory + - -=== Overview - -Command-buffers enable a reduction in overhead when enqueuing the same -workload multiple times. By separating the command-queue setup from dispatch, -the ability to replay a set of previously created commands is introduced. - -Device-side _cl_sync_point_khr_ synchronization-points can be used within -command-buffers to define command dependencies. This allows the commands of a -command-buffer to execute out-of-order on a single <> -command-queue. The command-buffer itself has no inherent in-order/out-of-order -property, this ordering is inferred from the command-queue used on command -recording. Out-of-order enqueues without event dependencies of both regular -commands, such as {clEnqueueFillBuffer}, and command-buffers are allowed to -execute concurrently, and it is up to the user to express any dependencies using -events. - -The command-queues a command-buffer will be executed on can be set on replay via -parameters to {clEnqueueCommandBufferKHR}, provided they are -<> with the command-queues used on command-buffer -recording. - -==== Background - -On embedded devices where building a command stream accounts for a significant -expenditure of resources and where workloads are often required to be pipelined, -a solution that minimizes driver overhead can significantly improve the -utilization of accelerators by removing a bottleneck in repeated command stream -generation. - -An additional motivator is lowering task execution latency, as devices can be -kept occupied with work by repeated submissions, without having to wait on -the host to construct commands again for a similar workload. 
- -==== Rationale - -The command-buffer abstraction over the generation of command streams is a -proven approach which facilitates a significant reduction in driver overhead in -existing real-world applications with repetitive pipelined workloads which are -built on top of Vulkan, DirectX 12, and Metal. - -A primary goal is for a command-buffer to avoid any interaction with -application code after being enqueued until all recorded commands have -completed. As such, any command which maps or migrates memory objects; reads -or writes memory objects; or enqueues a native kernel, is not available for -command-buffer recording. Finally commands recorded into a command buffer do -not wait for or return event objects, these are instead replaced with -device-side synchronization-point identifiers which enable out-of-order -execution when enqueued on <> command-queues. - -Adding new entry-points for individual commands, rather than recording existing -command-queue APIs with begin/end markers was a design decision made for the -following reasons: - -* Individually specified entry points makes it clearer to the user what's - supported, as opposed to adding a large number of error conditions - throughout the specification with all the restrictions. - -* Prevents code forking in existing entry points for the implementer, as - otherwise separate paths in each entry point need to be maintained for both - the recording and normal cases. - -* Allows the definition of a new device-side synchronization primitive rather - than overloading {cl_event_TYPE}. As use of {cl_event_TYPE} in individual commands - allows host interaction from callback and user-events, as well as introducing - complexities when a command-buffer is enqueued multiple times regarding - profiling and execution status. - -* New entry points facilitate returning handles to individual commands, allowing - those commands to be modified between enqueues of the command buffer. 
Not all - command handles are used in this extension, but providing them facilitates - other extensions layered on top to take advantage of them to provide additional - mutable functionality. - -==== Simultaneous Use - -The optional simultaneous use capability was added to the extension so that -vendors can support pipelined workflows, where command-buffers are repeatedly -enqueued without blocking in user code. However, simultaneous use may result in -command-buffers being more expensive to enqueue than in a sequential model, so -the capability is optional to enable optimizations on command-buffer recording. - -=== Interactions with Other Extensions - -The introduction of the command-buffer abstraction enables functionality -beyond what the `cl_khr_command_buffer` extension currently provides, i.e. -the recording of immutable commands to a single queue which can then be -executed without commands synchronizing outside the command-buffer. It is -intended that extra functionality expanding on this will be provided as layered -extensions on top of `cl_khr_command_buffer`. - -Having `cl_khr_command_buffer` as a minimal base specification means that the -API defines mechanisms for functionality that is not enabled by this extension, -these are described in the following sub-sections. `cl_khr_command_buffer` will -retain its provisional extension status until other layered extensions are -released, as these may reveal modifications needed to the base specification to -support their intended use cases. - -==== ND-range Kernel Command Properties - -The {clCommandNDRangeKernelKHR} entry-point defines a `properties` parameter of -new type {cl_ndrange_kernel_command_properties_khr_TYPE}. No properties are defined -in `cl_khr_command_buffer`, but the parameter is intended to enable future -functionality that would change the characteristics of the kernel command. 
- -==== Command Handles - -All command recording entry-points define a {cl_mutable_command_khr_TYPE} output -parameter which provides a handle to the specific command being recorded. Use of -these output handles is not enabled by the `cl_khr_command_buffer` extension, -but the handles will allow individual commands in a command-buffer to be -referenced by the user. In particular, the capability for an application to use -these handles to modify commands between enqueues of a command-buffer is -envisaged. - -==== List of Queues - -Only a single command-queue can be associated with a command-buffer in the -`cl_khr_command_buffer` extension, but the API is designed with the intention -that a future extension will allow commands to be recorded across multiple -queues in the same command-buffer, providing replay of heterogeneous task -graphs. - -Using multiple queue functionality will result in an error without any layered -extensions to relax usage of the following API features: - -* When a command-buffer is created the API enables passing a list of queues - that the command-buffer will record commands to. Only a single queue is - permitted in `cl_khr_command_buffer`. - -* Individual command recording entry-points define a {cl_command_queue_TYPE} - parameter for which of the queues set on command-buffer creation that command - should be record to. This must be passed as NULL in `cl_khr_command_buffer`. - -* {clEnqueueCommandBufferKHR} takes a list of queues for command-buffer execution, - correspond to those set on creation. Only a single queue is permitted in - `cl_khr_command_buffer`. 
- -=== New Types - -==== Command Buffer Types - -Bitfield for querying command-buffer capabilities of an OpenCL device with -{clGetDeviceInfo}, see <>: -[source] ----- -typedef cl_bitfield cl_device_command_buffer_capabilities_khr ----- - -Types describing <>: - -[source] ----- -// Returned by clCreateCommandBufferKHR() -typedef struct _cl_command_buffer_khr* cl_command_buffer_khr; - -// Unique ID to a device-side synchronization-point used to describe the -// ordering of commands when recording a command-buffer. Valid for use -// only within the same command-buffer during recording. -typedef cl_uint cl_sync_point_khr; - -// Handle returned on command recording -typedef struct _cl_mutable_command_khr* cl_mutable_command_khr; - -// Properties of a clCommandNDRangeKernelKHR command -typedef cl_properties cl_ndrange_kernel_command_properties_khr; - -// Properties for command-buffer creation -typedef cl_properties cl_command_buffer_properties_khr; - -// Bitfield representing flags for command-buffers -typedef cl_bitfield cl_command_buffer_flags_khr; - -// Enumerated type for use in clGetCommandBufferInfoKHR() -typedef cl_uint cl_command_buffer_info_khr; - -// Return type for CL_COMMAND_BUFFER_STATE_KHR in clGetCommandBufferInfoKHR() -typedef cl_uint cl_command_buffer_state_khr; ----- - -=== New API Functions - -Command-buffer entry points from <>: -[source] ----- -cl_command_buffer_khr clCreateCommandBufferKHR( - cl_uint num_queues, - const cl_command_queue* queues, - const cl_command_buffer_properties_khr* properties, - cl_int* errcode_ret); - -cl_int clRetainCommandBufferKHR(cl_command_buffer_khr command_buffer); - -cl_int clReleaseCommandBufferKHR(cl_command_buffer_khr command_buffer); - -cl_int clFinalizeCommandBufferKHR(cl_command_buffer_khr command_buffer); - -cl_int clEnqueueCommandBufferKHR( - cl_uint num_queues, - cl_command_queue* queues, - cl_command_buffer_khr command_buffer, - cl_uint num_events_in_wait_list, - const cl_event* event_wait_list, - cl_event* 
event); - -cl_int clCommandBarrierWithWaitListKHR( - cl_command_buffer_khr command_buffer, - cl_command_queue command_queue, - cl_uint num_sync_points_in_wait_list, - const cl_sync_point_khr* sync_point_wait_list, - cl_sync_point_khr* sync_point, - cl_mutable_command_khr* mutable_handle); - -cl_int clCommandCopyBufferKHR( - cl_command_buffer_khr command_buffer, - cl_command_queue command_queue, - cl_mem src_buffer, - cl_mem dst_buffer, - size_t src_offset, - size_t dst_offset, - size_t size, - cl_uint num_sync_points_in_wait_list, - const cl_sync_point_khr* sync_point_wait_list, - cl_sync_point_khr* sync_point, - cl_mutable_command_khr* mutable_handle); - -cl_int clCommandCopyBufferRectKHR( - cl_command_buffer_khr command_buffer, - cl_command_queue command_queue, - cl_mem src_buffer, - cl_mem dst_buffer, - const size_t* src_origin, - const size_t* dst_origin, - const size_t* region, - size_t src_row_pitch, - size_t src_slice_pitch, - size_t dst_row_pitch, - size_t dst_slice_pitch, - cl_uint num_sync_points_in_wait_list, - const cl_sync_point_khr* sync_point_wait_list, - cl_sync_point_khr* sync_point, - cl_mutable_command_khr* mutable_handle); - -cl_int clCommandCopyBufferToImageKHR( - cl_command_buffer_khr command_buffer, - cl_command_queue command_queue, - cl_mem src_buffer, - cl_mem dst_image, - size_t src_offset, - const size_t* dst_origin, - const size_t* region, - cl_uint num_sync_points_in_wait_list, - const cl_sync_point_khr* sync_point_wait_list, - cl_sync_point_khr* sync_point, - cl_mutable_command_khr* mutable_handle); - -cl_int clCommandCopyImageKHR( - cl_command_buffer_khr command_buffer, - cl_command_queue command_queue, - cl_mem src_image, - cl_mem dst_image, - const size_t* src_origin, - const size_t* dst_origin, - const size_t* region, - cl_uint num_sync_points_in_wait_list, - const cl_sync_point_khr* sync_point_wait_list, - cl_sync_point_khr* sync_point, - cl_mutable_command_khr* mutable_handle); - -cl_int clCommandCopyImageToBufferKHR( - 
cl_command_buffer_khr command_buffer, - cl_command_queue command_queue, - cl_mem src_image, - cl_mem dst_buffer, - const size_t* src_origin, - const size_t* region, - size_t dst_offset, - cl_uint num_sync_points_in_wait_list, - const cl_sync_point_khr* sync_point_wait_list, - cl_sync_point_khr* sync_point, - cl_mutable_command_khr* mutable_handle); - -cl_int clCommandFillBufferKHR( - cl_command_buffer_khr command_buffer, - cl_command_queue command_queue, - cl_mem buffer, - const void* pattern, - size_t pattern_size, - size_t offset, - size_t size, - cl_uint num_sync_points_in_wait_list, - const cl_sync_point_khr* sync_point_wait_list, - cl_sync_point_khr* sync_point, - cl_mutable_command_khr* mutable_handle); - -cl_int clCommandFillImageKHR( - cl_command_buffer_khr command_buffer, - cl_command_queue command_queue, - cl_mem image, - const void* fill_color, - const size_t* origin, - const size_t* region, - cl_uint num_sync_points_in_wait_list, - const cl_sync_point_khr* sync_point_wait_list, - cl_sync_point_khr* sync_point, - cl_mutable_command_khr* mutable_handle); - -cl_int clCommandNDRangeKernelKHR( - cl_command_buffer_khr command_buffer, - cl_command_queue command_queue, - const cl_ndrange_kernel_command_properties_khr* properties, - cl_kernel kernel, - cl_uint work_dim, - const size_t* global_work_offset, - const size_t* global_work_size, - const size_t* local_work_size, - cl_uint num_sync_points_in_wait_list, - const cl_sync_point_khr* sync_point_wait_list, - cl_sync_point_khr* sync_point, - cl_mutable_command_khr* mutable_handle); - -cl_int clGetCommandBufferInfoKHR( - cl_command_buffer_khr command_buffer, - cl_command_buffer_info_khr param_name, - size_t param_value_size, - void* param_value, - size_t* param_value_size_ret); ----- - -The following SVM entry points are supported only with at least OpenCL 2.0 and -starting from 0.9.4 of this extension: - -[source] ----- - -cl_int clCommandSVMMemcpyKHR( - cl_command_buffer_khr command_buffer, - cl_command_queue 
command_queue, - void* dst_ptr, - const void* src_ptr, - size_t size, - cl_uint num_sync_points_in_wait_list, - const cl_sync_point_khr* sync_point_wait_list, - cl_sync_point_khr* sync_point, - cl_mutable_command_khr* mutable_handle); - -cl_int clCommandSVMMemFillKHR( - cl_command_buffer_khr command_buffer, - cl_command_queue command_queue, - void* svm_ptr, - const void* pattern, - size_t pattern_size, - size_t size, - cl_uint num_sync_points_in_wait_list, - const cl_sync_point_khr* sync_point_wait_list, - cl_sync_point_khr* sync_point, - cl_mutable_command_khr* mutable_handle); ----- - -=== New API Enums - -Enums for querying device command-buffer capabilities with -{clGetDeviceInfo}, see <>: - -[source] ----- -// Accepted values for the param_name parameter to clGetDeviceInfo -CL_DEVICE_COMMAND_BUFFER_CAPABILITIES_KHR 0x12A9 -CL_DEVICE_COMMAND_BUFFER_REQUIRED_QUEUE_PROPERTIES_KHR 0x12AA - -// Bits for cl_device_command_buffer_capabilities_khr bitfield -CL_COMMAND_BUFFER_CAPABILITY_KERNEL_PRINTF_KHR (0x1 << 0) -CL_COMMAND_BUFFER_CAPABILITY_DEVICE_SIDE_ENQUEUE_KHR (0x1 << 1) -CL_COMMAND_BUFFER_CAPABILITY_SIMULTANEOUS_USE_KHR (0x1 << 2) -CL_COMMAND_BUFFER_CAPABILITY_OUT_OF_ORDER_KHR (0x1 << 3) - -// Values for cl_command_buffer_state_khr -CL_COMMAND_BUFFER_STATE_RECORDING_KHR 0x0 -CL_COMMAND_BUFFER_STATE_EXECUTABLE_KHR 0x1 -CL_COMMAND_BUFFER_STATE_PENDING_KHR 0x2 ----- - -Enums for base <> functionality: - -[source] ----- -// Error codes -CL_INVALID_COMMAND_BUFFER_KHR -1138 -CL_INVALID_SYNC_POINT_WAIT_LIST_KHR -1139 -CL_INCOMPATIBLE_COMMAND_QUEUE_KHR -1140 - -// Bitfield to clCreateCommandBufferKHR -CL_COMMAND_BUFFER_FLAGS_KHR 0x1293 - -// Bits for cl_command_buffer_flags_khr bitfield -CL_COMMAND_BUFFER_SIMULTANEOUS_USE_KHR (0x1 << 0) - -// cl_command_buffer_info_khr queries to clGetCommandBufferInfoKHR -CL_COMMAND_BUFFER_QUEUES_KHR 0x1294 -CL_COMMAND_BUFFER_NUM_QUEUES_KHR 0x1295 -CL_COMMAND_BUFFER_REFERENCE_COUNT_KHR 0x1296 -CL_COMMAND_BUFFER_STATE_KHR 0x1297 
-CL_COMMAND_BUFFER_PROPERTIES_ARRAY_KHR 0x1298 -CL_COMMAND_BUFFER_CONTEXT_KHR 0x1299 - -// cl_event command-buffer enqueue command type -CL_COMMAND_COMMAND_BUFFER_KHR 0x12A8 ----- - -=== Modifications to section 4.2 of the OpenCL API Specification - -Add to *Table 5*, _Device Queries_, of section 4.2: - -[[command-buffer-queries]] -[caption="Table 5. "] -.List of supported param_names by {clGetDeviceInfo} -[cols="1,1,4",options="header"] -|==== -| Device Info -| Return Type -| Description - -| {CL_DEVICE_COMMAND_BUFFER_CAPABILITIES_KHR} -| {cl_device_command_buffer_capabilities_khr_TYPE} -| Describes device command-buffer capabilities, encoded as bits in a bitfield. - Supported capabilities are: - - {CL_COMMAND_BUFFER_CAPABILITY_KERNEL_PRINTF_KHR} Device supports the ability - to record commands that execute kernels which contain printf calls. - - {CL_COMMAND_BUFFER_CAPABILITY_DEVICE_SIDE_ENQUEUE_KHR} Device supports the - ability to record commands that execute kernels which contain device-side - kernel-enqueue calls. - - {CL_COMMAND_BUFFER_CAPABILITY_SIMULTANEOUS_USE_KHR} Device supports the - command-buffers having a <> that exceeds 1. - - {CL_COMMAND_BUFFER_CAPABILITY_OUT_OF_ORDER_KHR} Device supports the ability - to record command-buffers to out-of-order command-queues. - -| {CL_DEVICE_COMMAND_BUFFER_REQUIRED_QUEUE_PROPERTIES_KHR} -| {cl_command_queue_properties_TYPE} -| Bitmask of the minimum properties with which a command-queue must be created - to allow a command-buffer to be executed on it. It is valid for a - command-queue to be created with extra properties in addition to this - base requirement and still be compatible with command-buffer execution. -|==== - -[[command-buffers]] -=== Add new section "Section 5.X - Command Buffers" to OpenCL API Specification - -A _command-buffer_ object represents a series of operations to be enqueued -on one or more command-queues without any application code interaction. 
-Grouping the operations together allows efficient enqueuing of repetitive -operations, as well as enabling driver optimizations. - -Command-buffers are _sequential use_ by default, but may also be set to -_simultaneous use_ on creation if the device optionally supports this -capability. A sequential use command-buffer must have a <> of 0 or 1. The simultaneous use capability removes this -restriction and allows command-buffers to have a <> greater than 1. - -[[compatible]] -Command-buffers are created using an ordered list of command-queues that -commands are recorded to and execute on by default. These command-queues can be -replaced on command-buffer enqueue with different command-queues, provided for -each element in the replacement list the substitute command-queue is compatible -with the command-queue used on command-buffer creation. Where a _compatible_ -command-queue is defined as a command-queue with identical properties targeting -the same device and in the same OpenCL context. - -While constructing a command-buffer it is valid for the user to interleave calls -to the same queue which create commands, such as {clCommandNDRangeKernelKHR}, with -queue submission calls, such as {clEnqueueNDRangeKernel} or -{clEnqueueCommandBufferKHR}. That is, there is no effect on queue state from -recording commands. The purpose of the queue parameter is to define the device -and properties of the command, which are constant queries on the queue object. - -A command-buffer object should increment the reference count of attached OpenCL -objects such as queues, buffers, images, and kernels referenced in commands -recorded to the command-buffer. This enables correct behavior of the -command-buffer when its attached objects have been released. On destruction of -the command-buffer it should decrement these reference counts, allowing the -attached objects to be freed if appropriate. 
- -[[command-buffer-kernel-argument-ref-counting]] -[NOTE] -==== -A command-buffer object does not update the reference count of objects set as -arguments on kernels recorded into the command-buffer. This is consistent with -the reference counting behavior of {clSetKernelArg}. - -Applications should ensure that objects passed as arguments to kernels recorded -to a command-buffer are not deleted until the command-buffer has been released. -Undefined behavior may result from the failure to follow this usage requirement -for all the command-buffers an object is used as a kernel argument in. - -If using layered extension `cl_khr_command_buffer_mutable_dispatch`, -<>. -==== - -==== Add new section "Section 5.X.1 - Command Buffer Lifecycle" - -A command-buffer is always in one of the following states: - -[[recording]] -Recording:: Initial state of a command-buffer on creation, where commands can be -recorded to the command-buffer. - -[[executable]] -Executable:: State after command recording has finished with -{clFinalizeCommandBufferKHR} and the command-buffer may be enqueued. - -[[pending]] -Pending:: Once a command-buffer has been enqueued to a command-queue it enters -the Pending state until completion, at which point it moves back to the -<> state. - -//// -Image generated from the following mermaid diagram description using https://mermaid.live -Ideally we'd use the asciidoctor-diagram extension to generate the rendered diagram, but -there are issues installing the gem with ruby 2.3.3 - -[mermaid, "Lifecycle of a command-buffer", png] -.... -stateDiagram-v2 - [*] --> Recording: Create - Recording -->Executable: Finalize - Executable --> Pending: Enqueue - Pending --> Executable: Completion -.... -//// - -image::images/commandbuffer_lifecycle.png[align="center", title="Lifecycle of a command-buffer."] - -[[pending_count]] -The Pending Count is the number of copies of the command -buffer in the <> state. By default a command-buffer's Pending -Count must be 0 or 1. 
If the command-buffer was created with -{CL_COMMAND_BUFFER_SIMULTANEOUS_USE_KHR} then the command-buffer may have a -Pending Count greater than 1. - -==== Add new section "Section 5.X.2 - Creating Command Buffer Objects" - -The function - -include::{generated}/api/protos/clCreateCommandBufferKHR.txt[] - -Is used to create a command-buffer that can record commands to the specified -queues. - -[NOTE] -==== -Upon creation the command-buffer is defined as being in the -<> state, in order for the command-buffer to be enqueued -it must first be finalized using {clFinalizeCommandBufferKHR} after which no -further commands can be recorded. A command-buffer is submitted for execution -on command-queues with a call to {clEnqueueCommandBufferKHR}. -==== - -_num_queues_ The number of command-queues listed in _queues_. This extension -only supports a single command-queue, so this **must** be one. - -_queues_ Is a pointer to a command-queue that the command-buffer commands will -be recorded to. _queues_ must be a non-`NULL` value. - -_properties_ Specifies a list of properties for the command-buffer and their -corresponding values. Each property name is immediately followed by the -corresponding desired value. The list is terminated with 0. -The list of supported properties is described in the table below. If a -supported property and its value is not specified in properties, its -default value will be used. _properties_ can be `NULL` in which case the -default values for supported command-buffer properties will be used. - -[[commandbuffer-properties]] -.{clCreateCommandBufferKHR} properties -[cols=",,",options="header",] -|==== -| *Recording Properties* -| *Property Value* -| *Description* - -| {CL_COMMAND_BUFFER_FLAGS_KHR} -| {cl_command_buffer_flags_khr_TYPE} -| This is a bitfield and can be set to a combination of the following values: - - {CL_COMMAND_BUFFER_SIMULTANEOUS_USE_KHR} - Allow multiple instances of the - command-buffer to be submitted to the device for execution. 
If set, devices - must support {CL_COMMAND_BUFFER_CAPABILITY_SIMULTANEOUS_USE_KHR}. - - The default value of this property is `0`. -|==== - -_errcode_ret_ Returns an appropriate error code. If _errcode_ret_ is `NULL`, no -error code is returned. - -{clCreateCommandBufferKHR} returns a valid non-zero command-buffer and -_errcode_ret_ is set to {CL_SUCCESS} if the command-buffer is created -successfully. Otherwise, it returns a `NULL` value with one of the following -error values returned in _errcode_ret_: - -* {CL_INVALID_COMMAND_QUEUE} if any command-queue in _queues_ is not a valid - command-queue. - -* {CL_INCOMPATIBLE_COMMAND_QUEUE_KHR} if any command-queue in _queues_ is an - out-of-order command-queue and the device associated with the command-queue - does not support the {CL_COMMAND_BUFFER_CAPABILITY_OUT_OF_ORDER_KHR} - capability. - -* {CL_INCOMPATIBLE_COMMAND_QUEUE_KHR} if the properties of any command-queue in - _queues_ does not contain the minimum properties specified by - {CL_DEVICE_COMMAND_BUFFER_REQUIRED_QUEUE_PROPERTIES_KHR}. - -* {CL_INVALID_CONTEXT} if all the command-queues in _queues_ do not have the - same OpenCL context. - -* {CL_INVALID_VALUE} if _num_queues_ is not one. - -* {CL_INVALID_VALUE} if _queues_ is `NULL`. - -* {CL_INVALID_VALUE} if values specified in _properties_ are not valid, or if - the same property name is specified more than once. - -* {CL_INVALID_PROPERTY} if values specified in _properties_ are valid but are - not supported by all the devices associated with command-queues in _queues_. - -* {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources - required by the OpenCL implementation on the device. - -* {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources - required by the OpenCL implementation on the host. - -The function - -include::{generated}/api/protos/clRetainCommandBufferKHR.txt[] - -Increments the _command_buffer_ reference count. - -_command_buffer_ Specifies the command-buffer to retain. 
- -{clRetainCommandBufferKHR} returns {CL_SUCCESS} if the function is executed -successfully. Otherwise, it returns one of the following errors: - -* {CL_INVALID_COMMAND_BUFFER_KHR} if _command_buffer_ is not a valid - command-buffer. - -* {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required by - the OpenCL implementation on the device. - -* {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources required by - the OpenCL implementation on the host. - -The function - -include::{generated}/api/protos/clReleaseCommandBufferKHR.txt[] - -Decrements the _command_buffer_ reference count. - -[NOTE] -==== -After the _command_buffer_ reference count becomes zero and has finished -execution, the command-buffer is deleted. -==== - -_command_buffer_ Specifies the command-buffer to release. - -{clReleaseCommandBufferKHR} returns {CL_SUCCESS} if the function is executed -successfully. Otherwise, it returns one of the following errors: - -* {CL_INVALID_COMMAND_BUFFER_KHR} if _command_buffer_ is not a valid - command-buffer. - -* {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources - required by the OpenCL implementation on the device. - -* {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources - required by the OpenCL implementation on the host. - -==== Add new section "Section 5.X.2 - Enqueuing a Command Buffer" - -The function - -include::{generated}/api/protos/clFinalizeCommandBufferKHR.txt[] - -Finalizes command recording ready for enqueuing the command-buffer on a -command-queue. - -[NOTE] -==== -{clFinalizeCommandBufferKHR} places the command-buffer in the -<> state where commands can no longer be recorded, at -this point the command-buffer is ready to be enqueued. -==== - -_command_buffer_ Refers to a valid command-buffer object. - -{clFinalizeCommandBufferKHR} returns {CL_SUCCESS} if the function is executed -successfully. 
Otherwise, it returns one of the following errors: - -* {CL_INVALID_COMMAND_BUFFER_KHR} if _command_buffer_ is not a valid - command-buffer. - -* {CL_INVALID_OPERATION} if _command_buffer_ is not in the - <> state. - -* {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required by - the OpenCL implementation on the device. - -* {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources required - by the OpenCL implementation on the host. - -The function - -include::{generated}/api/protos/clEnqueueCommandBufferKHR.txt[] - -Enqueues a command-buffer to execute on command-queues specified by _queues_, -or on default command-queues used during recording if _queues_ is empty. - -[NOTE] -==== -To enqueue a command-buffer it must be in a <> state, -see {clFinalizeCommandBufferKHR}. -==== - -_num_queues_ The number of command-queues listed in _queues_. - -_queues_ A pointer to an ordered list of command-queues -<> with the command-queues used on recording. _queues_ -can be `NULL` in which case the default command-queues used on command-buffer -creation are used and _num_queues_ must be 0. - -_command_buffer_ Refers to a valid command-buffer object. - -_event_wait_list_, _num_events_in_wait_list_ Specify events that need to -complete before this particular command can be executed. If -_event_wait_list_ is `NULL`, then this particular command does not wait -on any event to complete. If _event_wait_list_ is `NULL`, -_num_events_in_wait_list_ must be 0. If event_wait_list is not `NULL`, -the list of events pointed to by _event_wait_list_ must be valid and -_num_events_in_wait_list_ must be greater than 0. The events specified -in _event_wait_list_ act as synchronization points. The context associated -with events in _event_wait_list_ and command_queue must be the same. The memory -associated with _event_wait_list_ can be reused or freed after the function -returns. 
- -_event_ Returns an event object that identifies this command and -can be used to query for profiling information or queue a wait for this -particular command to complete. _event_ can be `NULL` in which case it will not -be possible for the application to wait on this command or query it for -profiling information. - -{clEnqueueCommandBufferKHR} returns {CL_SUCCESS} if the command-buffer -execution was successfully queued, or one of the errors below: - -* {CL_INVALID_COMMAND_BUFFER_KHR} if _command_buffer_ is not a valid - command-buffer. - -* {CL_INVALID_OPERATION} if _command_buffer_ has not been finalized. - -* {CL_INVALID_OPERATION} if _command_buffer_ was not created with the - {CL_COMMAND_BUFFER_SIMULTANEOUS_USE_KHR} flag and is in the <<pending, Pending>> state. - -* {CL_INVALID_VALUE} if _queues_ is `NULL` and _num_queues_ is > 0, or _queues_ - is not `NULL` and _num_queues_ is 0. - -* {CL_INVALID_VALUE} if _num_queues_ is > 0 and not the same value as - _num_queues_ set on _command_buffer_ creation. - -* {CL_INVALID_COMMAND_QUEUE} if any element of _queues_ is not a valid - command-queue. - -* {CL_INCOMPATIBLE_COMMAND_QUEUE_KHR} if any element of _queues_ is not - <<compatible, compatible>> with the command-queue set on _command_buffer_ - creation at the same list index. - -* {CL_INVALID_CONTEXT} if any element of _queues_ does not have the same - context as the command-queue set on _command_buffer_ creation at the same list - index. - -* {CL_INVALID_CONTEXT} if the context associated with _command_buffer_ and - events in _event_wait_list_ are not the same. - -* {CL_OUT_OF_RESOURCES} if there is a failure to queue the execution instance of - _command_buffer_ on the command-queues because of insufficient resources - needed to execute _command_buffer_. - -* {CL_INVALID_EVENT_WAIT_LIST} if _event_wait_list_ is `NULL` and - _num_events_in_wait_list_ > 0, or _event_wait_list_ is not `NULL` - and _num_events_in_wait_list_ is 0, or if event objects in - _event_wait_list_ are not valid events.
- -* {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required by - the OpenCL implementation on the device. - -* {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources required - by the OpenCL implementation on the host. - -==== Add new section "Section 5.X.3 - Recording Commands to a Command Buffer" - -The function - -include::{generated}/api/protos/clCommandBarrierWithWaitListKHR.txt[] - -Records a barrier operation used as a synchronization point. - -[NOTE] -==== -{clCommandBarrierWithWaitListKHR} Waits for either a list of -synchronization-points to complete, or if the list is empty it waits for all -commands previously recorded in _command_buffer_ to complete before it -completes. This command blocks command execution, that is, any following -commands recorded after it do not execute until it completes. -==== - -_command_buffer_ Refers to a valid command-buffer object. - -_command_queue_ Specifies the command-queue the command will be recorded to. -Parameter is unused by this extension as only a single command-queue is -supported and **must** be `NULL`. - -_sync_point_wait_list_, _num_sync_points_in_wait_list_ Specify -synchronization-points that need to complete before this -particular command can be executed. - -If _sync_point_wait_list_ is `NULL`, _num_sync_points_in_wait_list_ -must be 0. If _sync_point_wait_list_ is not `NULL`, the list of -synchronization-points pointed to by _sync_point_wait_list_ must be -valid and _num_sync_points_in_wait_list_ must be greater than 0. -The synchronization-points specified in _sync_point_wait_list_ are -*device-side* synchronization-points. The command-buffer associated -with synchronization-points in _sync_point_wait_list_ must be the same -as _command_buffer_. The memory associated with _sync_point_wait_list_ -can be reused or freed after the function returns. 
- -If _sync_point_wait_list_ is `NULL`, then this particular command -waits until all previous recorded commands to _command_queue_ have -completed. - -_sync_point_ Returns a synchronization-point ID that identifies this particular -command. Synchronization-point objects are unique and can be used to -identify this barrier command later on. _sync_point_ can be `NULL` in -which case it will not be possible for the application to record a wait -for this command to complete. If the _sync_point_wait_list_ and the -_sync_point_ arguments are not `NULL`, the _sync_point_ argument -should not refer to an element of the _sync_point_wait_list_ array. - -_mutable_handle_ Returns a handle to the command. Handle is unused by -this extension and must be passed as `NULL`. - -{clCommandBarrierWithWaitListKHR} returns {CL_SUCCESS} if the function is -executed successfully. Otherwise, it returns one of the following errors: - -* {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not `NULL`. - -* {CL_INVALID_COMMAND_BUFFER_KHR} if _command_buffer_ is not a valid - command-buffer. - -* {CL_INVALID_CONTEXT} if the context associated with _command_queue_ and - _command_buffer_ is not the same. - -* {CL_INVALID_OPERATION} if _command_buffer_ has been finalized. - -* {CL_INVALID_VALUE} if _mutable_handle_ is not `NULL`. - -* {CL_INVALID_SYNC_POINT_WAIT_LIST_KHR} if _sync_point_wait_list_ is `NULL` and - _num_sync_points_in_wait_list_ is > 0, or _sync_point_wait_list_ is not - `NULL` and _num_sync_points_in_wait_list_ is 0, or if - synchronization-point objects in _sync_point_wait_list_ are not valid - synchronization-points. - -* {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required by - the OpenCL implementation on the device. - -* {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources required by - the OpenCL implementation on the host. 
- -The function - -include::{generated}/api/protos/clCommandCopyBufferKHR.txt[] - -Records a command to copy from one buffer object to another. - -_command_buffer_ Refers to a valid command-buffer object. - -_command_queue_ Specifies the command-queue the command will be recorded to. -Parameter is unused by this extension as only a single command-queue is -supported and **must** be `NULL`. - -_src_buffer_, _dst_buffer_, _src_offset_, _dst_offset_, _size_ Refer to -{clEnqueueCopyBuffer}. - -_sync_point_wait_list_, _num_sync_points_in_wait_list_ Specify -synchronization-points that need to complete before this -particular command can be executed. - -If _sync_point_wait_list_ is `NULL`, _num_sync_points_in_wait_list_ -must be 0. If _sync_point_wait_list_ is not `NULL`, the list of -synchronization-points pointed to by _sync_point_wait_list_ must be -valid and _num_sync_points_in_wait_list_ must be greater than 0. -The synchronization-points specified in _sync_point_wait_list_ are -*device-side* synchronization-points. The command-buffer associated -with synchronization-points in _sync_point_wait_list_ must be the same -as _command_buffer_. The memory associated with _sync_point_wait_list_ -can be reused or freed after the function returns. - -_sync_point_ Returns a synchronization-point ID that identifies this particular -command. Synchronization-point objects are unique and can be used to -identify this command later on. _sync_point_ can be `NULL` in which case it -will not be possible for the application to record a wait for this command to -complete. If the _sync_point_wait_list_ and the _sync_point_ arguments are not -`NULL`, the _sync_point_ argument should not refer to an element of the -_sync_point_wait_list_ array. - -_mutable_handle_ Returns a handle to the command. Handle is unused by -this extension and must be passed as `NULL`. - -{clCommandCopyBufferKHR} returns {CL_SUCCESS} if the function is executed -successfully. 
Otherwise, it returns the errors defined by -{clEnqueueCopyBuffer} except: - -{CL_INVALID_COMMAND_QUEUE} is replaced with: - -* {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not `NULL`. - -{CL_INVALID_CONTEXT} is replaced with: - -* {CL_INVALID_CONTEXT} if the context associated with _command_queue_, - _command_buffer_, _src_buffer_, and _dst_buffer_ are not the same. - -{CL_INVALID_EVENT_WAIT_LIST} is replaced with: - -* {CL_INVALID_SYNC_POINT_WAIT_LIST_KHR} if _sync_point_wait_list_ is `NULL` and - _num_sync_points_in_wait_list_ is > 0, or _sync_point_wait_list_ is not - `NULL` and _num_sync_points_in_wait_list_ is 0, or if - synchronization-point objects in _sync_point_wait_list_ are not valid - synchronization-points. - -New errors: - -* {CL_INVALID_COMMAND_BUFFER_KHR} if _command_buffer_ is not a valid - command-buffer. - -* {CL_INVALID_OPERATION} if _command_buffer_ has been finalized. - -* {CL_INVALID_VALUE} if _mutable_handle_ is not `NULL`. - -The function - -include::{generated}/api/protos/clCommandCopyBufferRectKHR.txt[] - -Records a command to copy a rectangular region from a buffer object to another -buffer object. - -[NOTE] -==== -{clCommandCopyBufferRectKHR} records a command to copy a 2D or 3D rectangular -region from the buffer object identified by _src_buffer_ to a 2D or 3D region -in the buffer object identified by _dst_buffer_. Copying begins at the source -offset and destination offset which are computed as described in the -description for _src_origin_ and _dst_origin_. - -Each byte of the region's width is copied from the source offset to the -destination offset. After copying each width, the source and destination -offsets are incremented by their respective source and destination row -pitches. After copying each 2D rectangle, the source and destination offsets -are incremented by their respective source and destination slice pitches. -==== - -_command_buffer_ Refers to a valid command-buffer object. 
- -_command_queue_ Specifies the command-queue the command will be recorded to. -Parameter is unused by this extension as only a single command-queue is -supported and **must** be `NULL`. - -_src_origin_, _dst_origin_, _region_, _src_row_pitch_, _src_slice_pitch_, -_dst_row_pitch_, _dst_slice_pitch_ Refer to {clEnqueueCopyBufferRect}. - -_sync_point_wait_list_, _num_sync_points_in_wait_list_ Specify -synchronization-points that need to complete before this -particular command can be executed. - -If _sync_point_wait_list_ is `NULL`, _num_sync_points_in_wait_list_ -must be 0. If _sync_point_wait_list_ is not `NULL`, the list of -synchronization-points pointed to by _sync_point_wait_list_ must be -valid and _num_sync_points_in_wait_list_ must be greater than 0. -The synchronization-points specified in _sync_point_wait_list_ are -*device-side* synchronization-points. The command-buffer associated -with synchronization-points in _sync_point_wait_list_ must be the same -as _command_buffer_. The memory associated with _sync_point_wait_list_ -can be reused or freed after the function returns. - -_sync_point_ Returns a synchronization-point ID that identifies this particular -command. Synchronization-point objects are unique and can be used to -identify this command later on. _sync_point_ can be `NULL` in which case it -will not be possible for the application to record a wait for this command to -complete. If the _sync_point_wait_list_ and the _sync_point_ arguments are not -`NULL`, the _sync_point_ argument should not refer to an element of the -_sync_point_wait_list_ array. - -_mutable_handle_ Returns a handle to the command. Handle is unused by -this extension and must be passed as `NULL`. - -{clCommandCopyBufferRectKHR} returns {CL_SUCCESS} if the function is executed -successfully. 
Otherwise, it returns the errors defined by -{clEnqueueCopyBufferRect} except: - -{CL_INVALID_COMMAND_QUEUE} is replaced with: - -* {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not `NULL`. - -{CL_INVALID_CONTEXT} is replaced with: - -* {CL_INVALID_CONTEXT} if the context associated with _command_queue_, - _command_buffer_, _src_buffer_, and _dst_buffer_ are not the same. - -{CL_INVALID_EVENT_WAIT_LIST} is replaced with: - -* {CL_INVALID_SYNC_POINT_WAIT_LIST_KHR} if _sync_point_wait_list_ is `NULL` and - _num_sync_points_in_wait_list_ is > 0, or _sync_point_wait_list_ is not - `NULL` and _num_sync_points_in_wait_list_ is 0, or if - synchronization-point objects in _sync_point_wait_list_ are not valid - synchronization-points. - -New errors: - -* {CL_INVALID_COMMAND_BUFFER_KHR} if _command_buffer_ is not a valid - command-buffer. - -* {CL_INVALID_OPERATION} if _command_buffer_ has been finalized. - -* {CL_INVALID_VALUE} if _mutable_handle_ is not `NULL`. - -The function - -include::{generated}/api/protos/clCommandCopyBufferToImageKHR.txt[] - -Records a command to copy a buffer object to an image object. - -_command_buffer_ Refers to a valid command-buffer object. - -_command_queue_ Specifies the command-queue the command will be recorded to. -Parameter is unused by this extension as only a single command-queue is -supported and **must** be `NULL`. - -_src_buffer_, _dst_image_, _src_offset_, _dst_origin_, _region_ Refer to -{clEnqueueCopyBufferToImage} - -_sync_point_wait_list_, _num_sync_points_in_wait_list_ Specify -synchronization-points that need to complete before this -particular command can be executed. - -If _sync_point_wait_list_ is `NULL`, _num_sync_points_in_wait_list_ -must be 0. If _sync_point_wait_list_ is not `NULL`, the list of -synchronization-points pointed to by _sync_point_wait_list_ must be -valid and _num_sync_points_in_wait_list_ must be greater than 0. 
-The synchronization-points specified in _sync_point_wait_list_ are -*device-side* synchronization-points. The command-buffer associated -with synchronization-points in _sync_point_wait_list_ must be the same -as _command_buffer_. The memory associated with _sync_point_wait_list_ -can be reused or freed after the function returns. - -_sync_point_ Returns a synchronization-point ID that identifies this particular -command. Synchronization-point objects are unique and can be used to -identify this command later on. _sync_point_ can be `NULL` in which case it -will not be possible for the application to record a wait for this command to -complete. If the _sync_point_wait_list_ and the _sync_point_ arguments are not -`NULL`, the _sync_point_ argument should not refer to an element of the -_sync_point_wait_list_ array. - -_mutable_handle_ Returns a handle to the command. Handle is unused by -this extension and must be passed as `NULL`. - -{clCommandCopyBufferToImageKHR} returns {CL_SUCCESS} if the function is executed -successfully. Otherwise, it returns the errors defined by -{clEnqueueCopyBufferToImage} except: - -{CL_INVALID_COMMAND_QUEUE} is replaced with: - -* {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not `NULL`. - -{CL_INVALID_CONTEXT} is replaced with: - -* {CL_INVALID_CONTEXT} if the context associated with _command_queue_, - _command_buffer_, _src_buffer_, and _dst_image_ are not the same. - -{CL_INVALID_EVENT_WAIT_LIST} is replaced with: - -* {CL_INVALID_SYNC_POINT_WAIT_LIST_KHR} if _sync_point_wait_list_ is `NULL` and - _num_sync_points_in_wait_list_ is > 0, or _sync_point_wait_list_ is not - `NULL` and _num_sync_points_in_wait_list_ is 0, or if - synchronization-point objects in _sync_point_wait_list_ are not valid - synchronization-points. - -New errors: - -* {CL_INVALID_COMMAND_BUFFER_KHR} if _command_buffer_ is not a valid - command-buffer. - -* {CL_INVALID_OPERATION} if _command_buffer_ has been finalized. 
- -* {CL_INVALID_VALUE} if _mutable_handle_ is not `NULL`. - -The function - -include::{generated}/api/protos/clCommandCopyImageKHR.txt[] - -Records a command to copy image objects. - -[NOTE] -==== -It is currently a requirement that the _src_image_ and _dst_image_ image -memory objects for {clCommandCopyImageKHR} must have the exact same image -format, i.e. the {cl_image_format_TYPE} descriptor specified when _src_image_ and -_dst_image_ are created must match. -==== - -_command_buffer_ Refers to a valid command-buffer object. - -_command_queue_ Specifies the command-queue the command will be recorded to. -Parameter is unused by this extension as only a single command-queue is -supported and **must** be `NULL`. - -_src_image_, _dst_image_, _src_origin_, _dst_origin_, _region_ Refer to -{clEnqueueCopyImage}. - -_sync_point_wait_list_, _num_sync_points_in_wait_list_ Specify -synchronization-points that need to complete before this -particular command can be executed. - -If _sync_point_wait_list_ is `NULL`, _num_sync_points_in_wait_list_ -must be 0. If _sync_point_wait_list_ is not `NULL`, the list of -synchronization-points pointed to by _sync_point_wait_list_ must be -valid and _num_sync_points_in_wait_list_ must be greater than 0. -The synchronization-points specified in _sync_point_wait_list_ are -*device-side* synchronization-points. The command-buffer associated -with synchronization-points in _sync_point_wait_list_ must be the same -as _command_buffer_. The memory associated with _sync_point_wait_list_ -can be reused or freed after the function returns. - -_sync_point_ Returns a synchronization-point ID that identifies this particular -command. Synchronization-point objects are unique and can be used to -identify this command later on. _sync_point_ can be `NULL` in which case it -will not be possible for the application to record a wait for this command to -complete. 
If the _sync_point_wait_list_ and the _sync_point_ arguments are not -`NULL`, the _sync_point_ argument should not refer to an element of the -_sync_point_wait_list_ array. - -_mutable_handle_ Returns a handle to the command. Handle is unused by -this extension and must be passed as `NULL`. - -{clCommandCopyImageKHR} returns {CL_SUCCESS} if the function is executed -successfully. Otherwise, it returns the errors defined by -{clEnqueueCopyImage} except: - -{CL_INVALID_COMMAND_QUEUE} is replaced with: - -* {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not `NULL`. - -{CL_INVALID_CONTEXT} is replaced with: - -* {CL_INVALID_CONTEXT} if the context associated with _command_queue_, - _command_buffer_, _src_image_, and _dst_image_ are not the same. - -{CL_INVALID_EVENT_WAIT_LIST} is replaced with: - -* {CL_INVALID_SYNC_POINT_WAIT_LIST_KHR} if _sync_point_wait_list_ is `NULL` and - _num_sync_points_in_wait_list_ is > 0, or _sync_point_wait_list_ is not - `NULL` and _num_sync_points_in_wait_list_ is 0, or if - synchronization-point objects in _sync_point_wait_list_ are not valid - synchronization-points. - -New errors: - -* {CL_INVALID_COMMAND_BUFFER_KHR} if _command_buffer_ is not a valid - command-buffer. - -* {CL_INVALID_OPERATION} if _command_buffer_ has been finalized. - -* {CL_INVALID_VALUE} if _mutable_handle_ is not `NULL`. - -The function - -include::{generated}/api/protos/clCommandCopyImageToBufferKHR.txt[] - -Records a command to copy an image object to a buffer object. - -_command_buffer_ Refers to a valid command-buffer object. - -_command_queue_ Specifies the command-queue the command will be recorded to. -Parameter is unused by this extension as only a single command-queue is -supported and **must** be `NULL`. - -_src_image_, _dst_buffer_, _src_origin_, _region_, _dst_offset_ -Refer to {clEnqueueCopyImageToBuffer}. 
- -_sync_point_wait_list_, _num_sync_points_in_wait_list_ Specify -synchronization-points that need to complete before this -particular command can be executed. - -If _sync_point_wait_list_ is `NULL`, _num_sync_points_in_wait_list_ -must be 0. If _sync_point_wait_list_ is not `NULL`, the list of -synchronization-points pointed to by _sync_point_wait_list_ must be -valid and _num_sync_points_in_wait_list_ must be greater than 0. -The synchronization-points specified in _sync_point_wait_list_ are -*device-side* synchronization-points. The command-buffer associated -with synchronization-points in _sync_point_wait_list_ must be the same -as _command_buffer_. The memory associated with _sync_point_wait_list_ -can be reused or freed after the function returns. - -_sync_point_ Returns a synchronization-point ID that identifies this particular -command. Synchronization-point objects are unique and can be used to -identify this command later on. _sync_point_ can be `NULL` in which case it -will not be possible for the application to record a wait for this command to -complete. If the _sync_point_wait_list_ and the _sync_point_ arguments are not -`NULL`, the _sync_point_ argument should not refer to an element of the -_sync_point_wait_list_ array. - -_mutable_handle_ Returns a handle to the command. Handle is unused by -this extension and must be passed as `NULL`. - -{clCommandCopyImageToBufferKHR} returns {CL_SUCCESS} if the function is -executed successfully. Otherwise, it returns the errors defined by -{clEnqueueCopyImageToBuffer} except: - -{CL_INVALID_COMMAND_QUEUE} is replaced with: - -* {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not `NULL`. - -{CL_INVALID_CONTEXT} is replaced with: - -* {CL_INVALID_CONTEXT} if the context associated with _command_queue_, - _command_buffer_, _src_image_, and _dst_buffer_ are not the same. 
- -{CL_INVALID_EVENT_WAIT_LIST} is replaced with: - -* {CL_INVALID_SYNC_POINT_WAIT_LIST_KHR} if _sync_point_wait_list_ is `NULL` and - _num_sync_points_in_wait_list_ is > 0, or _sync_point_wait_list_ is not - `NULL` and _num_sync_points_in_wait_list_ is 0, or if - synchronization-point objects in _sync_point_wait_list_ are not valid - synchronization-points. - -New errors: - -* {CL_INVALID_COMMAND_BUFFER_KHR} if _command_buffer_ is not a valid - command-buffer. - -* {CL_INVALID_OPERATION} if _command_buffer_ has been finalized. - -* {CL_INVALID_VALUE} if _mutable_handle_ is not `NULL`. - -The function - -include::{generated}/api/protos/clCommandFillBufferKHR.txt[] - -Records a command to fill a buffer object with a pattern of a given pattern -size. - -[NOTE] -==== -The usage information which indicates whether the memory object can be read or -written by a kernel and/or the host and is given by the {cl_mem_flags_TYPE} argument -value specified when _buffer_ is created is ignored by -{clCommandFillBufferKHR}. -==== - -_command_buffer_ Refers to a valid command-buffer object. - -_command_queue_ Specifies the command-queue the command will be recorded to. -Parameter is unused by this extension as only a single command-queue is -supported and **must** be `NULL`. - -_buffer_, _pattern_, _pattern_size_, _offset_, _size_ Refer to -{clEnqueueFillBuffer}. - -_sync_point_wait_list_, _num_sync_points_in_wait_list_ Specify -synchronization-points that need to complete before this -particular command can be executed. - -If _sync_point_wait_list_ is `NULL`, _num_sync_points_in_wait_list_ -must be 0. If _sync_point_wait_list_ is not `NULL`, the list of -synchronization-points pointed to by _sync_point_wait_list_ must be -valid and _num_sync_points_in_wait_list_ must be greater than 0. -The synchronization-points specified in _sync_point_wait_list_ are -*device-side* synchronization-points. 
The command-buffer associated -with synchronization-points in _sync_point_wait_list_ must be the same -as _command_buffer_. The memory associated with _sync_point_wait_list_ -can be reused or freed after the function returns. - -_sync_point_ Returns a synchronization-point ID that identifies this particular -command. Synchronization-point objects are unique and can be used to -identify this command later on. _sync_point_ can be `NULL` in which case it -will not be possible for the application to record a wait for this command to -complete. If the _sync_point_wait_list_ and the _sync_point_ arguments are not -`NULL`, the _sync_point_ argument should not refer to an element of the -_sync_point_wait_list_ array. - -_mutable_handle_ Returns a handle to the command. Handle is unused by -this extension and must be passed as `NULL`. - -{clCommandFillBufferKHR} returns {CL_SUCCESS} if the function is executed -successfully. Otherwise, it returns the errors defined by -{clEnqueueFillBuffer} except: - -{CL_INVALID_COMMAND_QUEUE} is replaced with: - -* {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not `NULL`. - -{CL_INVALID_CONTEXT} is replaced with: - -* {CL_INVALID_CONTEXT} if the context associated with _command_queue_, - _command_buffer_, and _buffer_ are not the same. - -{CL_INVALID_EVENT_WAIT_LIST} is replaced with: - -* {CL_INVALID_SYNC_POINT_WAIT_LIST_KHR} if _sync_point_wait_list_ is `NULL` and - _num_sync_points_in_wait_list_ is > 0, or _sync_point_wait_list_ is not - `NULL` and _num_sync_points_in_wait_list_ is 0, or if - synchronization-point objects in _sync_point_wait_list_ are not valid - synchronization-points. - -New errors: - -* {CL_INVALID_COMMAND_BUFFER_KHR} if _command_buffer_ is not a valid - command-buffer. - -* {CL_INVALID_OPERATION} if _command_buffer_ has been finalized. - -* {CL_INVALID_VALUE} if _mutable_handle_ is not `NULL`. 
- -The function - -include::{generated}/api/protos/clCommandFillImageKHR.txt[] - -Records a command to fill an image object with a specified color. - -[NOTE] -==== -The usage information, which indicates whether the memory object can be read or -written by a kernel and/or the host and is given by the {cl_mem_flags_TYPE} argument -value specified when _image_ is created, is ignored by {clCommandFillImageKHR}. -==== - -_command_buffer_ Refers to a valid command-buffer object. - -_command_queue_ Specifies the command-queue the command will be recorded to. -Parameter is unused by this extension as only a single command-queue is -supported and **must** be `NULL`. - -_image_, _fill_color_, _origin_, _region_ Refer to {clEnqueueFillImage}. - -_sync_point_wait_list_, _num_sync_points_in_wait_list_ Specify -synchronization-points that need to complete before this -particular command can be executed. - -If _sync_point_wait_list_ is `NULL`, _num_sync_points_in_wait_list_ -must be 0. If _sync_point_wait_list_ is not `NULL`, the list of -synchronization-points pointed to by _sync_point_wait_list_ must be -valid and _num_sync_points_in_wait_list_ must be greater than 0. -The synchronization-points specified in _sync_point_wait_list_ are -*device-side* synchronization-points. The command-buffer associated -with synchronization-points in _sync_point_wait_list_ must be the same -as _command_buffer_. The memory associated with _sync_point_wait_list_ -can be reused or freed after the function returns. - -_sync_point_ Returns a synchronization-point ID that identifies this particular -command. Synchronization-point objects are unique and can be used to -identify this command later on. _sync_point_ can be `NULL` in which case it -will not be possible for the application to record a wait for this command to -complete. If the _sync_point_wait_list_ and the _sync_point_ arguments are not -`NULL`, the _sync_point_ argument should not refer to an element of the -_sync_point_wait_list_ array.
- -_mutable_handle_ Returns a handle to the command. Handle is unused by -this extension and must be passed as `NULL`. - -{clCommandFillImageKHR} returns {CL_SUCCESS} if the function is executed -successfully. Otherwise, it returns the errors defined by -{clEnqueueFillImage} except: - -{CL_INVALID_COMMAND_QUEUE} is replaced with: - -* {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not `NULL`. - -{CL_INVALID_CONTEXT} is replaced with: - -* {CL_INVALID_CONTEXT} if the context associated with _command_queue_, - _command_buffer_, and _image_ are not the same. - -{CL_INVALID_EVENT_WAIT_LIST} is replaced with: - -* {CL_INVALID_SYNC_POINT_WAIT_LIST_KHR} if _sync_point_wait_list_ is `NULL` and - _num_sync_points_in_wait_list_ is > 0, or _sync_point_wait_list_ is not - `NULL` and _num_sync_points_in_wait_list_ is 0, or if - synchronization-point objects in _sync_point_wait_list_ are not valid - synchronization-points. - -New errors: - -* {CL_INVALID_COMMAND_BUFFER_KHR} if _command_buffer_ is not a valid - command-buffer. - -* {CL_INVALID_OPERATION} if _command_buffer_ has been finalized. - -* {CL_INVALID_VALUE} if _mutable_handle_ is not `NULL`. - -The function - -include::{generated}/api/protos/clCommandNDRangeKernelKHR.txt[] - -Records a command to execute a kernel on a device. - -[NOTE] -==== -The work-group size to be used for _kernel_ can also be specified in the -program source using the -`+__attribute__((reqd_work_group_size(X, Y, Z)))+` qualifier. In this case the -size of work-group specified by _local_work_size_ must match the value -specified by the `reqd_work_group_size` `+__attribute__+` qualifier. - -These work-group instances are executed in parallel across multiple compute -units or concurrently on the same compute unit. - -Each work-item is uniquely identified by a global identifier. The global ID, -which can be read inside the kernel, is computed using the value given by -_global_work_size_ and _global_work_offset_. 
In addition, a work-item is -also identified within a work-group by a unique local ID. The local ID, -which can also be read by the kernel, is computed using the value given by -_local_work_size_. The starting local ID is always (0, 0, ... 0). -==== - -_command_buffer_ Refers to a valid command-buffer object. - -_command_queue_ Specifies the command-queue the command will be recorded to. -Parameter is unused by this extension as only a single command-queue is -supported and **must** be `NULL`. - -_properties_ Specifies a list of properties for the kernel command and their -corresponding values. Each property name is immediately followed by the -corresponding desired value. The list is terminated with 0. If no properties are -required, _properties_ may be `NULL`. This extension does not define any -properties. - -_kernel_ A valid kernel object which **must** have its arguments set. Any -changes to _kernel_ after calling {clCommandNDRangeKernelKHR}, such as with -{clSetKernelArg} or {clSetKernelExecInfo}, have no effect on the recorded -command. If _kernel_ is recorded to a following {clCommandNDRangeKernelKHR} -command however, then that command will capture the updated state of _kernel_. - -_work_dim_, _global_work_offset_, _global_work_size_, _local_work_size_ Refer -to {clEnqueueNDRangeKernel}. - -_sync_point_wait_list_, _num_sync_points_in_wait_list_ Specify -synchronization-points that need to complete before this -particular command can be executed. - -If _sync_point_wait_list_ is `NULL`, _num_sync_points_in_wait_list_ -must be 0. If _sync_point_wait_list_ is not `NULL`, the list of -synchronization-points pointed to by _sync_point_wait_list_ must be -valid and _num_sync_points_in_wait_list_ must be greater than 0. -The synchronization-points specified in _sync_point_wait_list_ are -*device-side* synchronization-points. The command-buffer associated -with synchronization-points in _sync_point_wait_list_ must be the same -as _command_buffer_. 
The memory associated with _sync_point_wait_list_ -can be reused or freed after the function returns. - -_sync_point_ Returns a synchronization-point ID that identifies this particular -command. Synchronization-point objects are unique and can be used to -identify this command later on. _sync_point_ can be `NULL` in which case it -will not be possible for the application to record a wait for this command to -complete. If the _sync_point_wait_list_ and the _sync_point_ arguments are not -`NULL`, the _sync_point_ argument should not refer to an element of the -_sync_point_wait_list_ array. - -_mutable_handle_ Returns a handle to the command. Handle is unused by -this extension and must be passed as `NULL`. - -{clCommandNDRangeKernelKHR} returns {CL_SUCCESS} if the function is executed -successfully. Otherwise, it returns the errors defined by -{clEnqueueNDRangeKernel} except: - -{CL_INVALID_COMMAND_QUEUE} is replaced with: - -* {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not `NULL`. - -{CL_INVALID_CONTEXT} is replaced with: - -* {CL_INVALID_CONTEXT} if the context associated with _command_queue_, - _command_buffer_, and _kernel_ are not the same. - -{CL_INVALID_EVENT_WAIT_LIST} is replaced with: - -* {CL_INVALID_SYNC_POINT_WAIT_LIST_KHR} if _sync_point_wait_list_ is `NULL` and - _num_sync_points_in_wait_list_ is > 0, or _sync_point_wait_list_ is not - `NULL` and _num_sync_points_in_wait_list_ is 0, or if - synchronization-point objects in _sync_point_wait_list_ are not valid - synchronization-points. - -New errors: - -* {CL_INVALID_COMMAND_BUFFER_KHR} if _command_buffer_ is not a valid - command-buffer. - -* {CL_INVALID_VALUE} if values specified in _properties_ are not valid - -* {CL_INVALID_OPERATION} if _command_buffer_ has been finalized. - -* {CL_INVALID_VALUE} if _mutable_handle_ is not `NULL`. 
- -* {CL_INVALID_OPERATION} if the device associated with _command_queue_ does not - support {CL_COMMAND_BUFFER_CAPABILITY_KERNEL_PRINTF_KHR} and _kernel_ contains - a printf call. - -* {CL_INVALID_OPERATION} if the device associated with _command_queue_ does not - support {CL_COMMAND_BUFFER_CAPABILITY_DEVICE_SIDE_ENQUEUE_KHR} and _kernel_ - contains a kernel-enqueue call. - - -The function - -include::{generated}/api/protos/clCommandSVMMemcpyKHR.txt[] - -Records a command to do an SVM memcpy operation. - -_command_buffer_ Refers to a valid command-buffer object. - -_command_queue_ Specifies the command-queue the command will be recorded to. -Parameter is unused by this extension as only a single command-queue is -supported and **must** be `NULL`. - -_dst_ptr_ is the pointer to a host (if the device supports system SVM) -or SVM memory allocation where data is copied to. - -_src_ptr_ is the pointer to a host (if the device supports system SVM) -or SVM memory allocation where data is copied from. - -_size_ is the size in bytes of data being copied. - -_sync_point_wait_list_, _num_sync_points_in_wait_list_ Specify -synchronization-points that need to complete before this -particular command can be executed. - -If _sync_point_wait_list_ is `NULL`, _num_sync_points_in_wait_list_ -must be 0. If _sync_point_wait_list_ is not `NULL`, the list of -synchronization-points pointed to by _sync_point_wait_list_ must be -valid and _num_sync_points_in_wait_list_ must be greater than 0. -The synchronization-points specified in _sync_point_wait_list_ are -*device-side* synchronization-points. The command-buffer associated -with synchronization-points in _sync_point_wait_list_ must be the same -as _command_buffer_. The memory associated with _sync_point_wait_list_ -can be reused or freed after the function returns. - -_sync_point_ Returns a synchronization-point ID that identifies this particular -command. 
Synchronization-point objects are unique and can be used to -identify this command later on. _sync_point_ can be `NULL` in which case it -will not be possible for the application to record a wait for this command to -complete. If the _sync_point_wait_list_ and the _sync_point_ arguments are not -`NULL`, the _sync_point_ argument should not refer to an element of the -_sync_point_wait_list_ array. - -_mutable_handle_ Returns a handle to the command. Handle is unused by -this extension and must be passed as `NULL`. - -{clCommandSVMMemcpyKHR} returns {CL_SUCCESS} if the function is executed -successfully. Otherwise, it returns the errors defined by -{clEnqueueSVMMemcpy} except: - -{CL_INVALID_COMMAND_QUEUE} is replaced with: - -* {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not `NULL`. - -{CL_INVALID_CONTEXT} is replaced with: - -* {CL_INVALID_CONTEXT} if the context associated with _command_queue_, - _command_buffer_, and _kernel_ are not the same. - -{CL_INVALID_EVENT_WAIT_LIST} is replaced with: - -* {CL_INVALID_SYNC_POINT_WAIT_LIST_KHR} if _sync_point_wait_list_ is `NULL` and - _num_sync_points_in_wait_list_ is > 0, or _sync_point_wait_list_ is not - `NULL` and _num_sync_points_in_wait_list_ is 0, or if - synchronization-point objects in _sync_point_wait_list_ are not valid - synchronization-points. - -New errors: - -* {CL_INVALID_COMMAND_BUFFER_KHR} if _command_buffer_ is not a valid - command-buffer. - -* {CL_INVALID_OPERATION} if _command_buffer_ has been finalized. - -* {CL_INVALID_VALUE} if _mutable_handle_ is not `NULL`. - - -The function - -include::{generated}/api/protos/clCommandSVMMemFillKHR.txt[] - -Records a command to fill a region in SVM with a pattern of a given pattern size. - -_command_buffer_ Refers to a valid command-buffer object. - -_command_queue_ Specifies the command-queue the command will be recorded to. -Parameter is unused by this extension as only a single command-queue is -supported and **must** be `NULL`. 
- -_svm_ptr_ is a pointer to a host (if the device supports system SVM) -or SVM memory region that will be filled with _pattern_. It must be -aligned to _pattern_size_ bytes. -If _svm_ptr_ is allocated using {clSVMAlloc} then it must be allocated from -the same context from which _command_queue_ was created. -Otherwise the behavior is undefined. - -_pattern_ is a pointer to the data pattern of size _pattern_size_ in bytes. -_pattern_ will be used to fill a region in _buffer_ starting at _svm_ptr_ -and is _size_ bytes in size. -The data pattern must be a scalar or vector integer or floating-point data -type supported by OpenCL. -For example, if the region pointed to by _svm_ptr_ is to be filled with a -pattern of float4 values, then _pattern_ will be a pointer to a cl_float4 -value and _pattern_size_ will be `sizeof(cl_float4)`. -The maximum value of _pattern_size_ is the size of the largest integer or -floating-point vector data type supported by the OpenCL device. -The memory associated with _pattern_ can be reused or freed after the -function returns. - -_size_ is the size in bytes of the region being filled starting with _svm_ptr_ -and must be a multiple of _pattern_size_. - -_sync_point_wait_list_, _num_sync_points_in_wait_list_ Specify -synchronization-points that need to complete before this -particular command can be executed. - -If _sync_point_wait_list_ is `NULL`, _num_sync_points_in_wait_list_ -must be 0. If _sync_point_wait_list_ is not `NULL`, the list of -synchronization-points pointed to by _sync_point_wait_list_ must be -valid and _num_sync_points_in_wait_list_ must be greater than 0. -The synchronization-points specified in _sync_point_wait_list_ are -*device-side* synchronization-points. The command-buffer associated -with synchronization-points in _sync_point_wait_list_ must be the same -as _command_buffer_. The memory associated with _sync_point_wait_list_ -can be reused or freed after the function returns.
- -_sync_point_ Returns a synchronization-point ID that identifies this particular -command. Synchronization-point objects are unique and can be used to -identify this command later on. _sync_point_ can be `NULL` in which case it -will not be possible for the application to record a wait for this command to -complete. If the _sync_point_wait_list_ and the _sync_point_ arguments are not -`NULL`, the _sync_point_ argument should not refer to an element of the -_sync_point_wait_list_ array. - -_mutable_handle_ Returns a handle to the command. Handle is unused by -this extension and must be passed as `NULL`. - -{clCommandSVMMemFillKHR} returns {CL_SUCCESS} if the function is executed -successfully. Otherwise, it returns the errors defined by -{clEnqueueSVMMemFill} except: - -{CL_INVALID_COMMAND_QUEUE} is replaced with: - -* {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not `NULL`. - -{CL_INVALID_CONTEXT} is replaced with: - -* {CL_INVALID_CONTEXT} if the context associated with _command_queue_, - _command_buffer_, and _kernel_ are not the same. - -{CL_INVALID_EVENT_WAIT_LIST} is replaced with: - -* {CL_INVALID_SYNC_POINT_WAIT_LIST_KHR} if _sync_point_wait_list_ is `NULL` and - _num_sync_points_in_wait_list_ is > 0, or _sync_point_wait_list_ is not - `NULL` and _num_sync_points_in_wait_list_ is 0, or if - synchronization-point objects in _sync_point_wait_list_ are not valid - synchronization-points. - -New errors: - -* {CL_INVALID_COMMAND_BUFFER_KHR} if _command_buffer_ is not a valid - command-buffer. - -* {CL_INVALID_OPERATION} if _command_buffer_ has been finalized. - -* {CL_INVALID_VALUE} if _mutable_handle_ is not `NULL`. - -==== Add new section "Section 5.X.4 - Command Buffer Queries" - -The function - -include::{generated}/api/protos/clGetCommandBufferInfoKHR.txt[] - -Queries information about a command-buffer. - -_command_buffer_ Specifies the command-buffer being queried. - -_param_name_ Specifies the information to query. 
- -_param_value_size_ Specifies the size in bytes of memory pointed to by -_param_value_. This size must be ≥ size of return type as described in the table -below. If _param_value_ is `NULL`, it is ignored. - -_param_value_ A pointer to memory where the appropriate result being queried is -returned. If _param_value_ is `NULL`, it is ignored. - -_param_value_size_ret_ Returns the actual size in bytes of data being queried by -_param_value_. If _param_value_size_ret_ is `NULL`, it is ignored. - -The list of supported _param_name_ values and the information returned in -_param_value_ by {clGetCommandBufferInfoKHR} is described in the table below. - -.{clGetCommandBufferInfoKHR} values -[cols=",,",options="header",] -|==== -| Command Buffer Info -| Return Type -| Description - -| {CL_COMMAND_BUFFER_NUM_QUEUES_KHR} -| {cl_uint_TYPE} -| The number of command-queues specified when _command_buffer_ was created. - -| {CL_COMMAND_BUFFER_QUEUES_KHR} -| {cl_command_queue_TYPE}[] -| Return the list of command-queues specified when the _command_buffer_ was - created. - -| {CL_COMMAND_BUFFER_REFERENCE_COUNT_KHR} footnote:[{fn-reference-count-usage}] -| {cl_uint_TYPE} -| Return the _command_buffer_ reference count. - -| {CL_COMMAND_BUFFER_STATE_KHR} -| {cl_command_buffer_state_khr_TYPE} -| Return the state of _command_buffer_. - - {CL_COMMAND_BUFFER_STATE_RECORDING_KHR} is returned when _command_buffer_ has - not been finalized. - - {CL_COMMAND_BUFFER_STATE_EXECUTABLE_KHR} is returned when _command_buffer_ - has been finalized and there is not a pending instance of - _command_buffer_ awaiting completion on a command-queue. - - {CL_COMMAND_BUFFER_STATE_PENDING_KHR} is returned when an instance of - _command_buffer_ has been enqueued for execution but not yet completed. - -| {CL_COMMAND_BUFFER_PROPERTIES_ARRAY_KHR} -| {cl_command_buffer_properties_khr_TYPE}[] -| Return the _properties_ argument specified in {clCreateCommandBufferKHR}.
- - If the _properties_ argument specified in {clCreateCommandBufferKHR} used to - create _command_buffer_ was not `NULL`, the implementation must return the - values specified in the properties argument. - - If the _properties_ argument specified in {clCreateCommandBufferKHR} used to - create _command_buffer_ was `NULL`, the implementation may return either a - _param_value_size_ret_ of 0 (i.e. there are no properties to be returned), - or the implementation may return a property value of 0 (where 0 is used to - terminate the properties list). - -| {CL_COMMAND_BUFFER_CONTEXT_KHR} -| {cl_context_TYPE} -| Return the context associated with _command_buffer_. - -|==== - -{clGetCommandBufferInfoKHR} returns {CL_SUCCESS} if the function is executed -successfully. Otherwise, it returns one of the following errors: - -* {CL_INVALID_COMMAND_BUFFER_KHR} if _command_buffer_ is not a valid - command-buffer. - -* {CL_INVALID_VALUE} if _param_name_ is not one of the supported values - or if size in bytes specified by _param_value_size_ is less than size of - return type and _param_value_ is not a `NULL` value. - -* {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required by - the OpenCL implementation on the device. - -* {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources required by - the OpenCL implementation on the host. - -=== Modifications to section 5.11 of the OpenCL API Specification - -In the opening paragraph add {clEnqueueCommandBufferKHR} to the list of commands that -can return an event object.
- -Add to Table 37, _Event Command Types_: -[cols=",",options="header"] -|==== -| Events Created By -| Event Command Type - -| {clEnqueueCommandBufferKHR} -| {CL_COMMAND_COMMAND_BUFFER_KHR} -|==== - -=== Sample Code - -[source] ----- - #define CL_CHECK(ERROR) \ - if (ERROR) { \ - std::cerr << "OpenCL error: " << ERROR << "\n"; \ - return ERROR; \ - } - - int main() { - cl_platform_id platform; - CL_CHECK(clGetPlatformIDs(1, &platform, nullptr)); - cl_device_id device; - CL_CHECK(clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 1, &device, nullptr)); - - cl_int error; - cl_context context = - clCreateContext(nullptr, 1, &device, nullptr, nullptr, &error); - CL_CHECK(error); - - const char* code = R"OpenCLC( - kernel void vector_addition(global int* tile1, global int* tile2, - global int* res) { - size_t index = get_global_id(0); - res[index] = tile1[index] + tile2[index]; - } - )OpenCLC"; - const size_t length = std::strlen(code); - - cl_program program = - clCreateProgramWithSource(context, 1, &code, &length, &error); - CL_CHECK(error); - - CL_CHECK(clBuildProgram(program, 1, &device, nullptr, nullptr, nullptr)); - - cl_kernel kernel = clCreateKernel(program, "vector_addition", &error); - CL_CHECK(error); - - constexpr size_t frame_count = 60; - constexpr size_t frame_elements = 1024; - constexpr size_t frame_size = frame_elements * sizeof(cl_int); - - constexpr size_t tile_count = 16; - constexpr size_t tile_elements = frame_elements / tile_count; - constexpr size_t tile_size = tile_elements * sizeof(cl_int); - - cl_mem buffer_tile1 = - clCreateBuffer(context, CL_MEM_READ_ONLY, tile_size, nullptr, &error); - CL_CHECK(error); - cl_mem buffer_tile2 = - clCreateBuffer(context, CL_MEM_READ_ONLY, tile_size, nullptr, &error); - CL_CHECK(error); - cl_mem buffer_res = - clCreateBuffer(context, CL_MEM_WRITE_ONLY, tile_size, nullptr, &error); - CL_CHECK(error); - - CL_CHECK(clSetKernelArg(kernel, 0, sizeof(buffer_tile1), &buffer_tile1)); - CL_CHECK(clSetKernelArg(kernel, 1, 
sizeof(buffer_tile2), &buffer_tile2)); - CL_CHECK(clSetKernelArg(kernel, 2, sizeof(buffer_res), &buffer_res)); - - cl_command_queue command_queue = - clCreateCommandQueue(context, device, - CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &error); - CL_CHECK(error); - - cl_command_buffer_khr command_buffer = - clCreateCommandBufferKHR(1, &command_queue, nullptr, &error); - CL_CHECK(error); - - cl_mem buffer_src1 = - clCreateBuffer(context, CL_MEM_READ_ONLY, frame_size, nullptr, &error); - CL_CHECK(error); - cl_mem buffer_src2 = - clCreateBuffer(context, CL_MEM_READ_ONLY, frame_size, nullptr, &error); - CL_CHECK(error); - cl_mem buffer_dst = - clCreateBuffer(context, CL_MEM_WRITE_ONLY, frame_size, nullptr, &error); - CL_CHECK(error); - - cl_sync_point_khr tile_sync_point = 0; - for (size_t tile_index = 0; tile_index < tile_count; tile_index++) { - std::array<cl_sync_point_khr, 2> copy_sync_points; - CL_CHECK(clCommandCopyBufferKHR(command_buffer, - command_queue, buffer_src1, buffer_tile1, tile_index * tile_size, 0, - tile_size, tile_sync_point ? 1 : 0, - tile_sync_point ? &tile_sync_point : nullptr, &copy_sync_points[0], - nullptr)); - CL_CHECK(clCommandCopyBufferKHR(command_buffer, - command_queue, buffer_src2, buffer_tile2, tile_index * tile_size, 0, - tile_size, tile_sync_point ? 1 : 0, - tile_sync_point ?
&tile_sync_point : nullptr, &copy_sync_points[1], - nullptr)); - - cl_sync_point_khr nd_sync_point; - CL_CHECK(clCommandNDRangeKernelKHR(command_buffer, - command_queue, nullptr, kernel, 1, nullptr, &tile_elements, nullptr, - copy_sync_points.size(), copy_sync_points.data(), &nd_sync_point, - nullptr)); - - CL_CHECK(clCommandCopyBufferKHR(command_buffer, - command_queue, buffer_res, buffer_dst, 0, tile_index * tile_size, - tile_size, 1, &nd_sync_point, &tile_sync_point, nullptr)); - } - - CL_CHECK(clFinalizeCommandBufferKHR(command_buffer)); - - std::random_device random_device; - std::mt19937 random_engine{random_device()}; - std::uniform_int_distribution<cl_int> random_distribution{ - 0, std::numeric_limits<cl_int>::max() / 2}; - auto random_generator = [&]() { return random_distribution(random_engine); }; - - for (size_t frame_index = 0; frame_index < frame_count; frame_index++) { - std::array<cl_event, 2> write_src_events; - std::vector<cl_int> src1(frame_elements); - std::generate(src1.begin(), src1.end(), random_generator); - CL_CHECK(clEnqueueWriteBuffer(command_queue, buffer_src1, CL_FALSE, 0, - frame_size, src1.data(), 0, nullptr, - &write_src_events[0])); - std::vector<cl_int> src2(frame_elements); - std::generate(src2.begin(), src2.end(), random_generator); - CL_CHECK(clEnqueueWriteBuffer(command_queue, buffer_src2, CL_FALSE, 0, - frame_size, src2.data(), 0, nullptr, - &write_src_events[1])); - - CL_CHECK(clEnqueueCommandBufferKHR(0, NULL, command_buffer, 2, - write_src_events.data(), nullptr)); - - CL_CHECK(clFinish(command_queue)); - - CL_CHECK(clReleaseEvent(write_src_events[0])); - CL_CHECK(clReleaseEvent(write_src_events[1])); - } - - CL_CHECK(clReleaseCommandBufferKHR(command_buffer)); - CL_CHECK(clReleaseCommandQueue(command_queue)); - - CL_CHECK(clReleaseMemObject(buffer_src1)); - CL_CHECK(clReleaseMemObject(buffer_src2)); - CL_CHECK(clReleaseMemObject(buffer_dst)); - - CL_CHECK(clReleaseMemObject(buffer_tile1)); - CL_CHECK(clReleaseMemObject(buffer_tile2)); -
CL_CHECK(clReleaseMemObject(buffer_res)); - - CL_CHECK(clReleaseKernel(kernel)); - CL_CHECK(clReleaseProgram(program)); - CL_CHECK(clReleaseContext(context)); - - return 0; - } ----- - -=== Issues - -. Introduce a `clCloneCommandBufferKHR` entry-point for cloning a - command-buffer. -+ --- -*UNRESOLVED* --- -. Enable detached command-buffer execution, where command-buffers are executed - on their own internal queue to prevent locking user created queues for the - duration of their execution. -+ --- -*UNRESOLVED* --- diff --git a/ext/cl_khr_command_buffer_multi_device.asciidoc b/ext/cl_khr_command_buffer_multi_device.asciidoc deleted file mode 100644 index cd1c638a..00000000 --- a/ext/cl_khr_command_buffer_multi_device.asciidoc +++ /dev/null @@ -1,767 +0,0 @@ -// Copyright 2018-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ - -[[cl_khr_command_buffer_multi_device]] -== Command Buffers - Multiple Devices (Provisional) - -This extension enables users to record commands across multiple queues in the same command-buffer, -providing execution of heterogeneous task graphs from command-queues associated with different devices. - -=== General Information - -==== Name Strings - -`cl_khr_command_buffer_multi_device` - -==== Version History - -[cols="1,1,3",options="header",] -|==== -| *Date* | *Version* | *Description* -| 2023-04-14 | 0.9.0 | First assigned version (provisional). -| 2024-04-30 | 0.9.1 | Added clCommandSVMMemcpyKHR and clCommandSVMMemFillKHR as affected functions (provisional). -|==== - -include::provisional_notice.asciidoc[] - -==== Dependencies - -This extension requires the `cl_khr_command_buffer` extension version 0.9.3. - -==== Contributors - -Ewan Crawford, Codeplay Software Ltd. + -Gordon Brown, Codeplay Software Ltd. + -Kenneth Benzie, Codeplay Software Ltd. + -Alastair Murray, Codeplay Software Ltd. 
+ -Jack Frankland, Codeplay Software Ltd. + -Balaji Calidas, Qualcomm Technologies Inc. + -Joshua Kelly, Qualcomm Technologies, Inc. + -Kevin Petit, Arm Ltd. + -Aharon Abramson, Intel. + -Ben Ashbaugh, Intel. + -Boaz Ouriel, Intel. + -Pekka Jääskeläinen, Tampere University and Intel. + -Jan Solanti, Tampere University + -Nikhil Joshi, NVIDIA + -James Price, Google + - -=== Overview - -The `cl_khr_command_buffer` extension separates command construction from -enqueue by providing a mechanism to record a set of commands which can then be -repeatedly enqueued. However, the commands in a command-buffer can -only be recorded to a single command-queue specified on command-buffer creation. - -`cl_khr_command_buffer_multi_device` extends the scope of a command-buffer to -allow commands to be recorded across multiple queues in the same command-buffer, -providing execution of heterogeneous task graphs from command-queues associated -with different devices. - -The ability for a user to deep copy an existing command-buffer so that the -commands target a different device is also made possible by -`cl_khr_command_buffer_multi_device`. Depending on platform support the mapping -of commands to the new target device can be done either explicitly by the user, -or automatically by the OpenCL runtime. 
- -=== New Types - -Bitfield for querying command-buffer capabilities of an OpenCL Platform with -{clGetPlatformInfo}, see the -<<cl_khr_command_buffer_multi_device-platform-queries, platform queries table>>: -[source,opencl] ----- -typedef cl_bitfield cl_platform_command_buffer_capabilities_khr ----- - -=== New API Functions - -[source,opencl] ----- -cl_command_buffer_khr clRemapCommandBufferKHR( - cl_command_buffer_khr command_buffer, - cl_bool automatic, - cl_uint num_queues, - const cl_command_queue* queues, - cl_uint num_handles, - const cl_mutable_command_khr* handles, - cl_mutable_command_khr* handles_ret, - cl_int* errcode_ret); ----- - -=== New API Enums - -Enums for querying device command-buffer capabilities with -{clGetDeviceInfo}, see the -<<cl_khr_command_buffer_multi_device-device-queries, device queries table>>: - -[source,opencl] ----- -// Accepted values for the param_name parameter to clGetDeviceInfo -CL_DEVICE_COMMAND_BUFFER_NUM_SYNC_DEVICES_KHR 0x12AB -CL_DEVICE_COMMAND_BUFFER_SYNC_DEVICES_KHR 0x12AC - -// Bits for cl_device_command_buffer_capabilities_khr bitfield -CL_COMMAND_BUFFER_CAPABILITY_MULTIPLE_QUEUE_KHR (0x1 << 4) - -// Bits for cl_command_buffer_flags_khr -CL_COMMAND_BUFFER_DEVICE_SIDE_SYNC_KHR (0x1 << 2) ----- - -Enums for querying platform command-buffer capabilities with -{clGetPlatformInfo}, see the -<<cl_khr_command_buffer_multi_device-platform-queries, platform queries table>>: - -[source,opencl] ----- -// Accepted values for the param_name parameter to clGetPlatformInfo -CL_PLATFORM_COMMAND_BUFFER_CAPABILITIES_KHR 0x0908 - -// Bits for cl_platform_command_buffer_capabilities_khr bitfield -CL_COMMAND_BUFFER_PLATFORM_UNIVERSAL_SYNC_KHR (0x1 << 0) -CL_COMMAND_BUFFER_PLATFORM_REMAP_QUEUES_KHR (0x1 << 1) -CL_COMMAND_BUFFER_PLATFORM_AUTOMATIC_REMAP_KHR (0x1 << 2) ----- - -=== Modifications to section 4.1 of the OpenCL API Specification - -Add to *Table 3*, _Platform Queries_, - -[[cl_khr_command_buffer_multi_device-platform-queries]] -[cols="1,1,4",options="header"] -|==== -| Platform Info -| Return Type -| Description - -| {CL_PLATFORM_COMMAND_BUFFER_CAPABILITIES_KHR} -| {cl_platform_command_buffer_capabilities_khr_TYPE} -| Describes platform
command-buffer capabilities, encoded as bits in a bitfield. - Supported capabilities are: - - {CL_COMMAND_BUFFER_PLATFORM_UNIVERSAL_SYNC_KHR} - Platform supports the ability - to synchronize all commands in a command-buffer using sync-points, irrespective - of the queue the individual commands are recorded to. - - {CL_COMMAND_BUFFER_PLATFORM_REMAP_QUEUES_KHR} - Platform supports the ability - to create a deep copy of an existing command-buffer with the commands - explicitly remapped to different, potentially incompatible, - queues. - - {CL_COMMAND_BUFFER_PLATFORM_AUTOMATIC_REMAP_KHR} - Platform supports the - ability to create a remapped command-buffer where the mapping of commands to - queues is done by the OpenCL runtime in a way it determines as optimal. If - {CL_COMMAND_BUFFER_PLATFORM_AUTOMATIC_REMAP_KHR} is reported, - {CL_COMMAND_BUFFER_PLATFORM_REMAP_QUEUES_KHR} must also be reported. - -|==== - -=== Modifications to section 4.2 of the OpenCL API Specification - -Add {CL_DEVICE_COMMAND_BUFFER_NUM_SYNC_DEVICES_KHR} and -{CL_DEVICE_COMMAND_BUFFER_SYNC_DEVICES_KHR} rows to *Table 5*, _Device Queries_, -of section 4.2. Also, add additional text to the -{CL_DEVICE_COMMAND_BUFFER_CAPABILITIES_KHR} row: - -[[cl_khr_command_buffer_multi_device-device-queries]] -[cols="1,1,4",options="header"] -|==== -| {cl_device_info_TYPE} -| Return Type -| Description - -| {CL_DEVICE_COMMAND_BUFFER_CAPABILITIES_KHR} -| {cl_device_command_buffer_capabilities_khr_TYPE} -| Describes device command-buffer capabilities, encoded as bits in a bitfield. - Supported capabilities are: - - {CL_COMMAND_BUFFER_CAPABILITY_MULTIPLE_QUEUE_KHR} Device supports the ability - to record commands to more than one command-queue associated with _device_ in - a single command-buffer. - -| {CL_DEVICE_COMMAND_BUFFER_NUM_SYNC_DEVICES_KHR} -| {cl_uint_TYPE} -| Return the number of root devices listed in - {CL_DEVICE_COMMAND_BUFFER_SYNC_DEVICES_KHR} that _device_ can use device-side - synchronization with.
- -| {CL_DEVICE_COMMAND_BUFFER_SYNC_DEVICES_KHR} -| {cl_device_id_TYPE}[] -| Return the list of root devices _device_ can use device-side synchronization - with. A device should list itself only if it has native support for - synchronizing commands. Sub-devices are not listed to avoid non-deterministic - results as sub-devices are created, instead if a root device is listed, then - any of its partitioned sub-devices can also be natively synchronized with. - -|==== - -=== Modifications to section 5.11 of the OpenCL API Specification - -Add additional wording to the description column of *Table 36*, _Event Object -Queries_: - -{CL_EVENT_COMMAND_QUEUE} - For events returned by a command-buffer enqueue -operation to multiple command-queues, `NULL` is returned. - -{CL_EVENT_COMMAND_EXECUTION_STATUS} - For events returned by a command-buffer -enqueue operation to multiple command-queues the semantics of execution status -is as follows: - - * {CL_QUEUED} - Command-buffer has been enqueued across the command-queues. - - * {CL_SUBMITTED} - Commands from the command-buffer have been submitted by - the host to any device associated with one of the command-queues. - - * {CL_RUNNING} - Any command from the command-buffer has started execution on - a device. - - * {CL_COMPLETE} - All commands have completed on all devices. - -=== Modifications to section 5.14 of the OpenCL API Specification - -==== Query Updates - -Add additional wording to description column of *Table 38*, _Event Profiling -Queries_: - -* {CL_PROFILING_COMMAND_QUEUED} - For events returned by a command-buffer -enqueue operation to multiple command-queues, the host time when the -command-buffer has been enqueued across the command-queues is used. - -* {CL_PROFILING_COMMAND_SUBMIT} - For events returned by a command-buffer -enqueue operation to multiple command-queues, the host time is used when -command-buffer commands have been submitted to any command-queue. 
- -* {CL_PROFILING_COMMAND_START} - For events returned by a command-buffer -enqueue operation to multiple command-queues, the host time is used when -any device starts executing a command-buffer command. - -* {CL_PROFILING_COMMAND_END} - For events returned by a command-buffer -enqueue operation to multiple command-queues, the host time is used when -the last command-buffer command finishes execution on any device. - -* {CL_PROFILING_COMMAND_COMPLETE} - For events returned by a command-buffer -enqueue operation to multiple command-queues, the host time is used when the -command-buffer has completed execution across all command-queues. - -[NOTE] -==== -If no reliable device timer sources are available to inform the host side, -or parallel runtime scheduling makes it impossible to identify a first/last -command, then an implementation may fallback to reporting -{CL_PROFILING_COMMAND_SUBMIT} and {CL_PROFILING_COMMAND_COMPLETE} for -{CL_PROFILING_COMMAND_START} and {CL_PROFILING_COMMAND_END} respectively. -==== - -==== Error Updates - -Extend the wording defining the {CL_PROFILING_INFO_NOT_AVAILABLE} error return -code from {clGetEventProfilingInfo} to append the following sentence: - -* If _event_ was created from a call to {clEnqueueCommandBufferKHR}, - {CL_PROFILING_INFO_NOT_AVAILABLE} is returned if all the queues passed - do not have {CL_QUEUE_PROFILING_ENABLE} set. - -=== Modifications to Section 5.X - Command Buffers of the OpenCL API Specification - -==== Additional Section 5.X Introduction Text - -A command-buffer can contain commands recorded to the queues of different -devices if a vendor provides support for inter-device {cl_sync_point_khr_TYPE} -synchronization. 
This feature is reported either through -{CL_DEVICE_COMMAND_BUFFER_SYNC_DEVICES_KHR}, which informs the user what devices can -synchronize with each other natively on the device-side, or through -{CL_COMMAND_BUFFER_PLATFORM_UNIVERSAL_SYNC_KHR}, which allows synchronization -between all devices in a platform, falling back to host-side synchronization -when device-side synchronization isn't available. These two mechanisms are -referred to as **device-side sync** and **universal sync** respectively. - -If these mechanisms don't report that more than one device can be used in a -command-buffer, it will still be possible to perform multiple queue recording in a -command-buffer if the {CL_COMMAND_BUFFER_CAPABILITY_MULTIPLE_QUEUE_KHR} -capability is reported for a device. However, with this capability all the -queues commands are recorded to must target the same device. - -Commands recorded to different command-queues in the same command-buffer may be -executed concurrently with each other unless synchronized explicitly with -sync-points. Ordering of other commands submitted to the same command-queues as -used to enqueue a command-buffer is the responsibility of the programmer. A -command-buffer enqueue spanning multiple queues can return an event to use for -synchronization, which will complete once all commands in the command-buffer -have completed. If ordering restrictions are required, this event (or -command-queue barriers) may be used by the user to synchronize the -command-buffer enqueue with regular commands, or another command-buffer enqueue. - -==== Add new section "Section 5.X.Y - Remapping Command Buffers" - -Platforms reporting the {CL_COMMAND_BUFFER_PLATFORM_REMAP_QUEUES_KHR} capability -support generating a deep copy of a command-buffer with its commands remapped to a -list of command-queues that are potentially incompatible with the queues -used to create the command-buffer.
That is, the remapped command-buffer can -execute on queues that differ in terms of properties and/or associated device -from the original command-buffer queues. - -This functionality is invoked through a new synchronous entry-point -{clRemapCommandBufferKHR} which takes a list of queues which the commands -should now target. It then returns a command-buffer containing the same -commands as the original, with the same command dependencies, but targeting -different queues. A list of command handles may also be passed to the -entry-point, which allows handles to the equivalent commands in the remapped -command-buffer to be returned by an output parameter. - -Device properties restrict remapping possibilities, as existing commands -can have a configuration which is not supported by another device, and so -remapping may fail with an error relating to this incompatibility. Examples -of command configurations which can introduce incompatibilities when trying to -map to a new device are: - -* Program language features used in a kernel not supported by the new device. -* ND-Range configuration, e.g. exceeds the new device's max work-group size. -* Misalignment of sub-buffers based on minimum alignment of the new device. - -In addition to this functionality, platforms reporting -{CL_COMMAND_BUFFER_PLATFORM_AUTOMATIC_REMAP_KHR} allow the user to create a -remapped command-buffer where the mapping of commands to queues is determined by -the OpenCL runtime in a way it determines as optimal. This is particularly -useful in hot plugging environments where devices may appear and disappear -during runtime. - -The function -include::{generated}/api/protos/clRemapCommandBufferKHR.txt[] - -Creates a deep copy of the input command-buffer with the copied commands -remapped to target the passed command-queues.
The returned command-buffer -has the same state as the input command-buffer, unless the input -command-buffer is in the pending state, in which case the returned -command-buffer has state executable. - -_command_buffer_ Specifies the command-buffer to create a remapped deep copy of. - -_automatic_ Indicates if the remapping is done explicitly by the user, or -automatically by the OpenCL runtime. If _automatic_ is {CL_FALSE}, then each -element of _queues_ will replace the queue used on _command_buffer_ creation at -the same index. If {CL_TRUE} and {CL_COMMAND_BUFFER_PLATFORM_AUTOMATIC_REMAP_KHR} -is supported, then the OpenCL runtime will decide in a way it determines optimal -which of the elements in _queues_ each command in the returned command-buffer -will be associated with. - -_num_queues_ The number of command-queues listed in _queues_, must not be 0. - -_queues_ A pointer to an ordered list of command-queues for the returned -command-buffer to target, must be a non-`NULL` value. - -_num_handles_ The number of command handles passed in both _handles_ and -_handles_ret_ lists, may be 0. - -_handles_ An ordered list of handles belonging to _command_buffer_ to create -remapped copies of, may be `NULL`. - -_handles_ret_ Returns an ordered list of handles where each handle is equivalent -to the handle at the same index in _handles_, but belonging to the returned -command-buffer. - -_errcode_ret_ Returns an appropriate error code. If _errcode_ret_ is `NULL`, no -error code is returned. - -{clRemapCommandBufferKHR} returns a valid command-buffer with _errcode_ret_ set -to {CL_SUCCESS} if the command-buffer is created successfully. Otherwise, it -returns a `NULL` value without setting _handles_ret_, and with one of the -following error values returned in _errcode_ret_: - -* {CL_INVALID_COMMAND_BUFFER_KHR} if _command_buffer_ is not a valid - command-buffer. - -* {CL_INVALID_VALUE} if _num_queues_ is 0, or if _queues_ is `NULL`.
- -* {CL_INVALID_VALUE} if _automatic_ is {CL_FALSE} and _num_queues_ is not equal - to the number of queues used on creation of _command_buffer_. - -* {CL_INVALID_VALUE} if _handles_ or _handles_ret_ is `NULL` and - _num_handles_ is > 0, or either _handles_ or _handles_ret_ is not - `NULL` and _num_handles_ is 0. - -* {CL_INVALID_VALUE} if any handle in _handles_ is not a valid command handle - belonging to _command_buffer_. - -* {CL_INVALID_COMMAND_QUEUE} if any command-queue in _queues_ is not a valid - command-queue. - -* {CL_INVALID_CONTEXT} if _command_buffer_ and all the command-queues in - _queues_ do not have the same OpenCL context. - -* {CL_INVALID_OPERATION} if the platform does not support the - {CL_COMMAND_BUFFER_PLATFORM_REMAP_QUEUES_KHR} flag. - -* {CL_INVALID_OPERATION} if the platform does not support the - {CL_COMMAND_BUFFER_PLATFORM_AUTOMATIC_REMAP_KHR} flag and _automatic_ is - {CL_TRUE}. - -* {CL_INCOMPATIBLE_COMMAND_QUEUE_KHR} if such an error would be returned by - passing _queues_ to {clCreateCommandBufferKHR}. - -* Any error relating to device support that can be returned by a command - recording entry-point may also be returned. As a command in _command_buffer_ - can have a configuration that is not supported by a device that is associated - with the queue in _queues_ the command is being remapped to. - -* {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources - required by the OpenCL implementation on the device. - -* {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources - required by the OpenCL implementation on the host. - -==== Modifications to clCreateCommandBufferKHR - -==== New Property Flag - -Modify the {CL_COMMAND_BUFFER_FLAGS_KHR} property in the -{clCreateCommandBufferKHR} properties table to introduce a new flag to the -bitfield. The following text is now included in the description of property -values. 
- -[cols=",,",options="header",] -|==== -| *Recording Properties* -| *Property Value* -| *Description* - -| {CL_COMMAND_BUFFER_FLAGS_KHR} -| {cl_command_buffer_flags_khr_TYPE} -| {CL_COMMAND_BUFFER_DEVICE_SIDE_SYNC_KHR} - All commands in the command-buffer - must use native synchronization, as reported by - {CL_DEVICE_COMMAND_BUFFER_SYNC_DEVICES_KHR}. This can be used as a safeguard - for performant applications that don't want to accidentally fallback to host - synchronization when passing multiple queues. -|==== - -==== Add to clCreateCommandBufferKHR description - -.Summary of command-buffer creation configurations -[width="100%",options="header"] - -|==== -| All devices associated with `queues` can device-side sync | Platform supports universal sync | Condition | Result - -.3+| Yes -.3+| Yes or No -| Any device does not support the multi-queue capability, and has more than one - queue targeting it -| Error - {CL_INCOMPATIBLE_COMMAND_QUEUE_KHR} -| User sets {CL_COMMAND_BUFFER_DEVICE_SIDE_SYNC_KHR} flag -| OK -| Otherwise -| OK - -.3+| No -.3+| Yes -| Any device does not support the multi-queue capability, and has more than one - queue targeting it -| Error - {CL_INCOMPATIBLE_COMMAND_QUEUE_KHR} -| User sets {CL_COMMAND_BUFFER_DEVICE_SIDE_SYNC_KHR} flag -| Error - {CL_INCOMPATIBLE_COMMAND_QUEUE_KHR} -| Otherwise -| OK - May be performance implications when synchronizing commands between devices - without device-side sync support. - -| No -| No -| Always -| Error - {CL_INCOMPATIBLE_COMMAND_QUEUE_KHR} - -|==== - -===== Parameter Updates - -Parameter descriptions changed to: - -_num_queues_ The number of command-queues listed in _queues_. - -_queues_ Is a pointer to a list of command-queues that the command-buffer may be -executed on. _queues_ must be a non-`NULL` value and length of the list equal to -_num_queues_. - -===== Error Updates - -The returned error: - -* {CL_INVALID_VALUE} if _num_queues_ is not one. 
- -Is changed to: - -* {CL_INVALID_VALUE} if _num_queues_ is zero. - -Additional errors: - -* {CL_INCOMPATIBLE_COMMAND_QUEUE_KHR} if _queues_ includes more than one - command-queue associated with a device that does not support capability - {CL_COMMAND_BUFFER_CAPABILITY_MULTIPLE_QUEUE_KHR}. - -* {CL_INCOMPATIBLE_COMMAND_QUEUE_KHR} if the - {CL_COMMAND_BUFFER_DEVICE_SIDE_SYNC_KHR} flag is set, and any device - associated with a command-queue in _queues_ cannot natively synchronize with - the other devices associated with _queues_ as reported by - {CL_DEVICE_COMMAND_BUFFER_SYNC_DEVICES_KHR}. - -* {CL_INCOMPATIBLE_COMMAND_QUEUE_KHR} if the platform doesn't support the - {CL_COMMAND_BUFFER_PLATFORM_UNIVERSAL_SYNC_KHR} capability, and any device - associated with a command-queue in _queues_ cannot natively synchronize with - the other devices associated with _queues_ as reported by - {CL_DEVICE_COMMAND_BUFFER_SYNC_DEVICES_KHR}. - -==== Command recording entry points - -The descriptions of command recording entry-points are modified as described in -this section. These changes apply to all of {clCommandCopyBufferKHR}, -{clCommandCopyBufferRectKHR}, {clCommandCopyBufferToImageKHR}, -{clCommandCopyImageKHR}, {clCommandCopyImageToBufferKHR}, -{clCommandFillBufferKHR}, {clCommandFillImageKHR}, -{clCommandNDRangeKernelKHR}, {clCommandSVMMemcpyKHR} and -{clCommandSVMMemFillKHR}. - -===== Parameter Update - -Parameter description of _command_queue_ is changed to: - -_command_queue_ Specifies the command-queue the command will be recorded to. -If _command_queue_ is `NULL` then only one command-queue must have been set -on _command_buffer_ creation, otherwise _command_queue_ must be a non-`NULL` -value. - -===== Error Update - -The error condition: - -* {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not `NULL`. 
- -Is changed to: - -* {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is `NULL` and _command_buffer_ - was created with more than one queue, or if _command_queue_ is non-`NULL` and - not a command-queue listed on _command_buffer_ creation. - -=== Sample Code - -[source,opencl] ----- -#define CL_CHECK(ERROR) \ - if (ERROR) { \ - std::cerr << "OpenCL error: " << ERROR << "\n"; \ - return ERROR; \ - } - -int main() { - cl_platform_id platform; - CL_CHECK(clGetPlatformIDs(1, &platform, nullptr)); - cl_platform_command_buffer_capabilities_khr platform_caps; - CL_CHECK(clGetPlatformInfo(platform, - CL_PLATFORM_COMMAND_BUFFER_CAPABILITIES_KHR, - sizeof(platform_caps), &platform_caps, NULL)); - if (!(platform_caps & CL_COMMAND_BUFFER_PLATFORM_AUTOMATIC_REMAP_KHR)) { - std::cerr << "Command-buffer remapping not supported but used in example, " - "skipping\n"; - return 0; - } - - cl_uint num_devices = 0; - CL_CHECK(clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 0, NULL, &num_devices)); - std::vector<cl_device_id> devices(num_devices); - CL_CHECK( - clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, num_devices, devices.data(), nullptr)); - - // Checks omitted for brevity that either a) the platform supports - // CL_COMMAND_BUFFER_PLATFORM_UNIVERSAL_SYNC_KHR or b) each device is listed - // in the others CL_DEVICE_COMMAND_BUFFER_SYNC_DEVICES_KHR - - cl_int error; - cl_context context = - clCreateContext(NULL, num_devices, devices.data(), NULL, NULL, &error); - CL_CHECK(error); - - std::vector<cl_command_queue> queues(num_devices); - for (cl_uint i = 0; i < num_devices; i++) { - queues[i] = clCreateCommandQueue(context, devices[i], 0, &error); - CL_CHECK(error); - } - - const char *code = R"OpenCLC( - kernel void vector_addition(global int* tile1, global int* tile2, - global int* res) { - size_t index = get_global_id(0); - res[index] = tile1[index] + tile2[index]; - } - )OpenCLC"; - const size_t length = std::strlen(code); - - cl_program program = - clCreateProgramWithSource(context, 1, &code, &length, &error); - 
CL_CHECK(error); - - CL_CHECK( - clBuildProgram(program, num_devices, devices.data(), NULL, NULL, NULL)); - - cl_kernel kernel = clCreateKernel(program, "vector_addition", &error); - CL_CHECK(error); - - constexpr size_t frame_count = 60; - constexpr size_t frame_elements = 1024; - constexpr size_t frame_size = frame_elements * sizeof(cl_int); - - constexpr size_t tile_count = 16; - constexpr size_t tile_elements = frame_elements / tile_count; - constexpr size_t tile_size = tile_elements * sizeof(cl_int); - - cl_mem buffer_tile1 = - clCreateBuffer(context, CL_MEM_READ_ONLY, tile_size, NULL, &error); - CL_CHECK(error); - - cl_mem buffer_tile2 = - clCreateBuffer(context, CL_MEM_READ_ONLY, tile_size, NULL, &error); - CL_CHECK(error); - - cl_mem buffer_res = - clCreateBuffer(context, CL_MEM_WRITE_ONLY, tile_size, NULL, &error); - CL_CHECK(error); - - CL_CHECK(clSetKernelArg(kernel, 0, sizeof(buffer_tile1), &buffer_tile1)); - CL_CHECK(clSetKernelArg(kernel, 1, sizeof(buffer_tile2), &buffer_tile2)); - CL_CHECK(clSetKernelArg(kernel, 2, sizeof(buffer_res), &buffer_res)); - - cl_command_buffer_khr original_cmdbuf = - clCreateCommandBufferKHR(num_devices, queues.data(), nullptr, &error); - CL_CHECK(error); - - cl_mem buffer_src1 = - clCreateBuffer(context, CL_MEM_READ_ONLY, frame_size, NULL, &error); - CL_CHECK(error); - - cl_mem buffer_src2 = - clCreateBuffer(context, CL_MEM_READ_ONLY, frame_size, NULL, &error); - CL_CHECK(error); - - cl_mem buffer_dst = - clCreateBuffer(context, CL_MEM_READ_WRITE, frame_size, NULL, &error); - CL_CHECK(error); - - cl_sync_point_khr tile_sync_point = 0; - for (size_t tile_index = 0; tile_index < tile_count; tile_index++) { - cl_sync_point_khr copy_sync_points[2]; - CL_CHECK(clCommandCopyBufferKHR( - original_cmdbuf, queues[tile_index % num_devices], buffer_src1, - buffer_tile1, tile_index * tile_size, 0, tile_size, - tile_sync_point ? 1 : 0, tile_sync_point ? 
&tile_sync_point : NULL, - &copy_sync_points[0], NULL)); - - CL_CHECK(clCommandCopyBufferKHR( - original_cmdbuf, queues[tile_index % num_devices], buffer_src2, - buffer_tile2, tile_index * tile_size, 0, tile_size, - tile_sync_point ? 1 : 0, - tile_sync_point ? &tile_sync_point : nullptr, - &copy_sync_points[1], NULL)); - - cl_sync_point_khr nd_sync_point; - CL_CHECK(clCommandNDRangeKernelKHR( - original_cmdbuf, queues[tile_index % num_devices], NULL, kernel, 1, - NULL, &tile_elements, NULL, 2, copy_sync_points, &nd_sync_point, NULL)); - - CL_CHECK(clCommandCopyBufferKHR( - original_cmdbuf, queues[tile_index % num_devices], buffer_res, - buffer_dst, 0, tile_index * tile_size, tile_size, 1, &nd_sync_point, - &tile_sync_point, NULL)); - } - - CL_CHECK(clFinalizeCommandBufferKHR(original_cmdbuf)); - - std::random_device random_device; - std::mt19937 random_engine{random_device()}; - std::uniform_int_distribution<cl_int> random_distribution{ - 0, std::numeric_limits<cl_int>::max() / 2}; - auto random_generator = [&]() { return random_distribution(random_engine); }; - - auto enqueue_frame = [&](cl_command_buffer_khr command_buffer) { - for (size_t frame_index = 0; frame_index < frame_count; frame_index++) { - std::array<cl_event, 3> enqueue_events; - std::vector<cl_int> src1(frame_elements); - std::generate(src1.begin(), src1.end(), random_generator); - CL_CHECK(clEnqueueWriteBuffer(queues[0], buffer_src1, CL_FALSE, 0, - frame_size, src1.data(), 0, nullptr, - &enqueue_events[0])); - std::vector<cl_int> src2(frame_elements); - std::generate(src2.begin(), src2.end(), random_generator); - CL_CHECK(clEnqueueWriteBuffer(queues[0], buffer_src2, CL_FALSE, 0, - frame_size, src2.data(), 0, nullptr, - &enqueue_events[1])); - - CL_CHECK(clEnqueueCommandBufferKHR(0, NULL, command_buffer, 2, - enqueue_events.data(), - &enqueue_events[2])); - - CL_CHECK(clWaitForEvents(1, &enqueue_events[2])); - - for (auto e : enqueue_events) { - CL_CHECK(clReleaseEvent(e)); - } - } - return 0; - }; - - error = enqueue_frame(original_cmdbuf); - 
CL_CHECK(error); - - // Remap from N queues to 1 queue and run again - cl_command_buffer_khr remapped_cmdbuf = clRemapCommandBufferKHR( - original_cmdbuf, CL_TRUE, 1, queues.data(), 0, NULL, NULL, &error); - CL_CHECK(error); - - error = enqueue_frame(remapped_cmdbuf); - CL_CHECK(error); - - for (unsigned i = 0; i < num_devices; ++i) { - CL_CHECK(clReleaseCommandQueue(queues[i])); - } - CL_CHECK(clReleaseMemObject(buffer_src1)); - CL_CHECK(clReleaseMemObject(buffer_src2)); - CL_CHECK(clReleaseMemObject(buffer_dst)); - - CL_CHECK(clReleaseMemObject(buffer_tile1)); - CL_CHECK(clReleaseMemObject(buffer_tile2)); - CL_CHECK(clReleaseMemObject(buffer_res)); - - CL_CHECK(clReleaseCommandBufferKHR(original_cmdbuf)); - CL_CHECK(clReleaseCommandBufferKHR(remapped_cmdbuf)); - - CL_CHECK(clReleaseKernel(kernel)); - CL_CHECK(clReleaseProgram(program)); - CL_CHECK(clReleaseContext(context)); - - return 0; -} ----- - -=== Issues - -. In cl_event profiling info for a command-buffer running across the queues for - several devices, how do we know what the first & last commands executed are - if there is concurrent execution across devices. -+ --- -*RESOLVED*: Allowed an implementation to fallback to {CL_PROFILING_COMMAND_SUBMIT} -and {CL_PROFILING_COMMAND_COMPLETE} when reporting {CL_PROFILING_COMMAND_START} & -{CL_PROFILING_COMMAND_END}. --- -. Is an atomic constraint required? This would forbid regular clEnqueue* commands, -from interleaving execution on a queue which a command-buffer is being executed -on. -+ --- -*RESOLVED*: This behavior can block parallelism, and constraint is expressible -by the user through existing synchronization mechanisms if they require it. --- -. It is currently an error if a set of command-queues passed to -{clEnqueueCommandBufferKHR} aren't compatible with those set on recording. -Should we relax this as an optional capability that allows an implementation to -do a more expensive command-buffer enqueue for this case? 
-+ --- -*RESOLVED*: Added as an optional feature. --- diff --git a/ext/cl_khr_command_buffer_mutable_dispatch.asciidoc b/ext/cl_khr_command_buffer_mutable_dispatch.asciidoc deleted file mode 100644 index b189c9c8..00000000 --- a/ext/cl_khr_command_buffer_mutable_dispatch.asciidoc +++ /dev/null @@ -1,1010 +0,0 @@ -// Copyright 2018-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ - -[[cl_khr_command_buffer_mutable_dispatch]] -== Command Buffers - Mutable Dispatch (Provisional) - -This extension enables users to modify the configuration of kernel execution -commands between command-buffer enqueues. - -=== General Information - -==== Name Strings - -`cl_khr_command_buffer_mutable_dispatch` - -==== Version History - -[cols="1,1,3",options="header",] -|==== -| *Date* | *Version* | *Description* -| 2022-08-31 | 0.9.0 | First assigned version (provisional). -| 2023-11-07 | 0.9.1 | Add type {cl_mutable_dispatch_asserts_khr_TYPE} and its possible values (provisional). -|==== - -include::provisional_notice.asciidoc[] - -==== Dependencies - -This extension requires the `cl_khr_command_buffer` extension version 0.9.0. - -==== Contributors - -Ewan Crawford, Codeplay Software Ltd. + -Gordon Brown, Codeplay Software Ltd. + -Kenneth Benzie, Codeplay Software Ltd. + -Alastair Murray, Codeplay Software Ltd. + -Jack Frankland, Codeplay Software Ltd. + -Balaji Calidas, Qualcomm Technologies Inc. + -Joshua Kelly, Qualcomm Technologies, Inc. + -Kevin Petit, Arm Ltd. + -Aharon Abramson, Intel. + -Ben Ashbaugh, Intel. + -Boaz Ouriel, Intel. + -Pekka Jääskeläinen, Tampere University + -Jan Solanti, Tampere University + -Nikhil Joshi, NVIDIA + -James Price, Google + - -=== Overview - -The `cl_khr_command_buffer` extension separates command construction from -enqueue by providing a mechanism to record a set of commands which can then be -repeatedly enqueued. 
However, the commands recorded to the command-buffer are -immutable between enqueues. - -`cl_khr_command_buffer_mutable_dispatch` removes this restriction, in particular, -this extension allows the configuration of a kernel execution command in a -command-buffer, called a _mutable-dispatch_, to be modified. This allows inputs -and outputs to the kernel, as well as work-item sizes and offsets, to change -without having to re-record the entire command sequence in a new command-buffer. - -=== Interactions with Other Extensions - -The {cl_command_buffer_structure_type_khr_TYPE} type has been added to this -extension for the purpose of allowing expansion of mutable functionality in -future extensions layered on top of `cl_khr_command_buffer_mutable_dispatch`. -Any parameter that is a structure containing a `void* next` member *must* have -a value of `next` that is either `NULL`, or is a pointer to a valid structure -defined by `cl_khr_command_buffer_mutable_dispatch` or an extension layered on -top. To be a valid structure in the pointer chain the first member of the -structure *must* be a {cl_command_buffer_structure_type_khr_TYPE} identifier for the -structure being iterated through, and the second member a `void* next` pointer -to the next structure in the chain. - -[NOTE] -==== -This approach is based on structure pointer chains in Vulkan, for more details -see the "Valid Usage for Structure Pointer Chains" section of the Vulkan -specification. -==== - -This is designed so that another extension layered on -`cl_khr_command_buffer_mutable_dispatch` could allow modification of commands -recorded to a command-buffer other than kernel execution commands. As all -command recording entry-points return a {cl_mutable_command_khr_TYPE} handle, and -aspects like which {cl_mem_TYPE} object a command uses could also be updated between -enqueues of the command-buffer. 
- -=== New Types - -==== Mutable Command Types - -Types for using mutable-commands objects from -<>: - -[source,opencl] ----- -// Bitfield covering each aspect of a mutable-dispatch which can be updated -typedef cl_bitfield cl_mutable_dispatch_fields_khr; - -// For querying mutable-command objects with clGetMutableCommandInfoKHR -typedef cl_uint cl_mutable_command_info_khr; - -// Identifies the type of a structure to allow structure pointer chains -typedef cl_uint cl_command_buffer_structure_type_khr; - -// Bitfield describing mutable-dispatch assertions, enabling possible optimizations -typedef cl_bitfield cl_mutable_dispatch_asserts_khr; ----- - -Struct type for setting kernel arguments normally passed using {clSetKernelArg} -and {clSetKernelArgSVMPointer}: - -include::{generated}/api/structs/cl_mutable_dispatch_arg_khr.txt[] - -Struct type for setting kernel execution info normally passed using -{clSetKernelExecInfo}: - -include::{generated}/api/structs/cl_mutable_dispatch_exec_info_khr.txt[] - -[NOTE] -==== -_param_name_ is of type {cl_uint_TYPE} rather than {cl_kernel_exec_info_TYPE} so that the -extension can be implemented on OpenCL 1.2 where the {cl_kernel_exec_info_TYPE} -typedef is unavailable. -==== - -Struct type passed to {clUpdateMutableCommandsKHR} for setting the kernel -configuration of a mutable {clCommandNDRangeKernelKHR} command: - -include::{generated}/api/structs/cl_mutable_dispatch_config_khr.txt[] - -_type_ Type of this structure, must be -{CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR}. - -_next_ Is `NULL` or a pointer to an extending structure. - -_command_ A mutable-command object returned by {clCommandNDRangeKernelKHR} -representing a kernel execution as part of a command-buffer. - -_num_args_ Is the number of kernel arguments being changed. - -_num_svm_args_ Is the number of SVM kernel arguments being changed. - -_num_exec_infos_ Is the number of kernel execution info objects to set for -this dispatch. 
- -_work_dim_ Is the number of dimensions used to specify the global work-items -and work-items in the work-group. See {clEnqueueNDRangeKernel} for valid usage. - -_arg_list_ Is an array describing the new kernel arguments for this enqueue. It -must contain _num_args_ array elements, each of which encapsulates parameters -passed to {clSetKernelArg}. See {clSetKernelArg} for usage of -{cl_mutable_dispatch_arg_khr_TYPE} members. - -_arg_svm_list_ is an array describing the new SVM kernel arguments for this -enqueue. It must contain _num_svm_args_ array elements, each of which -encapsulates parameters passed to {clSetKernelArgSVMPointer}. See -{clSetKernelArgSVMPointer} for usage of -{cl_mutable_dispatch_arg_khr_TYPE} members, -`arg_size` is ignored. - -_exec_info_list_ Is an array containing _num_exec_infos_ elements -specifying the list of execution info objects use for this command-buffer -enqueue. See {clSetKernelExecInfo} for usage of -{cl_mutable_dispatch_exec_info_khr_TYPE} -members. - -_global_work_offset_ Can be used to specify an array of _work_dim_ unsigned -values that describe the offset used to calculate the global ID of a work-item. -If _global_work_offset_ is `NULL` then the global offset of the dispatch is not -changed. See {clEnqueueNDRangeKernel} for valid usage. - -_global_work_size_ Points to an array of _work_dim_ unsigned values that -describe the number of global work-items in _work_dim_ dimensions that will -execute the kernel function. If _global_work_size_ is `NULL` then the number of -global work-items in the dispatch is not changed. See {clEnqueueNDRangeKernel} -for valid usage. - -_local_work_size_ Points to an array of _work_dim_ unsigned values that -describe the number of work-items that make up a work-group that will execute -the kernel. If _local_work_size_ is `NULL` then the number of local work-items -in the dispatch is not changed. See {clEnqueueNDRangeKernel} for valid usage. 
- -[[cl_mutable_base_config_khr]] -[source,opencl] ----- -typedef struct _cl_mutable_base_config_khr { - cl_command_buffer_structure_type_khr type, - const void* next, - cl_uint num_mutable_dispatch, - const cl_mutable_dispatch_config_khr* mutable_dispatch_list -} cl_mutable_base_config_khr; ----- - -_type_ Type of this structure, must be -{CL_STRUCTURE_TYPE_MUTABLE_BASE_CONFIG_KHR} - -_next_ Is `NULL` or a pointer to an extending structure. - -_num_mutable_dispatch_ Is the number of mutable-dispatch objects to configure -in this enqueue of the command-buffer. - -_mutable_dispatch_list_ Is an array containing _num_mutable_dispatch_ elements -describing the configurations of mutable kernel execution commands in the -command-buffer. For a description of struct members making up each array -element see {cl_mutable_dispatch_config_khr_TYPE}. - -=== New API Functions - -Mutable-handle entry points from <>: -[source,opencl] ----- -cl_int clUpdateMutableCommandsKHR( - cl_command_buffer_khr command_buffer, - const cl_mutable_base_config_khr* mutable_config); - -cl_int clGetMutableCommandInfoKHR( - cl_mutable_command_khr command, - cl_mutable_command_info_khr param_name, - size_t param_value_size, - void* param_value, - size_t* param_value_size_ret); ----- - -=== New API Enums - -Enums for working with mutable-command objects from -<>: - -[source,opencl] ----- -// Error code -CL_INVALID_MUTABLE_COMMAND_KHR -1141 - -// Accepted values for the param_name parameter to clGetDeviceInfo -CL_DEVICE_MUTABLE_DISPATCH_CAPABILITIES_KHR 0x12B0 - -// Accepted command buffer property to clCreateCommandBufferKHR -CL_COMMAND_BUFFER_MUTABLE_DISPATCH_ASSERTS_KHR 0x12B7 - -// Bits for cl_command_buffer_flags_khr -CL_COMMAND_BUFFER_MUTABLE_KHR (0x1 << 1) - -// Accepted ND-range kernel command properties to clCommandNDRangeKernelKHR -CL_MUTABLE_DISPATCH_UPDATABLE_FIELDS_KHR 0x12B1 -CL_MUTABLE_DISPATCH_ASSERTS_KHR 0x12B8 - -// Bits for cl_mutable_dispatch_fields_khr bitfield 
-CL_MUTABLE_DISPATCH_GLOBAL_OFFSET_KHR (0x1 << 0) -CL_MUTABLE_DISPATCH_GLOBAL_SIZE_KHR (0x1 << 1) -CL_MUTABLE_DISPATCH_LOCAL_SIZE_KHR (0x1 << 2) -CL_MUTABLE_DISPATCH_ARGUMENTS_KHR (0x1 << 3) -CL_MUTABLE_DISPATCH_EXEC_INFO_KHR (0x1 << 4) - -// Bits for cl_mutable_dispatch_asserts_khr bitfield -CL_MUTABLE_DISPATCH_ASSERT_NO_ADDITIONAL_WORK_GROUPS_KHR (0x1 << 0) - -// cl_mutable_command_info_khr -CL_MUTABLE_COMMAND_COMMAND_QUEUE_KHR 0x12A0 -CL_MUTABLE_COMMAND_COMMAND_BUFFER_KHR 0x12A1 -CL_MUTABLE_DISPATCH_PROPERTIES_ARRAY_KHR 0x12A2 -CL_MUTABLE_DISPATCH_KERNEL_KHR 0x12A3 -CL_MUTABLE_DISPATCH_DIMENSIONS_KHR 0x12A4 -CL_MUTABLE_DISPATCH_GLOBAL_WORK_OFFSET_KHR 0x12A5 -CL_MUTABLE_DISPATCH_GLOBAL_WORK_SIZE_KHR 0x12A6 -CL_MUTABLE_DISPATCH_LOCAL_WORK_SIZE_KHR 0x12A7 -CL_MUTABLE_COMMAND_COMMAND_TYPE_KHR 0x12AD ----- - -Enum values for {cl_command_buffer_structure_type_khr_TYPE} allowing the structure -types used for mutating commands between enqueues to be extended by future -extensions built on top of `cl_khr_command_buffer_mutable_dispatch`. Based on -structure pointer chains in Vulkan. -[source,opencl] ----- -CL_STRUCTURE_TYPE_MUTABLE_BASE_CONFIG_KHR 0 -CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR 1 ----- - -=== Modifications to section 4.2 of the OpenCL API Specification - -Add to *Table 5*, _Device Queries_, of section 4.2: - -[[command-dispatch-queries]] -[caption="Table 5. "] -.List of supported param_names by {clGetDeviceInfo} -[cols="1,1,4",options="header"] -|==== -| Device Info -| Return Type -| Description - -| {CL_DEVICE_MUTABLE_DISPATCH_CAPABILITIES_KHR} -| {cl_mutable_dispatch_fields_khr_TYPE} -| Describes device mutable-dispatch capabilities, encoded as bits in a bitfield. - Supported capabilities are: - - {CL_MUTABLE_DISPATCH_GLOBAL_OFFSET_KHR} Device supports the ability to modify - the _global_work_offset_ of kernel execution after command recording. 
- - {CL_MUTABLE_DISPATCH_GLOBAL_SIZE_KHR} Device supports the ability to modify - the _global_work_size_ of kernel execution after command recording. - - {CL_MUTABLE_DISPATCH_LOCAL_SIZE_KHR} Device supports the ability to modify - the _local_work_size_ of kernel execution after command recording. - - {CL_MUTABLE_DISPATCH_ARGUMENTS_KHR} Device supports the ability to modify - arguments set on a kernel after command recording. - - {CL_MUTABLE_DISPATCH_EXEC_INFO_KHR} Device supports the ability to modify - execution information set on a kernel after command recording. - -|==== - -=== Modifications to Section 5.X - Command Buffers of the OpenCL API Specification - -==== Modifications to clCreateCommandBufferKHR - -Modify the {CL_COMMAND_BUFFER_FLAGS_KHR} property in the -<> table to -introduce a new flag to the bitfield. The following text is now included in the -description of property values. - -[cols=",,",options="header",] -|==== -| *Recording Properties* -| *Property Value* -| *Description* - -| {CL_COMMAND_BUFFER_FLAGS_KHR} -| {cl_command_buffer_flags_khr_TYPE} -| {CL_COMMAND_BUFFER_MUTABLE_KHR} - Enables modification of the - command-buffer, by default command-buffers are immutable. If set, - commands in the command-buffer may be updated via {clUpdateMutableCommandsKHR}. - -|==== - -Add a {CL_COMMAND_BUFFER_MUTABLE_DISPATCH_ASSERTS_KHR} property to the -<> table. - -[cols=",,",options="header",] -|==== -| *Recording Properties* -| *Property Value* -| *Description* - -| {CL_COMMAND_BUFFER_MUTABLE_DISPATCH_ASSERTS_KHR} -| {cl_mutable_dispatch_asserts_khr_TYPE} -| This is a bitfield and can be set to a combination of the following values: - - {CL_MUTABLE_DISPATCH_ASSERT_NO_ADDITIONAL_WORK_GROUPS_KHR} - An assertion by the user that the number of work-groups of any ND-range kernel recorded in this command - buffer will not be updated beyond the number defined when the ND-range kernel was recorded. 
- If the user's update to the values of _local_work_size_ and/or _global_work_size_ result in an increase - in the number of work-groups in the ND-range over the number specified when the ND-range kernel was - recorded, the behavior is undefined. - -|==== - -==== Modifications to clCommandNDRangeKernelKHR - -===== Properties Parameter - -Description of the _properties_ parameter is changed to: - -_properties_ Specifies a list of properties for the kernel command and their -corresponding values. Each property name is immediately followed by the -corresponding desired value. The list is terminated with 0. If a supported -property and its value is not specified in _properties_, its default value will -be used. _properties_ may be `NULL` in which case the default values for -supported properties will be used. The list of supported properties is described -in the table below. - -.{clCommandNDRangeKernelKHR} properties -[cols=",,",options="header",] -|==== -| *Recording Properties* -| *Property Value* -| *Description* - -| {CL_MUTABLE_DISPATCH_UPDATABLE_FIELDS_KHR} -| {cl_mutable_dispatch_fields_khr_TYPE} -| This is a bitfield and can be set to a combination of the following values: - - {CL_MUTABLE_DISPATCH_GLOBAL_OFFSET_KHR} - Determines whether the _global_work_offset_ of kernel execution can be - modified after recording. If set, the _global_work_offset_ of the kernel - execution can be changed with {clUpdateMutableCommandsKHR} using the - {cl_mutable_dispatch_config_khr_TYPE} field - of the _mutable_config_ parameter. Otherwise, the _global_work_offset_ cannot - be modified. - - {CL_MUTABLE_DISPATCH_GLOBAL_SIZE_KHR} - Determines whether the _global_work_size_ of kernel execution can be - modified after recording. If set, the _global_work_size_ of the kernel - execution can be changed with {clUpdateMutableCommandsKHR} using the - {cl_mutable_dispatch_config_khr_TYPE} field - of the _mutable_config_ parameter. Otherwise, the _global_work_size_ cannot be - modified. 
- - {CL_MUTABLE_DISPATCH_LOCAL_SIZE_KHR} - Determines whether the _local_work_size_ of kernel execution can be - modified after recording. If set, the _local_work_size_ of the kernel - execution can be changed with {clUpdateMutableCommandsKHR} using the - {cl_mutable_dispatch_config_khr_TYPE} field - of the _mutable_config_ parameter. Otherwise, the _local_work_size_ cannot be - modified. - - {CL_MUTABLE_DISPATCH_ARGUMENTS_KHR} - Determines whether the kernel arguments set on _kernel_ can be updated - between executions. If set, the kernel arguments normally set with - {clSetKernelArg} and {clSetKernelArgSVMPointer} can be changed with - {clUpdateMutableCommandsKHR} using the - {cl_mutable_dispatch_config_khr_TYPE} field - of the _mutable_config_ parameter. Otherwise, the kernel arguments cannot be - modified between executions. - - {CL_MUTABLE_DISPATCH_EXEC_INFO_KHR} - Determines whether the information passed to _kernel_ can be updated between - executions. If set, the execution information of the kernel can be changed - with {clUpdateMutableCommandsKHR} using the - {cl_mutable_dispatch_config_khr_TYPE} field of - the _mutable_config_ parameter. Otherwise, the kernel execution information - cannot be modified. - - If {CL_MUTABLE_DISPATCH_UPDATABLE_FIELDS_KHR} is not specified then it - defaults to the value returned by the - {CL_DEVICE_MUTABLE_DISPATCH_CAPABILITIES_KHR} device query. - -| {CL_MUTABLE_DISPATCH_ASSERTS_KHR} -| {cl_mutable_dispatch_asserts_khr_TYPE} -| This is a bitfield and can be set to a combination of the following values: - - {CL_MUTABLE_DISPATCH_ASSERT_NO_ADDITIONAL_WORK_GROUPS_KHR} - An assertion by the user that the number of work-groups of this ND-range kernel will not be updated beyond - the number defined when the ND-range kernel was recorded. The number of work-groups is - defined as the product for each _i_ from _0_ to _work_dim - 1_ of - _ceil(global_work_size[i]/local_work_size[i])_. 
-|==== - -===== Mutable Handle Parameter - -Description of the _mutable_handle_ parameter is changed to: - -_mutable_handle_ Returns a handle to the command that can be used in the -{cl_mutable_dispatch_config_khr_TYPE} struct -to update the command configuration between recordings, may be `NULL`. The -lifetime of this handle is tied to the parent command-buffer, such that freeing -the command-buffer will also free this handle. - -===== Additional Errors - -The error condition: - -* {CL_INVALID_OPERATION} if _mutable_handle_ is not `NULL`. - -Is replaced with - -* {CL_INVALID_OPERATION} if the requested - {CL_MUTABLE_DISPATCH_UPDATABLE_FIELDS_KHR} properties are not reported by - {CL_DEVICE_MUTABLE_DISPATCH_CAPABILITIES_KHR} for the device associated with - _command_queue_. If _command_queue_ is `NULL`, the device associated with - _command_buffer_ must report support for these properties. - -The following error condition is added: - -* {CL_INVALID_VALUE} if _command_buffer_ was created with the - {CL_COMMAND_BUFFER_MUTABLE_DISPATCH_ASSERTS_KHR} property with - {CL_MUTABLE_DISPATCH_ASSERT_NO_ADDITIONAL_WORK_GROUPS_KHR} and - _local_work_size_ is `NULL`, or if _properties_ includes the - {CL_MUTABLE_DISPATCH_ASSERTS_KHR} property with - {CL_MUTABLE_DISPATCH_ASSERT_NO_ADDITIONAL_WORK_GROUPS_KHR} and - _local_work_size_ is `NULL`. - -[[mutable-commands]] -==== New Section in the OpenCL API specification 5.X.5 - Mutable Commands: - -A generic {cl_mutable_command_khr_TYPE} handle is called a _mutable-command_ object -as it can be returned from any command recording entry-point in the -`cl_khr_command_buffer` family of extensions. The mutable-command handles -returned by {clCommandNDRangeKernelKHR} in particular are referred to as -_mutable-dispatch_ objects, and can be modified through the fields of -{cl_mutable_dispatch_config_khr_TYPE}. - -Mutable-command handles are updated between enqueues using entry-point -{clUpdateMutableCommandsKHR}. 
To enable performant usage, all aspects of -mutation are encapsulated inside a single -{cl_mutable_base_config_khr_TYPE} parameter. This means -that the runtime has access to all the information about how the command-buffer -will change, allowing the command-buffer to be rebuilt as efficiently as -possible. Any modifications to the arguments or execution info of a mutable-dispatch -handle using {cl_mutable_dispatch_arg_khr_TYPE} or -{cl_mutable_dispatch_exec_info_khr_TYPE} have no -effect on the original kernel object used when the command was recorded, and -only influence the {clCommandNDRangeKernelKHR} command associated with the -mutable-dispatch. - -[[mutable-dispatch-kernel-argument-safe-usage]] -[NOTE] -==== -The base `cl_khr_command_buffer` extension -specifies that a command-buffer -does not update the reference count of objects set as arguments on kernels -recorded into the command-buffer. - -The implication for applications using {clUpdateMutableCommandsKHR} is -that it is safe to delete objects used as kernel command arguments, if all the -kernel commands using that object as an argument have had their arguments -replaced with a different object. -==== - -To facilitate performant usage for pipelined work flows, where applications -repeatedly call command-buffer update then enqueue, implementations may defer -some of the work to allow {clUpdateMutableCommandsKHR} to return immediately. -Deferring any recompilation until {clEnqueueCommandBufferKHR} avoids blocking -in host code and keeps device occupancy high. This is only possible with a -command-buffer created with the {CL_COMMAND_BUFFER_SIMULTANEOUS_USE_KHR} flag, -as without this the enqueued command-buffer must complete before any modification -occurs.
- -The function - -include::{generated}/api/protos/clUpdateMutableCommandsKHR.txt[] - -Modifies the configuration of mutable-command handles returned during -_command_buffer_ recording, updating the behavior of those commands in future -enqueues of _command_buffer_. Using this function when _command_buffer_ is in -the Pending state and not created with the -{CL_COMMAND_BUFFER_SIMULTANEOUS_USE_KHR} flag causes undefined behavior. - -[NOTE] -==== -Performant usage is to call {clUpdateMutableCommandsKHR} only when the desired -state of all commands is known, rather than iteratively updating each command -individually. -==== - -[NOTE] -==== -If the command buffer has been created with {CL_MUTABLE_DISPATCH_ASSERT_NO_ADDITIONAL_WORK_GROUPS_KHR}, or -the updated ND-range command has been recorded with this flag, and the ND-range parameters are updated so -that the new number of work-groups exceeds the number when the ND-range command was recorded, the behavior -is undefined. -==== - -_command_buffer_ Refers to a valid command-buffer object. - -_mutable_config_ Is a pointer to a -{cl_mutable_base_config_khr_TYPE} structure defining -updates to make to mutable-commands. - -{clUpdateMutableCommandsKHR} returns {CL_SUCCESS} if all the mutable-command -objects were updated successfully. Otherwise, none of the updates to -mutable-command objects are preserved and one of the errors below is returned: - -* {CL_INVALID_COMMAND_BUFFER_KHR} if _command_buffer_ is not a valid - command-buffer. - -* {CL_INVALID_OPERATION} if _command_buffer_ has not been finalized. - -* {CL_INVALID_OPERATION} if _command_buffer_ was not created with the - {CL_COMMAND_BUFFER_MUTABLE_KHR} flag. - -* {CL_INVALID_VALUE} if the _type_ member of _mutable_config_ is not - {CL_STRUCTURE_TYPE_MUTABLE_BASE_CONFIG_KHR}.
- -* {CL_INVALID_VALUE} if the _mutable_dispatch_list_ member of _mutable_config_ - is `NULL` and _num_mutable_dispatch_ > 0, or _mutable_dispatch_list_ is not - `NULL` and _num_mutable_dispatch_ is 0. - -* {CL_INVALID_VALUE} if the _next_ member of _mutable_config_ is not `NULL` and - any iteration of the structure pointer chain does not contain valid _type_ - and _next_ members. - -* {CL_INVALID_VALUE} if _mutable_config_ is `NULL`, or if both _next_ and - _mutable_dispatch_list_ members of _mutable_config_ are `NULL`. - -* {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required by - the OpenCL implementation on the device. - -* {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources required by - the OpenCL implementation on the host. - -If the _mutable_dispatch_list_ member of _mutable_config_ is non-`NULL`, then -errors defined by {clEnqueueNDRangeKernel}, {clSetKernelExecInfo}, -{clSetKernelArg}, and {clSetKernelArgSVMPointer} are returned by -{clUpdateMutableCommandsKHR} if any of the array elements are set to an invalid -value. Additionally, the following errors are returned if any -{cl_mutable_dispatch_config_khr_TYPE} element of -the array violates the defined conditions: - -* {CL_INVALID_MUTABLE_COMMAND_KHR} if _command_ is not a valid mutable - command object, or created from _command_buffer_. - -* {CL_INVALID_VALUE} if _type_ is not - {CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR}. - -* {CL_INVALID_OPERATION} if the values of _local_work_size_ and/or - _global_work_size_ result in a change to work-group uniformity. - -* {CL_INVALID_OPERATION} if the _work_dim_ is different from the _work_dim_ set - on _command_ recording. - -* {CL_INVALID_OPERATION} if the {CL_MUTABLE_DISPATCH_GLOBAL_OFFSET_KHR} property - was not set on _command_ recording and _global_work_offset_ is not `NULL`. 
- -* {CL_INVALID_OPERATION} if the {CL_MUTABLE_DISPATCH_GLOBAL_SIZE_KHR} property - was not set on _command_ recording and _global_work_size_ is not `NULL`. - -* {CL_INVALID_OPERATION} if the {CL_MUTABLE_DISPATCH_LOCAL_SIZE_KHR} property - was not set on _command_ recording and _local_work_size_ is not `NULL`. - -* {CL_INVALID_OPERATION} if the {CL_MUTABLE_DISPATCH_ARGUMENTS_KHR} property was - not set on _command_ recording and _num_args_ or _num_svm_args_ is non-zero. - -* {CL_INVALID_OPERATION} if the {CL_MUTABLE_DISPATCH_EXEC_INFO_KHR} property was - not set on _command_ recording and _num_exec_infos_ is non-zero. - -* {CL_INVALID_VALUE} if _arg_list_ is `NULL` and _num_args_ > 0, or _arg_list_ - is not `NULL` and _num_args_ is 0. - -* {CL_INVALID_VALUE} if _arg_svm_list_ is `NULL` and _num_svm_args_ > 0, or - _arg_svm_list_ is not `NULL` and _num_svm_args_ is 0. - -* {CL_INVALID_VALUE} if _exec_info_list_ is `NULL` and _num_exec_infos_ > 0, or - _exec_info_list_ is not `NULL` and _num_exec_infos_ is 0. - -The function - -include::{generated}/api/protos/clGetMutableCommandInfoKHR.txt[] - -Queries information about the _command_ object. - -_command_ Specifies the mutable-command object being queried. - -_param_name_ Specifies the information to query. The list of supported -_param_name_ types and the information returned in _param_value_ by -{clGetMutableCommandInfoKHR} is described in the -<<mutable-command-object-queries, Mutable Command Object Queries>> table. - -_param_value_size_ Is used to specify the size in bytes of memory pointed to by -_param_value_. This size must be ≥ size of return type as described in the -<<mutable-command-object-queries, Mutable Command Object Queries>> table. - -_param_value_ Is a pointer to memory where the appropriate result being queried -is returned. If _param_value_ is `NULL`, it is ignored. - -_param_value_size_ret_ Returns the actual size in bytes of data being queried -by _param_name_. If _param_value_size_ret_ is `NULL`, it is ignored.
- -[[mutable-command-object-queries]] -._Mutable Command Object Queries_ -[width="100%",cols="<33%,<17%,<50%",options="header"] -|==== -| Mutable Command Info -| Return Type -| Description - -| {CL_MUTABLE_COMMAND_COMMAND_QUEUE_KHR} -| {cl_command_queue_TYPE} -| Return the command-queue associated with _command_. If `NULL` was passed as - the queue when _command_ was recorded, then the queue associated with the - command-buffer that _command_ belongs to is returned. - -| {CL_MUTABLE_COMMAND_COMMAND_BUFFER_KHR} -| {cl_command_buffer_khr_TYPE} -| Return the command-buffer associated with _command_. - -| {CL_MUTABLE_COMMAND_COMMAND_TYPE_KHR} -| {cl_command_type_TYPE} -| Return the command-type associated with _command_. - - The list of supported event command types defined by {clGetEventInfo} is used - with the matching command. - -| {CL_MUTABLE_DISPATCH_PROPERTIES_ARRAY_KHR} -| {cl_ndrange_kernel_command_properties_khr_TYPE}[] -| Return the properties argument specified on _command_ recording with - {clCommandNDRangeKernelKHR}. - - If the properties argument specified on creation of _command_ was not - `NULL`, the implementation must return the values specified in the - properties argument in the same order and without including additional - properties. - - If the properties argument specified on creation of _command_ was `NULL`, - or _command_ was not recorded from a {clCommandNDRangeKernelKHR} command, the - implementation must return _param_value_size_ret_ equal to 0, indicating that - there are no properties to be returned. - -| {CL_MUTABLE_DISPATCH_KERNEL_KHR} -| {cl_kernel_TYPE} -| Return the kernel associated with _command_ when recorded with - {clCommandNDRangeKernelKHR}. - - If _command_ was not recorded from a {clCommandNDRangeKernelKHR} command, the - implementation must return _param_value_size_ret_ equal to 0, indicating that - the value returned in _param_value_ is not valid. 
- -| {CL_MUTABLE_DISPATCH_DIMENSIONS_KHR} -| {cl_uint_TYPE} -| Return the number of work-item dimensions specified when _command_ was - created. - - If _command_ was not recorded from a {clCommandNDRangeKernelKHR} command, the - implementation must return _param_value_size_ret_ equal to 0, indicating that - the value returned in _param_value_ is not valid. - -| {CL_MUTABLE_DISPATCH_GLOBAL_WORK_OFFSET_KHR} -| {size_t_TYPE}[] -| Return the global work-item offset set on _command_ creation, or from - the most recent update via {clUpdateMutableCommandsKHR} where this value - was modified. The output array contains _work_dim_ values, where _work_dim_ is - returned by the query {CL_MUTABLE_DISPATCH_DIMENSIONS_KHR}. If a global work-item - offset was not set, zero is returned for each element in the array. - - If _command_ was not recorded from a {clCommandNDRangeKernelKHR} command, the - implementation must return _param_value_size_ret_ equal to 0, indicating that - the value returned in _param_value_ is not valid. - -| {CL_MUTABLE_DISPATCH_GLOBAL_WORK_SIZE_KHR} -| {size_t_TYPE}[] -| Return the global work-item size set on _command_ creation, or from - the most recent update via {clUpdateMutableCommandsKHR} where this value - was modified. The output array contains _work_dim_ values, where _work_dim_ is - returned by the query {CL_MUTABLE_DISPATCH_DIMENSIONS_KHR}. - If a global work-item size was not set, zero is returned for each element in - the array. - - If _command_ was not recorded from a {clCommandNDRangeKernelKHR} command, the - implementation must return _param_value_size_ret_ equal to 0, indicating that - the value returned in _param_value_ is not valid. - -| {CL_MUTABLE_DISPATCH_LOCAL_WORK_SIZE_KHR} -| {size_t_TYPE}[] -| Return the local work-item size set on _command_ creation, or from - the most recent update via {clUpdateMutableCommandsKHR} where this value - was modified. 
The output array contains _work_dim_ values, where _work_dim_ is - returned by the query {CL_MUTABLE_DISPATCH_DIMENSIONS_KHR}. If a local work-item - size was not set, zero is returned for each element in the array. - - If _command_ was not recorded from a {clCommandNDRangeKernelKHR} command, the - implementation must return _param_value_size_ret_ equal to 0, indicating that - the value returned in _param_value_ is not valid. -|==== - -{clGetMutableCommandInfoKHR} returns {CL_SUCCESS} if the function is executed -successfully. Otherwise, it returns one of the following errors: - -* {CL_INVALID_VALUE} if _param_name_ is not valid, or if the size in bytes - specified by _param_value_size_ is < size of return type as described in the - <<mutable-command-object-queries, Mutable Command Object Queries>> table - and _param_value_ is not `NULL`. - -* {CL_INVALID_MUTABLE_COMMAND_KHR} if _command_ is not a valid mutable - command object. - -* {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources - required by the OpenCL implementation on the device. - -* {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources - required by the OpenCL implementation on the host. - -=== Sample Code - -Sample application updating the arguments to a mutable-dispatch between -command-buffer submissions.
- -[source,opencl] ----- - #define CL_CHECK(ERROR) \ - if (ERROR) { \ - std::cerr << "OpenCL error: " << ERROR << "\n"; \ - return ERROR; \ - } - - int main() { - cl_platform_id platform; - CL_CHECK(clGetPlatformIDs(1, &platform, nullptr)); - cl_device_id device; - CL_CHECK(clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 1, &device, nullptr)); - - cl_mutable_dispatch_fields_khr mutable_capabilities; - CL_CHECK(clGetDeviceInfo(device, CL_DEVICE_MUTABLE_DISPATCH_CAPABILITIES_KHR, - sizeof(mutable_capabilities), &mutable_capabilities, - nullptr)); - if (!(mutable_capabilities & CL_MUTABLE_DISPATCH_ARGUMENTS_KHR)) { - std::cerr - << "Device does not support update arguments to a mutable-dispatch, " - "skipping example.\n"; - return 0; - } - - cl_int error; - cl_context context = - clCreateContext(nullptr, 1, &device, nullptr, nullptr, &error); - CL_CHECK(error); - - const char* code = R"OpenCLC( - kernel void vector_addition(global int* tile1, global int* tile2, - global int* res) { - size_t index = get_global_id(0); - res[index] = tile1[index] + tile2[index]; - } - )OpenCLC"; - const size_t length = std::strlen(code); - - cl_program program = - clCreateProgramWithSource(context, 1, &code, &length, &error); - CL_CHECK(error); - - CL_CHECK(clBuildProgram(program, 1, &device, nullptr, nullptr, nullptr)); - - cl_kernel kernel = clCreateKernel(program, "vector_addition", &error); - CL_CHECK(error); - - // Set the parameters of the frames - constexpr size_t iterations = 60; - constexpr size_t elem_size = sizeof(cl_int); - constexpr size_t frame_width = 32; - constexpr size_t frame_count = frame_width * frame_width; - constexpr size_t frame_size = frame_count * elem_size; - - cl_mem input_A_buffers[2] = {nullptr, nullptr}; - cl_mem input_B_buffers[2] = {nullptr, nullptr}; - cl_mem output_buffers[2] = {nullptr, nullptr}; - - // Create the buffer to swap between even and odd kernel iterations - for (size_t i = 0; i < 2; i++) { - input_A_buffers[i] = - clCreateBuffer(context, 
CL_MEM_READ_ONLY, frame_size, nullptr, &error); - CL_CHECK(error); - - input_B_buffers[i] = - clCreateBuffer(context, CL_MEM_READ_ONLY, frame_size, nullptr, &error); - CL_CHECK(error); - - output_buffers[i] = - clCreateBuffer(context, CL_MEM_WRITE_ONLY, frame_size, nullptr, &error); - CL_CHECK(error); - } - - cl_command_queue command_queue = - clCreateCommandQueue(context, device, 0, &error); - CL_CHECK(error); - - // Create command-buffer with mutable flag so we can update it - cl_command_buffer_properties_khr properties[3] = { - CL_COMMAND_BUFFER_FLAGS_KHR, CL_COMMAND_BUFFER_MUTABLE_KHR, 0}; - cl_command_buffer_khr command_buffer = - clCreateCommandBufferKHR(1, &command_queue, properties, &error); - CL_CHECK(error); - - CL_CHECK(clSetKernelArg(kernel, 0, sizeof(cl_mem), &input_A_buffers[0])); - CL_CHECK(clSetKernelArg(kernel, 1, sizeof(cl_mem), &input_B_buffers[0])); - CL_CHECK(clSetKernelArg(kernel, 2, sizeof(cl_mem), &output_buffers[0])); - - // Instruct the nd-range command to allow for mutable kernel arguments - cl_ndrange_kernel_command_properties_khr mutable_properties[] = { - CL_MUTABLE_DISPATCH_UPDATABLE_FIELDS_KHR, - CL_MUTABLE_DISPATCH_ARGUMENTS_KHR, 0}; - - // Create command handle for mutating nd-range command - cl_mutable_command_khr command_handle = nullptr; - - // Add the nd-range kernel command - error = clCommandNDRangeKernelKHR( - command_buffer, command_queue, mutable_properties, kernel, 1, nullptr, - &frame_count, nullptr, 0, nullptr, nullptr, &command_handle); - CL_CHECK(error); - - CL_CHECK(clFinalizeCommandBufferKHR(command_buffer)); - - // Prepare for random input generation - std::random_device random_device; - std::mt19937 random_engine{random_device()}; - std::uniform_int_distribution random_distribution{ - std::numeric_limits::min() / 2, - std::numeric_limits::max() / 2}; - - // Iterate over each frame - for (size_t i = 0; i < iterations; i++) { - // Set the buffers for the current frame - cl_mem input_A_buffer = input_A_buffers[i % 
2]; - cl_mem input_B_buffer = input_B_buffers[i % 2]; - cl_mem output_buffer = output_buffers[i % 2]; - - // Generate input A data - std::vector input_a(frame_count); - std::generate(std::begin(input_a), std::end(input_a), - [&]() { return random_distribution(random_engine); }); - - // Write the generated data to the input A buffer - error = - clEnqueueWriteBuffer(command_queue, input_A_buffer, CL_FALSE, 0, - frame_size, input_a.data(), 0, nullptr, nullptr); - CL_CHECK(error); - - // Generate input B data - std::vector input_b(frame_count); - std::generate(std::begin(input_b), std::end(input_b), - [&]() { return random_distribution(random_engine); }); - - // Write the generated data to the input B buffer - error = - clEnqueueWriteBuffer(command_queue, input_B_buffer, CL_FALSE, 0, - frame_size, input_b.data(), 0, nullptr, nullptr); - CL_CHECK(error); - - // If not executing the first frame - if (i != 0) { - // Configure the mutable configuration to update the kernel arguments - cl_mutable_dispatch_arg_khr arg_0{0, sizeof(cl_mem), &input_A_buffer}; - cl_mutable_dispatch_arg_khr arg_1{1, sizeof(cl_mem), &input_B_buffer}; - cl_mutable_dispatch_arg_khr arg_2{2, sizeof(cl_mem), &output_buffer}; - cl_mutable_dispatch_arg_khr args[] = {arg_0, arg_1, arg_2}; - cl_mutable_dispatch_config_khr dispatch_config{ - CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR, - nullptr, - command_handle, - 3 /* num_args */, - 0 /* num_svm_arg */, - 0 /* num_exec_infos */, - 0 /* work_dim - 0 means no change to dimensions */, - args /* arg_list */, - nullptr /* arg_svm_list - nullptr means no change*/, - nullptr /* exec_info_list */, - nullptr /* global_work_offset */, - nullptr /* global_work_size */, - nullptr /* local_work_size */}; - cl_mutable_base_config_khr mutable_config{ - CL_STRUCTURE_TYPE_MUTABLE_BASE_CONFIG_KHR, nullptr, 1, - &dispatch_config}; - - // Update the command buffer with the mutable configuration - error = clUpdateMutableCommandsKHR(command_buffer, &mutable_config); - 
CL_CHECK(error); - } - - // Enqueue the command buffer - error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, nullptr, - nullptr); - CL_CHECK(error); - - // Allocate memory for the output data - std::vector output(frame_count); - - // Read the output data from the output buffer - error = clEnqueueReadBuffer(command_queue, output_buffer, CL_TRUE, 0, - frame_size, output.data(), 0, nullptr, nullptr); - CL_CHECK(error); - - // Flush and execute the read buffer - error = clFinish(command_queue); - CL_CHECK(error); - - // Verify the results of the frame - for (size_t i = 0; i < frame_count; ++i) { - const cl_int result = input_a[i] + input_b[i]; - if (output[i] != result) { - std::cerr << "Error: Incorrect result at index " << i << " - Expected " - << output[i] << " was " << result << std::endl; - std::exit(1); - } - } - } - - std::cout << "Result verified\n"; - - CL_CHECK(clReleaseCommandBufferKHR(command_buffer)); - for (size_t i = 0; i < 2; i++) { - CL_CHECK(clReleaseMemObject(input_A_buffers[i])); - CL_CHECK(clReleaseMemObject(input_B_buffers[i])); - CL_CHECK(clReleaseMemObject(output_buffers[i])); - } - CL_CHECK(clReleaseCommandQueue(command_queue)); - CL_CHECK(clReleaseKernel(kernel)); - CL_CHECK(clReleaseProgram(program)); - CL_CHECK(clReleaseContext(context)); - CL_CHECK(clReleaseDevice(device)); - return 0; - } ----- - -=== Issues - -. Include simpler, more user friendly, entry-points for updating kernel - arguments? -+ --- -*RESOLVED*: Can be implemented in the ecosystem as a layer on top, if -that layer proves popular then can be introduced, possibly as another -extension on top. --- - -. Add a command-buffer clone entry-point for deep copying a command-buffer? - Arguments could then be updated and both command-buffers used. Useful for - techniques like double buffering. 
-+ --- -*RESOLVED*: In the use-case we're targeting a user would only have a handle to -the original command-buffer, but not the clone, which may limit the usefulness -of this capability. Additionally, an implementation could be complicated by -non-trivial deep copying of the underlying objects contained in the -command-buffer. As a result of this new entry-point being an additive change to -the specification it is omitted, and if its functionality has demand later, it -may be introduced as a standalone extension. --- diff --git a/ext/cl_khr_create_command_queue.asciidoc b/ext/cl_khr_create_command_queue.asciidoc deleted file mode 100644 index de1cf6e4..00000000 --- a/ext/cl_khr_create_command_queue.asciidoc +++ /dev/null @@ -1,154 +0,0 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ - -[[cl_khr_create_command_queue]] -== Creating Command-Queues with Properties - -=== Overview - -This section describes the *cl_khr_create_command_queue* extension. - -This extension allows OpenCL 1.x devices to support an equivalent of the -{clCreateCommandQueueWithProperties} API that was added in OpenCL 2.0. -This allows OpenCL 1.x devices to support other optional extensions or -features that use the {clCreateCommandQueueWithProperties} API to specify -additional command-queue properties that cannot be specified using the -OpenCL 1.x {clCreateCommandQueue} API. - -No new command-queue properties are required by this extension. -Applications may use the existing {CL_DEVICE_QUEUE_PROPERTIES} query to -determine command-queue properties that are supported by the device. - -OpenCL 2.x devices may support this extension for compatibility. In -this scenario, the function added by this extension will have the same -capabilities as the core {clCreateCommandQueueWithProperties} API.
-Applications that only target OpenCL 2.x devices should use the core -OpenCL 2.x {clCreateCommandQueueWithProperties} API instead of this -extension API. - -=== General Information - -==== Version History - -[cols="1,1,3",options="header",] -|==== -| *Date* | *Version* | *Description* -| 2020-04-21 | 1.0.0 | First assigned version. -|==== - -=== New API Functions - -[source,opencl] ----- -cl_command_queue clCreateCommandQueueWithPropertiesKHR( - cl_context context, - cl_device_id device, - const cl_queue_properties_khr *properties, - cl_int *errcode_ret); ----- - -=== New API Types - -[source,opencl] ----- -typedef cl_properties cl_queue_properties_khr; ----- - -=== Modifications to the OpenCL 1.2 Specification - -(Add to Table 5.2 for {CL_QUEUE_PROPERTIES} in Section 5.1) :: -+ --- - -[caption="Table 5.2 "] -.List of supported param_names by {clGetCommandQueueInfo} -[cols="2,1,3",options="header",] -|======================================================================= -| Queue Info | Return Type | Description - -| {CL_QUEUE_PROPERTIES} -| {cl_command_queue_properties_TYPE} -| Returns the currently specified properties for the command-queue. -These properties are specified by the _properties_ argument in -{clCreateCommandQueue}, or by the {CL_QUEUE_PROPERTIES} property value in -{clCreateCommandQueueWithPropertiesKHR}. - -|======================================================================= --- - -(Add a new Section 5.1.1, *Creating Command-Queues With Properties*) :: -+ --- - -The function - -include::{generated}/api/protos/clCreateCommandQueueWithPropertiesKHR.txt[] - -allows creation of a command-queue from an array of properties -for the specified device. - -_context_ must be a valid OpenCL context. - -_device_ must be a device or sub-device associated with _context_. 
It -can either be in the list of devices and sub-devices specified when -_context_ is created using {clCreateContext} or -be a root device with the same device type as specified when _context_ -is created using {clCreateContextFromType}. - -_properties_ specifies a list of properties for the command-queue and -their corresponding values. Each property name is immediately followed -by the corresponding desired value. The list is terminated with 0. The -list of supported properties is described in the table below. If a -supported property and its value is not specified in _properties_, its -default value will be used. _properties_ can be NULL in which case the -default values for supported command-queue properties will be used. - -[caption="Table X.Y "] -.List of supported param_names by {clCreateCommandQueueWithPropertiesKHR} -|======================================================================= -|*Queue Properties* |*Property Value* |*Description* - -|{CL_QUEUE_PROPERTIES} -|{cl_bitfield_TYPE} -| This is a bitfield and can be set to a combination of the following -values: + -{blank} -{CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE} - Determines whether the -commands queued in the command-queue are executed in-order or out-of-order. If -set, the commands in the command-queue are executed out-of-order. Otherwise, -commands are executed in-order. + -{blank} -{CL_QUEUE_PROFILING_ENABLE} - Enable or disable profiling of commands in -the command-queue. If set, the profiling of commands is enabled. Otherwise, -profiling of commands is disabled. + -{blank} -If {CL_QUEUE_PROPERTIES} is not specified an in-order command-queue that -does not support profiling of commands is created for the specified device. - -|======================================================================= - -_errcode_ret_ will return an appropriate error code. If _errcode_ret_ -is NULL, no error code is returned. 
- -{clCreateCommandQueueWithPropertiesKHR} returns a valid non-zero -command-queue and _errcode_ret_ is set to {CL_SUCCESS} if the -command-queue is created successfully. Otherwise, it returns a NULL -value with one of the following error values returned in _errcode_ret_: - -* {CL_INVALID_CONTEXT} if _context_ is not a valid context. - -* {CL_INVALID_DEVICE} if _device_ is not a valid device or is not associated -with _context_. - -* {CL_INVALID_VALUE} if values specified in _properties_ are not valid. - -* {CL_INVALID_QUEUE_PROPERTIES} if values specified in _properties_ are -valid but are not supported by the device. - -* {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required -by the OpenCL implementation on the device. - -* {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources required -by the OpenCL implementation on the host. --- diff --git a/ext/cl_khr_d3d10_sharing.asciidoc b/ext/cl_khr_d3d10_sharing.asciidoc deleted file mode 100644 index 25e89a89..00000000 --- a/ext/cl_khr_d3d10_sharing.asciidoc +++ /dev/null @@ -1,889 +0,0 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ - -[[cl_khr_d3d10_sharing]] -== Creating OpenCL Memory Objects from Direct3D 10 Buffers and Textures - -[[cl_khr_d3d10_sharing-overview]] -=== Overview - -This section describes the *cl_khr_d3d10_sharing* extension. -The goal of this extension is to provide interoperability between OpenCL and -Direct3D 10. - -=== General Information - -==== Version History - -[cols="1,1,3",options="header",] -|==== -| *Date* | *Version* | *Description* -| 2020-04-21 | 1.0.0 | First assigned version. 
-|==== - -[[cl_khr_d3d10_sharing-new-procedures-and-functions]] -=== New Procedures and Functions - -[source,opencl] ----- -cl_int clGetDeviceIDsFromD3D10KHR(cl_platform_id platform, - cl_d3d10_device_source_khr d3d_device_source, - void *d3d_object, - cl_d3d10_device_set_khr d3d_device_set, - cl_uint num_entries, - cl_device_id *devices, - cl_uint *num_devices); - -cl_mem clCreateFromD3D10BufferKHR(cl_context context, - cl_mem_flags flags, - ID3D10Buffer *resource, - cl_int *errcode_ret); - -cl_mem clCreateFromD3D10Texture2DKHR(cl_context context, - cl_mem_flags flags, - ID3D10Texture2D *resource, - UINT subresource, - cl_int *errcode_ret); - -cl_mem clCreateFromD3D10Texture3DKHR(cl_context context, - cl_mem_flags flags, - ID3D10Texture3D *resource, - UINT subresource, - cl_int *errcode_ret); - -cl_int clEnqueueAcquireD3D10ObjectsKHR(cl_command_queue command_queue, - cl_uint num_objects, - const cl_mem *mem_objects, - cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, - cl_event *event); - -cl_int clEnqueueReleaseD3D10ObjectsKHR(cl_command_queue command_queue, - cl_uint num_objects, - const cl_mem *mem_objects, - cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, - cl_event *event); ----- - -[[cl_khr_d3d10_sharing-new-tokens]] -=== New Tokens - -Accepted as a Direct3D 10 device source in the _d3d_device_source_ parameter -of {clGetDeviceIDsFromD3D10KHR}: - ----- -CL_D3D10_DEVICE_KHR -CL_D3D10_DXGI_ADAPTER_KHR ----- - -Accepted as a set of Direct3D 10 devices in the _d3d_device_set_ parameter -of {clGetDeviceIDsFromD3D10KHR}: - ----- -CL_PREFERRED_DEVICES_FOR_D3D10_KHR -CL_ALL_DEVICES_FOR_D3D10_KHR ----- - -Accepted as a property name in the _properties_ parameter of -{clCreateContext} and {clCreateContextFromType}: - ----- -CL_CONTEXT_D3D10_DEVICE_KHR ----- - -Accepted as a property name in the _param_name_ parameter of -{clGetContextInfo}: - ----- -CL_CONTEXT_D3D10_PREFER_SHARED_RESOURCES_KHR ----- - -Accepted as the property 
being queried in the _param_name_ parameter of -{clGetMemObjectInfo}: - ----- -CL_MEM_D3D10_RESOURCE_KHR ----- - -Accepted as the property being queried in the _param_name_ parameter of -{clGetImageInfo}: - ----- -CL_IMAGE_D3D10_SUBRESOURCE_KHR ----- - -Returned in the _param_value_ parameter of {clGetEventInfo} when -_param_name_ is {CL_EVENT_COMMAND_TYPE}: - ----- -CL_COMMAND_ACQUIRE_D3D10_OBJECTS_KHR -CL_COMMAND_RELEASE_D3D10_OBJECTS_KHR ----- - -Returned by {clCreateContext} and {clCreateContextFromType} if the Direct3D -10 device specified for interoperability is not compatible with the devices -against which the context is to be created: - ----- -CL_INVALID_D3D10_DEVICE_KHR ----- - -Returned by {clCreateFromD3D10BufferKHR} when _resource_ is not a Direct3D -10 buffer object, and by {clCreateFromD3D10Texture2DKHR} and -{clCreateFromD3D10Texture3DKHR} when _resource_ is not a Direct3D 10 texture -object: - ----- -CL_INVALID_D3D10_RESOURCE_KHR ----- - -Returned by {clEnqueueAcquireD3D10ObjectsKHR} when any of _mem_objects_ are -currently acquired by OpenCL: - ----- -CL_D3D10_RESOURCE_ALREADY_ACQUIRED_KHR ----- - -Returned by {clEnqueueReleaseD3D10ObjectsKHR} when any of _mem_objects_ are -not currently acquired by OpenCL: - ----- -CL_D3D10_RESOURCE_NOT_ACQUIRED_KHR ----- - -[[cl_khr_d3d10_sharing-additions-to-chapter-4]] -=== Additions to Chapter 4 of the OpenCL 2.2 Specification - -In _section 4.4_, replace the description of _properties_ under -{clCreateContext} with: - -"_properties_ specifies a list of context property names and their -corresponding values. -Each property is followed immediately by the corresponding desired value. -The list is terminated with zero. -If a property is not specified in _properties_, then its default value -(listed in _table 4.5_) is used (it is said to be specified implicitly). -If _properties_ is `NULL` or empty (points to a list whose first value is -zero), all attributes take on their default values." 
- -Add the following to _table 4.5_: - -[cols="2,1,2",options="header",] -|==== -| *cl_context_properties enum* -| *Property value* -| *Description* - -| {CL_CONTEXT_D3D10_DEVICE_KHR} -| ID3D10Device * -| Specifies the ID3D10Device * to use for Direct3D 10 interoperability. - - The default value is `NULL`. - -|==== - -Add to the list of errors for {clCreateContext}: - - * {CL_INVALID_D3D10_DEVICE_KHR} if the value of the property - {CL_CONTEXT_D3D10_DEVICE_KHR} is non-`NULL` and does not specify a valid - Direct3D 10 device with which the _cl_device_ids_ against which this - context is to be created may interoperate. - * {CL_INVALID_OPERATION} if Direct3D 10 interoperability is specified by - setting {CL_CONTEXT_D3D10_DEVICE_KHR} to a non-`NULL` value, and - interoperability with another graphics API is also specified. - -Add to the list of errors for {clCreateContextFromType} the same new errors -described above for {clCreateContext}. - -Add the following row to _table 4.6_: - -[cols="2,1,2",options="header",] -|==== -| *cl_context_info* -| *Return Type* -| *Information returned in param_value* - -| {CL_CONTEXT_D3D10_PREFER_SHARED_RESOURCES_KHR} -| {cl_bool_TYPE} -| Returns {CL_TRUE} if Direct3D 10 resources created as shared by setting - _MiscFlags_ to include D3D10_RESOURCE_MISC_SHARED will perform faster when - shared with OpenCL, compared with resources which have not set this flag. - Otherwise returns {CL_FALSE}. -|==== - -[[cl_khr_d3d10_sharing-additions-to-chapter-5]] -=== Additions to Chapter 5 of the OpenCL 2.2 Specification - -Add to the list of errors for {clGetMemObjectInfo}: - - * {CL_INVALID_D3D10_RESOURCE_KHR} if _param_name_ is - {CL_MEM_D3D10_RESOURCE_KHR} and _memobj_ was not created by the function - {clCreateFromD3D10BufferKHR}, {clCreateFromD3D10Texture2DKHR}, or - {clCreateFromD3D10Texture3DKHR}. - -Extend _table 5.12_ to include the following entry. - -[cols="2,1,2",options="header",] -|==== -| *cl_mem_info* -| *Return type* -| *Info.
returned in _param_value_* - -| {CL_MEM_D3D10_RESOURCE_KHR} -| ID3D10Resource * -| If _memobj_ was created using {clCreateFromD3D10BufferKHR}, - {clCreateFromD3D10Texture2DKHR}, or {clCreateFromD3D10Texture3DKHR}, - returns the _resource_ argument specified when _memobj_ was created. - -|==== - -Add to the list of errors for {clGetImageInfo}: - - * {CL_INVALID_D3D10_RESOURCE_KHR} if _param_name_ is - {CL_IMAGE_D3D10_SUBRESOURCE_KHR} and _image_ was not created by the function - {clCreateFromD3D10Texture2DKHR}, or {clCreateFromD3D10Texture3DKHR}. - -Extend _table 5.9_ to include the following entry. - -[cols="2,1,2",options="header",] -|==== -| *cl_image_info* -| *Return type* -| *Info. returned in _param_value_* - -| {CL_IMAGE_D3D10_SUBRESOURCE_KHR} -| UINT -| If _image_ was created using {clCreateFromD3D10Texture2DKHR}, or - {clCreateFromD3D10Texture3DKHR}, returns the _subresource_ argument - specified when _image_ was created. -|==== - -Add to _table 5.22_ in the *Info returned in param_value* column for -_cl_event_info_ = {CL_EVENT_COMMAND_TYPE}: - ----- -CL_COMMAND_ACQUIRE_D3D10_OBJECTS_KHR -CL_COMMAND_RELEASE_D3D10_OBJECTS_KHR ----- - -[[cl_khr_d3d10_sharing-sharing-memory-objects-with-direct3d-10-resources]] -=== Sharing Memory Objects with Direct3D 10 Resources - -This section discusses OpenCL functions that allow applications to use -Direct3D 10 resources as OpenCL memory objects. -This allows efficient sharing of data between OpenCL and Direct3D 10. -The OpenCL API may be used to execute kernels that read and/or write memory -objects that are also Direct3D 10 resources. -An OpenCL image object may be created from a Direct3D 10 texture resource. -An OpenCL buffer object may be created from a Direct3D 10 buffer resource. -OpenCL memory objects may be created from Direct3D 10 objects if and only if -the OpenCL context has been created from a Direct3D 10 device. 
- -[[cl_khr_d3d10_sharing-querying-opencl-devices-corresponding-to-direct3d-10-devices]] -==== Querying OpenCL Devices Corresponding to Direct3D 10 Devices - -The OpenCL devices corresponding to a Direct3D 10 device may be queried. -The OpenCL devices corresponding to a DXGI adapter may also be queried. -The OpenCL devices corresponding to a Direct3D 10 device will be a subset of -the OpenCL devices corresponding to the DXGI adapter against which the -Direct3D 10 device was created. - -The OpenCL devices corresponding to a Direct3D 10 device or a DXGI device -may be queried using the function - -include::{generated}/api/protos/clGetDeviceIDsFromD3D10KHR.txt[] - -_platform_ refers to the platform ID returned by {clGetPlatformIDs}. - -_d3d_device_source_ specifies the type of _d3d_object_, and must be one of -the values shown in the table below. - -_d3d_object_ specifies the object whose corresponding OpenCL devices are -being queried. -The type of _d3d_object_ must be as specified in the table below. - -_d3d_device_set_ specifies the set of devices to return, and must be one of -the values shown in the table below. - -_num_entries_ is the number of {cl_device_id_TYPE} entries that can be added to -_devices_. -If _devices_ is not `NULL` then _num_entries_ must be greater than zero. - -_devices_ returns a list of OpenCL devices found. -The {cl_device_id_TYPE} values returned in _devices_ can be used to identify a -specific OpenCL device. -If _devices_ is `NULL`, this argument is ignored. -The number of OpenCL devices returned is the minimum of the value specified -by _num_entries_ and the number of OpenCL devices corresponding to -_d3d_object_. - -_num_devices_ returns the number of OpenCL devices available that correspond -to _d3d_object_. -If _num_devices_ is `NULL`, this argument is ignored. - -{clGetDeviceIDsFromD3D10KHR} returns {CL_SUCCESS} if the function is executed -successfully. 
-Otherwise it may return - - * {CL_INVALID_PLATFORM} if _platform_ is not a valid platform. - * {CL_INVALID_VALUE} if _d3d_device_source_ is not a valid value, - _d3d_device_set_ is not a valid value, _num_entries_ is equal to zero - and _devices_ is not `NULL`, or if both _num_devices_ and _devices_ are - `NULL`. - * {CL_DEVICE_NOT_FOUND} if no OpenCL devices that correspond to _d3d_object_ - were found. - -[[cl_khr_d3d10_sharing-clGetDeviceIDsFromD3D10KHR-object-type]] -.Direct3D 10 object types that may be used by {clGetDeviceIDsFromD3D10KHR} -[cols=",",options="header",] -|==== -| {cl_d3d10_device_source_khr_TYPE} -| Type of _d3d_object_ - -| {CL_D3D10_DEVICE_KHR} -| ID3D10Device * - -| {CL_D3D10_DXGI_ADAPTER_KHR} -| IDXGIAdapter * - -|==== - -[[cl_khr_d3d10_sharing-clGetDeviceIDsFromD3D10KHR-devices]] -.Sets of devices queriable using {clGetDeviceIDsFromD3D10KHR} -[cols=",",options="header",] -|==== -| {cl_d3d10_device_set_khr_TYPE} -| Devices returned in _devices_ - -| {CL_PREFERRED_DEVICES_FOR_D3D10_KHR} -| The preferred OpenCL devices associated with the specified Direct3D - object. - -| {CL_ALL_DEVICES_FOR_D3D10_KHR} -| All OpenCL devices which may interoperate with the specified Direct3D - object. - Performance of sharing data on these devices may be considerably less than - on the preferred devices. - -|==== - -[[cl_khr_d3d10_sharing-lifetime-of-shared-objects]] -==== Lifetime of Shared Objects - -An OpenCL memory object created from a Direct3D 10 resource remains valid as -long as the corresponding Direct3D 10 resource has not been deleted. -If the Direct3D 10 resource is deleted through the Direct3D 10 API, -subsequent use of the OpenCL memory object will result in undefined -behavior, including but not limited to possible OpenCL errors, data -corruption, and program termination. 
- -The successful creation of a {cl_context_TYPE} against a Direct3D 10 device -specified via the context create parameter {CL_CONTEXT_D3D10_DEVICE_KHR} will -increment the internal Direct3D reference count on the specified Direct3D 10 -device. -The internal Direct3D reference count on that Direct3D 10 device will be -decremented when the OpenCL reference count on the returned OpenCL context -drops to zero. - -The OpenCL context and corresponding command-queues are dependent on the -existence of the Direct3D 10 device from which the OpenCL context was -created. -If the Direct3D 10 device is deleted through the Direct3D 10 API, subsequent -use of the OpenCL context will result in undefined behavior, including but -not limited to possible OpenCL errors, data corruption, and program -termination. - -[[cl_khr_d3d10_sharing-sharing-direct3d-10-buffer-resources-as-opencl-buffer-objects]] -==== Sharing Direct3D 10 Buffer Resources as OpenCL Buffer Objects - -The function - -include::{generated}/api/protos/clCreateFromD3D10BufferKHR.txt[] - -creates an OpenCL buffer object from a Direct3D 10 buffer. - -_context_ is a valid OpenCL context created from a Direct3D 10 device. - -_flags_ is a bit-field that is used to specify usage information. -Refer to _table 5.3_ for a description of _flags_. -Only {CL_MEM_READ_ONLY}, {CL_MEM_WRITE_ONLY} and {CL_MEM_READ_WRITE} values -specified in _table 5.3_ can be used. - -_resource_ is a pointer to the Direct3D 10 buffer to share. - -_errcode_ret_ will return an appropriate error code. -If _errcode_ret_ is `NULL`, no error code is returned. - -{clCreateFromD3D10BufferKHR} returns a valid non-zero OpenCL buffer object -and _errcode_ret_ is set to {CL_SUCCESS} if the buffer object is created -successfully. -Otherwise, it returns a `NULL` value with one of the following error values -returned in _errcode_ret_: - - * {CL_INVALID_CONTEXT} if _context_ is not a valid context. - * {CL_INVALID_VALUE} if values specified in _flags_ are not valid. 
- * {CL_INVALID_D3D10_RESOURCE_KHR} if _resource_ is not a Direct3D 10 buffer - resource, if _resource_ was created with the D3D10_USAGE flag - D3D10_USAGE_IMMUTABLE, if a {cl_mem_TYPE} from _resource_ has already been - created using {clCreateFromD3D10BufferKHR}, or if _context_ was not - created against the same Direct3D 10 device from which _resource_ was - created. - * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources - required by the OpenCL implementation on the host. - -The size of the returned OpenCL buffer object is the same as the size of -_resource_. -This call will increment the internal Direct3D reference count on -_resource_. -The internal Direct3D reference count on _resource_ will be decremented when -the OpenCL reference count on the returned OpenCL memory object drops to -zero. - -[[cl_khr_d3d10_sharing-sharing-direct3d-10-texture-and-resources-as-opencl-image-objects]] -==== Sharing Direct3D 10 Texture and Resources as OpenCL Image Objects - -The function - -include::{generated}/api/protos/clCreateFromD3D10Texture2DKHR.txt[] - -creates an OpenCL 2D image object from a subresource of a Direct3D 10 2D -texture. - -_context_ is a valid OpenCL context created from a Direct3D 10 device. - -_flags_ is a bit-field that is used to specify usage information. -Refer to _table 5.3_ for a description of _flags_. -Only {CL_MEM_READ_ONLY}, {CL_MEM_WRITE_ONLY} and {CL_MEM_READ_WRITE} values -specified in _table 5.3_ can be used. - -_resource_ is a pointer to the Direct3D 10 2D texture to share. - -_subresource_ is the subresource of _resource_ to share. - -_errcode_ret_ will return an appropriate error code. -If _errcode_ret_ is `NULL`, no error code is returned. - -{clCreateFromD3D10Texture2DKHR} returns a valid non-zero OpenCL image object -and _errcode_ret_ is set to {CL_SUCCESS} if the image object is created -successfully. 
-Otherwise, it returns a `NULL` value with one of the following error values -returned in _errcode_ret_: - - * {CL_INVALID_CONTEXT} if _context_ is not a valid context. - * {CL_INVALID_VALUE} if values specified in _flags_ are not valid or if - _subresource_ is not a valid subresource index for _resource_. - * {CL_INVALID_D3D10_RESOURCE_KHR} if _resource_ is not a Direct3D 10 texture - resource, if _resource_ was created with the D3D10_USAGE flag - D3D10_USAGE_IMMUTABLE, if _resource_ is a multisampled texture, if a - {cl_mem_TYPE} from subresource _subresource_ of _resource_ has already been - created using {clCreateFromD3D10Texture2DKHR}, or if _context_ was not - created against the same Direct3D 10 device from which _resource_ was - created. - * {CL_INVALID_IMAGE_FORMAT_DESCRIPTOR} if the Direct3D 10 texture format of - _resource_ is not listed in the table - <<cl_khr_d3d10_sharing-mapping-of-image-formats,_Direct3D 10 formats and corresponding OpenCL image formats_>> or if the Direct3D 10 texture - format of _resource_ does not map to a supported OpenCL image format. - * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources - required by the OpenCL implementation on the host. - -The width and height of the returned OpenCL 2D image object are determined -by the width and height of subresource _subresource_ of _resource_. -The channel type and order of the returned OpenCL 2D image object is -determined by the format of _resource_ by the table -<<cl_khr_d3d10_sharing-mapping-of-image-formats,_Direct3D 10 formats and corresponding OpenCL image formats_>>. - -This call will increment the internal Direct3D reference count on -_resource_. -The internal Direct3D reference count on _resource_ will be decremented when -the OpenCL reference count on the returned OpenCL memory object drops to -zero. - -The function - -include::{generated}/api/protos/clCreateFromD3D10Texture3DKHR.txt[] - -creates an OpenCL 3D image object from a subresource of a Direct3D 10 3D -texture. - -_context_ is a valid OpenCL context created from a Direct3D 10 device. - -_flags_ is a bit-field that is used to specify usage information. -Refer to _table 5.3_ for a description of _flags_. 
-Only {CL_MEM_READ_ONLY}, {CL_MEM_WRITE_ONLY} and {CL_MEM_READ_WRITE} values -specified in _table 5.3_ can be used. - -_resource_ is a pointer to the Direct3D 10 3D texture to share. - -_subresource_ is the subresource of _resource_ to share. - -_errcode_ret_ will return an appropriate error code. -If _errcode_ret_ is `NULL`, no error code is returned. - -{clCreateFromD3D10Texture3DKHR} returns a valid non-zero OpenCL image object -and _errcode_ret_ is set to {CL_SUCCESS} if the image object is created -successfully. -Otherwise, it returns a `NULL` value with one of the following error values -returned in _errcode_ret_: - - * {CL_INVALID_CONTEXT} if _context_ is not a valid context. - * {CL_INVALID_VALUE} if values specified in _flags_ are not valid or if - _subresource_ is not a valid subresource index for _resource_. - * {CL_INVALID_D3D10_RESOURCE_KHR} if _resource_ is not a Direct3D 10 texture - resource, if _resource_ was created with the D3D10_USAGE flag - D3D10_USAGE_IMMUTABLE, if _resource_ is a multisampled texture, if a - {cl_mem_TYPE} from subresource _subresource_ of _resource_ has already been - created using {clCreateFromD3D10Texture3DKHR}, or if _context_ was not - created against the same Direct3D 10 device from which _resource_ was - created. - * {CL_INVALID_IMAGE_FORMAT_DESCRIPTOR} if the Direct3D 10 texture format of - _resource_ is not listed in the table - <<cl_khr_d3d10_sharing-mapping-of-image-formats,_Direct3D 10 formats and corresponding OpenCL image formats_>> or if the Direct3D 10 texture - format of _resource_ does not map to a supported OpenCL image format. - * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources - required by the OpenCL implementation on the host. - -The width, height and depth of the returned OpenCL 3D image object are -determined by the width, height and depth of subresource _subresource_ of -_resource_. -The channel type and order of the returned OpenCL 3D image object is -determined by the format of _resource_ by the table -<<cl_khr_d3d10_sharing-mapping-of-image-formats,_Direct3D 10 formats and corresponding OpenCL image formats_>>. 
- -This call will increment the internal Direct3D reference count on -_resource_. -The internal Direct3D reference count on _resource_ will be decremented when -the OpenCL reference count on the returned OpenCL memory object drops to -zero. - -[[cl_khr_d3d10_sharing-mapping-of-image-formats]] -._Direct3D 10 formats and corresponding OpenCL image formats_ -[cols=",",options="header",] -|==== -| *DXGI format* -| *CL image format* - -*(channel order, channel data type)* - -| DXGI_FORMAT_R32G32B32A32_FLOAT | `CL_RGBA`, `CL_FLOAT` -| DXGI_FORMAT_R32G32B32A32_UINT | `CL_RGBA`, `CL_UNSIGNED_INT32` -| DXGI_FORMAT_R32G32B32A32_SINT | `CL_RGBA`, `CL_SIGNED_INT32` -| | -| DXGI_FORMAT_R16G16B16A16_FLOAT | `CL_RGBA`, `CL_HALF_FLOAT` -| DXGI_FORMAT_R16G16B16A16_UNORM | `CL_RGBA`, `CL_UNORM_INT16` -| DXGI_FORMAT_R16G16B16A16_UINT | `CL_RGBA`, `CL_UNSIGNED_INT16` -| DXGI_FORMAT_R16G16B16A16_SNORM | `CL_RGBA`, `CL_SNORM_INT16` -| DXGI_FORMAT_R16G16B16A16_SINT | `CL_RGBA`, `CL_SIGNED_INT16` -| | -| DXGI_FORMAT_B8G8R8A8_UNORM | `CL_BGRA`, `CL_UNORM_INT8` -| DXGI_FORMAT_R8G8B8A8_UNORM | `CL_RGBA`, `CL_UNORM_INT8` -| DXGI_FORMAT_R8G8B8A8_UINT | `CL_RGBA`, `CL_UNSIGNED_INT8` -| DXGI_FORMAT_R8G8B8A8_SNORM | `CL_RGBA`, `CL_SNORM_INT8` -| DXGI_FORMAT_R8G8B8A8_SINT | `CL_RGBA`, `CL_SIGNED_INT8` -| | -| DXGI_FORMAT_R32G32_FLOAT | `CL_RG`, `CL_FLOAT` -| DXGI_FORMAT_R32G32_UINT | `CL_RG`, `CL_UNSIGNED_INT32` -| DXGI_FORMAT_R32G32_SINT | `CL_RG`, `CL_SIGNED_INT32` -| | -| DXGI_FORMAT_R16G16_FLOAT | `CL_RG`, `CL_HALF_FLOAT` -| DXGI_FORMAT_R16G16_UNORM | `CL_RG`, `CL_UNORM_INT16` -| DXGI_FORMAT_R16G16_UINT | `CL_RG`, `CL_UNSIGNED_INT16` -| DXGI_FORMAT_R16G16_SNORM | `CL_RG`, `CL_SNORM_INT16` -| DXGI_FORMAT_R16G16_SINT | `CL_RG`, `CL_SIGNED_INT16` -| | -| DXGI_FORMAT_R8G8_UNORM | `CL_RG`, `CL_UNORM_INT8` -| DXGI_FORMAT_R8G8_UINT | `CL_RG`, `CL_UNSIGNED_INT8` -| DXGI_FORMAT_R8G8_SNORM | `CL_RG`, `CL_SNORM_INT8` -| DXGI_FORMAT_R8G8_SINT | `CL_RG`, `CL_SIGNED_INT8` -| | -| DXGI_FORMAT_R32_FLOAT | 
`CL_R`, `CL_FLOAT` -| DXGI_FORMAT_R32_UINT | `CL_R`, `CL_UNSIGNED_INT32` -| DXGI_FORMAT_R32_SINT | `CL_R`, `CL_SIGNED_INT32` -| | -| DXGI_FORMAT_R16_FLOAT | `CL_R`, `CL_HALF_FLOAT` -| DXGI_FORMAT_R16_UNORM | `CL_R`, `CL_UNORM_INT16` -| DXGI_FORMAT_R16_UINT | `CL_R`, `CL_UNSIGNED_INT16` -| DXGI_FORMAT_R16_SNORM | `CL_R`, `CL_SNORM_INT16` -| DXGI_FORMAT_R16_SINT | `CL_R`, `CL_SIGNED_INT16` -| | -| DXGI_FORMAT_R8_UNORM | `CL_R`, `CL_UNORM_INT8` -| DXGI_FORMAT_R8_UINT | `CL_R`, `CL_UNSIGNED_INT8` -| DXGI_FORMAT_R8_SNORM | `CL_R`, `CL_SNORM_INT8` -| DXGI_FORMAT_R8_SINT | `CL_R`, `CL_SIGNED_INT8` -|==== - -[[cl_khr_d3d10_sharing-querying-direct3d-properties-of-memory-objects-created-from-direct3d-10-resources]] -==== Querying Direct3D properties of memory objects created from Direct3D 10 resources - -Properties of Direct3D 10 objects may be queried using {clGetMemObjectInfo} -and {clGetImageInfo} with _param_name_ {CL_MEM_D3D10_RESOURCE_KHR} and -{CL_IMAGE_D3D10_SUBRESOURCE_KHR} respectively as described in _sections 5.4.3_ -and _5.3.6_. - -[[cl_khr_d3d10_sharing-sharing-memory-objects-created-from-direct3d-10-resources-between-direct3d-10-and-opencl-contexts]] -==== Sharing memory objects created from Direct3D 10 resources between Direct3D 10 and OpenCL contexts - -The function - -include::{generated}/api/protos/clEnqueueAcquireD3D10ObjectsKHR.txt[] - -is used to acquire OpenCL memory objects that have been created from -Direct3D 10 resources. -The Direct3D 10 objects are acquired by the OpenCL context associated with -_command_queue_ and can therefore be used by all command-queues associated -with the OpenCL context. - -OpenCL memory objects created from Direct3D 10 resources must be acquired -before they can be used by any OpenCL commands queued to a command-queue. -If an OpenCL memory object created from a Direct3D 10 resource is used while -it is not currently acquired by OpenCL, the behavior is undefined. 
-Implementations may fail the execution of commands attempting to use that -OpenCL memory object and set their associated event's execution status to -{CL_D3D10_RESOURCE_NOT_ACQUIRED_KHR}. - -If {CL_CONTEXT_INTEROP_USER_SYNC} is not specified as {CL_TRUE} during context -creation, {clEnqueueAcquireD3D10ObjectsKHR} provides the synchronization -guarantee that any Direct3D 10 calls involving the interop device(s) used in -the OpenCL context made before {clEnqueueAcquireD3D10ObjectsKHR} is called -will complete executing before _event_ reports completion and before the -execution of any subsequent OpenCL work issued in _command_queue_ begins. -If the context was created with properties specifying -{CL_CONTEXT_INTEROP_USER_SYNC} as {CL_TRUE}, the user is responsible for -guaranteeing that any Direct3D 10 calls involving the interop device(s) used -in the OpenCL context made before {clEnqueueAcquireD3D10ObjectsKHR} is -called have completed before calling {clEnqueueAcquireD3D10ObjectsKHR}. - -_command_queue_ is a valid command-queue. - -_num_objects_ is the number of memory objects to be acquired in -_mem_objects_. - -_mem_objects_ is a pointer to a list of OpenCL memory objects that were -created from Direct3D 10 resources. - -_event_wait_list_ and _num_events_in_wait_list_ specify events that need to -complete before this particular command can be executed. -If _event_wait_list_ is `NULL`, then this particular command does not wait -on any event to complete. -If _event_wait_list_ is `NULL`, _num_events_in_wait_list_ must be 0. -If _event_wait_list_ is not `NULL`, the list of events pointed to by -_event_wait_list_ must be valid and _num_events_in_wait_list_ must be -greater than 0. -The events specified in _event_wait_list_ act as synchronization points. - -_event_ returns an event object that identifies this command and -can be used to query or wait for this command to complete. 
-If _event_ is `NULL` or the enqueue is unsuccessful, no event will be -created and therefore it will not be possible to query the status of this -command or to wait for this command to complete. -If _event_wait_list_ and _event_ are not `NULL`, _event_ must not refer -to an element of the _event_wait_list_ array. - -{clEnqueueAcquireD3D10ObjectsKHR} returns {CL_SUCCESS} if the function is -executed successfully. -If _num_objects_ is 0 and _mem_objects_ is `NULL` then the function does -nothing and returns {CL_SUCCESS}. -Otherwise it returns one of the following errors: - - * {CL_INVALID_VALUE} if _num_objects_ is zero and _mem_objects_ is not a - `NULL` value or if _num_objects_ > 0 and _mem_objects_ is `NULL`. - * {CL_INVALID_MEM_OBJECT} if memory objects in _mem_objects_ are not valid - OpenCL memory objects or if memory objects in _mem_objects_ have not - been created from Direct3D 10 resources. - * {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not a valid - command-queue. - * {CL_INVALID_CONTEXT} if context associated with _command_queue_ was not - created from a Direct3D 10 device. - * {CL_D3D10_RESOURCE_ALREADY_ACQUIRED_KHR} if memory objects in - _mem_objects_ have previously been acquired using - {clEnqueueAcquireD3D10ObjectsKHR} but have not been released using - {clEnqueueReleaseD3D10ObjectsKHR}. - * {CL_INVALID_EVENT_WAIT_LIST} if _event_wait_list_ is `NULL` and - _num_events_in_wait_list_ > 0, or _event_wait_list_ is not `NULL` and - _num_events_in_wait_list_ is 0, or if event objects in _event_wait_list_ - are not valid events. - * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources - required by the OpenCL implementation on the host. - -The function - -include::{generated}/api/protos/clEnqueueReleaseD3D10ObjectsKHR.txt[] - -is used to release OpenCL memory objects that have been created from -Direct3D 10 resources. -The Direct3D 10 objects are released by the OpenCL context associated with -_command_queue_. 
- -OpenCL memory objects created from Direct3D 10 resources which have been -acquired by OpenCL must be released by OpenCL before they may be accessed by -Direct3D 10. -Accessing a Direct3D 10 resource while its corresponding OpenCL memory -object is acquired is in error and will result in undefined behavior, -including but not limited to possible OpenCL errors, data corruption, and -program termination. - -If {CL_CONTEXT_INTEROP_USER_SYNC} is not specified as {CL_TRUE} during context -creation, {clEnqueueReleaseD3D10ObjectsKHR} provides the synchronization -guarantee that any Direct3D 10 calls involving the interop -device(s) used in the OpenCL context made after the call to -{clEnqueueReleaseD3D10ObjectsKHR} will not start executing until after all -events in _event_wait_list_ are complete and all work already submitted to -_command_queue_ completes execution. -If the context was created with properties specifying -{CL_CONTEXT_INTEROP_USER_SYNC} as {CL_TRUE}, the user is responsible for -guaranteeing that any Direct3D 10 calls involving the interop device(s) used -in the OpenCL context made after {clEnqueueReleaseD3D10ObjectsKHR} will not -start executing until after the event returned by -{clEnqueueReleaseD3D10ObjectsKHR} reports completion. - -_num_objects_ is the number of memory objects to be released in -_mem_objects_. - -_mem_objects_ is a pointer to a list of OpenCL memory objects that were -created from Direct3D 10 resources. - -_event_wait_list_ and _num_events_in_wait_list_ specify events that need to -complete before this particular command can be executed. -If _event_wait_list_ is `NULL`, then this particular command does not wait -on any event to complete. -If _event_wait_list_ is `NULL`, _num_events_in_wait_list_ must be 0. -If _event_wait_list_ is not `NULL`, the list of events pointed to by -_event_wait_list_ must be valid and _num_events_in_wait_list_ must be -greater than 0. 
- -_event_ returns an event object that identifies this command and -can be used to query or wait for this command to complete. -If _event_ is `NULL` or the enqueue is unsuccessful, no event will be -created and therefore it will not be possible to query the status of this -command or to wait for this command to complete. -If _event_wait_list_ and _event_ are not `NULL`, _event_ must not refer -to an element of the _event_wait_list_ array. - -{clEnqueueReleaseD3D10ObjectsKHR} returns {CL_SUCCESS} if the function is -executed successfully. -If _num_objects_ is 0 and _mem_objects_ is `NULL` the function does nothing -and returns {CL_SUCCESS}. -Otherwise it returns one of the following errors: - - * {CL_INVALID_VALUE} if _num_objects_ is zero and _mem_objects_ is not a - `NULL` value or if _num_objects_ > 0 and _mem_objects_ is `NULL`. - * {CL_INVALID_MEM_OBJECT} if memory objects in _mem_objects_ are not valid - OpenCL memory objects or if memory objects in _mem_objects_ have not - been created from Direct3D 10 resources. - * {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not a valid - command-queue. - * {CL_INVALID_CONTEXT} if context associated with _command_queue_ was not - created from a Direct3D 10 device. - * {CL_D3D10_RESOURCE_NOT_ACQUIRED_KHR} if memory objects in _mem_objects_ - have not previously been acquired using - {clEnqueueAcquireD3D10ObjectsKHR}, or have been released using - {clEnqueueReleaseD3D10ObjectsKHR} since the last time that they were - acquired. - * {CL_INVALID_EVENT_WAIT_LIST} if _event_wait_list_ is `NULL` and - _num_events_in_wait_list_ > 0, or _event_wait_list_ is not `NULL` and - _num_events_in_wait_list_ is 0, or if event objects in - _event_wait_list_ are not valid events. - * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources - required by the OpenCL implementation on the host. 
- -[[cl_khr_d3d10_sharing-event-command-types]] -==== Event Command Types for Sharing memory objects that map to Direct3D 10 objects - -The following table describes the event command types for the OpenCL commands -to acquire and release OpenCL memory objects that have been created from -Direct3D 10 objects: - -.List of supported event command types -[width="100%",cols="2,3",options="header"] -|==== -| *Events Created By* -| *Event Command Type* - -| {clEnqueueAcquireD3D10ObjectsKHR} -| {CL_COMMAND_ACQUIRE_D3D10_OBJECTS_KHR_anchor} - -| {clEnqueueReleaseD3D10ObjectsKHR} -| {CL_COMMAND_RELEASE_D3D10_OBJECTS_KHR_anchor} - -|==== - -[[cl_khr_d3d10_sharing-issues]] -=== Issues - - . Should this extension be KHR or EXT? -+ --- -PROPOSED: KHR. -If this extension is to be approved by Khronos then it should be KHR, -otherwise EXT. -Not all platforms can support this extension, but that is also true of -OpenGL interop. - -RESOLVED: KHR. --- - - . Requiring SharedHandle on ID3D10Resource -+ --- -Requiring this can largely simplify things at the DDI level and make some -implementations faster. -However, the DirectX spec only defines the shared handle for a subset of the -resources we would like to support: - ----- -D3D10_RESOURCE_MISC_SHARED - Enables the sharing of resource data between -two or more Direct3D devices. -The only resources that can be shared are 2D non-mipmapped textures. ----- - -PROPOSED A: Add wording to the spec about some implementations needing the -resource setup as shared: - -"`Some implementations may require the resource to be shared on the D3D10 -side of the API`" - -If we do that, do we need another enum to describe this failure case? - -PROPOSED B: Require that all implementations support both shared and -non-shared resources. -The restrictions prohibiting multisample textures and the flag -D3D10_USAGE_IMMUTABLE guarantee software access to all shareable resources. 
- -RESOLVED: Require that implementations support both -D3D10_RESOURCE_MISC_SHARED being set and not set. -Add the query for {CL_CONTEXT_D3D10_PREFER_SHARED_RESOURCES_KHR} to determine -on a per-context basis which method will be faster. --- - - . Texture1D support -+ --- -There is not a matching CL type, so do we want to support this and map to -buffer or Texture2D? - -RESOLVED: We will not add support for ID3D10Texture1D objects unless a -corresponding OpenCL 1D Image type is created. --- - - . CL/D3D10 queries -+ --- -The GL interop has {clGetGLObjectInfo} and {clGetGLTextureInfo}. -It is unclear if these are needed on the D3D10 interop side since the D3D10 -spec makes these queries trivial on the D3D10 object itself. -Also, not all of the semantics of the GL call map across. - -PROPOSED: Add the {clGetMemObjectInfo} and {clGetImageInfo} parameter names -{CL_MEM_D3D10_RESOURCE_KHR} and {CL_IMAGE_D3D10_SUBRESOURCE_KHR} to query the -D3D10 resource from which a {cl_mem_TYPE} was created. -From this data, any D3D10 side information may be queried using the D3D10 -API. - -RESOLVED: We will use {clGetMemObjectInfo} and {clGetImageInfo} to access -this information. --- diff --git a/ext/cl_khr_d3d11_sharing.asciidoc b/ext/cl_khr_d3d11_sharing.asciidoc deleted file mode 100644 index 6d14a3f5..00000000 --- a/ext/cl_khr_d3d11_sharing.asciidoc +++ /dev/null @@ -1,813 +0,0 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ - -[[cl_khr_d3d11_sharing]] -== Creating OpenCL Memory Objects from Direct3D 11 Buffers and Textures - -[[cl_khr_d3d11_sharing-overview]] -=== Overview - -This section describes the *cl_khr_d3d11_sharing* extension. -The goal of this extension is to provide interoperability between OpenCL and -Direct3D 11. 
- -=== General Information - -==== Version History - -[cols="1,1,3",options="header",] -|==== -| *Date* | *Version* | *Description* -| 2020-04-21 | 1.0.0 | First assigned version. -|==== - -[[cl_khr_d3d11_sharing-new-procedures-and-functions]] -=== New Procedures and Functions - -[source,opencl] ----- -cl_int clGetDeviceIDsFromD3D11KHR(cl_platform_id platform, - cl_d3d11_device_source_khr d3d_device_source, - void *d3d_object, - cl_d3d11_device_set_khr d3d_device_set, - cl_uint num_entries, - cl_device_id *devices, - cl_uint *num_devices); - -cl_mem clCreateFromD3D11BufferKHR(cl_context context, - cl_mem_flags flags, - ID3D11Buffer *resource, - cl_int *errcode_ret); - -cl_mem clCreateFromD3D11Texture2DKHR(cl_context context, - cl_mem_flags flags, - ID3D11Texture2D *resource, - UINT subresource, - cl_int *errcode_ret); - -cl_mem clCreateFromD3D11Texture3DKHR(cl_context context, - cl_mem_flags flags, - ID3D11Texture3D *resource, - UINT subresource, - cl_int *errcode_ret); - -cl_int clEnqueueAcquireD3D11ObjectsKHR(cl_command_queue command_queue, - cl_uint num_objects, - const cl_mem *mem_objects, - cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, - cl_event *event); - -cl_int clEnqueueReleaseD3D11ObjectsKHR(cl_command_queue command_queue, - cl_uint num_objects, - const cl_mem *mem_objects, - cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, - cl_event *event); ----- - -[[cl_khr_d3d11_sharing-new-tokens]] -=== New Tokens - -Accepted as a Direct3D 11 device source in the _d3d_device_source_ parameter -of {clGetDeviceIDsFromD3D11KHR}: - ----- -CL_D3D11_DEVICE_KHR -CL_D3D11_DXGI_ADAPTER_KHR ----- - -Accepted as a set of Direct3D 11 devices in the _d3d_device_set_ parameter of -{clGetDeviceIDsFromD3D11KHR}: - ----- -CL_PREFERRED_DEVICES_FOR_D3D11_KHR -CL_ALL_DEVICES_FOR_D3D11_KHR ----- - -Accepted as a property name in the _properties_ parameter of -{clCreateContext} and {clCreateContextFromType}: - ----- -CL_CONTEXT_D3D11_DEVICE_KHR 
----- - -Accepted as a property name in the _param_name_ parameter of -{clGetContextInfo}: - ----- -CL_CONTEXT_D3D11_PREFER_SHARED_RESOURCES_KHR ----- - -Accepted as the property being queried in the _param_name_ parameter of -{clGetMemObjectInfo}: - ----- -CL_MEM_D3D11_RESOURCE_KHR ----- - -Accepted as the property being queried in the _param_name_ parameter of -{clGetImageInfo}: - ----- -CL_IMAGE_D3D11_SUBRESOURCE_KHR ----- - -Returned in the _param_value_ parameter of {clGetEventInfo} when -_param_name_ is {CL_EVENT_COMMAND_TYPE}: - ----- -CL_COMMAND_ACQUIRE_D3D11_OBJECTS_KHR -CL_COMMAND_RELEASE_D3D11_OBJECTS_KHR ----- - -Returned by {clCreateContext} and {clCreateContextFromType} if the Direct3D -11 device specified for interoperability is not compatible with the devices -against which the context is to be created: - ----- -CL_INVALID_D3D11_DEVICE_KHR ----- - -Returned by {clCreateFromD3D11BufferKHR} when _resource_ is not a Direct3D -11 buffer object, and by {clCreateFromD3D11Texture2DKHR} and -{clCreateFromD3D11Texture3DKHR} when _resource_ is not a Direct3D 11 texture -object. - ----- -CL_INVALID_D3D11_RESOURCE_KHR ----- - -Returned by {clEnqueueAcquireD3D11ObjectsKHR} when any of _mem_objects_ are -currently acquired by OpenCL: - ----- -CL_D3D11_RESOURCE_ALREADY_ACQUIRED_KHR ----- - -Returned by {clEnqueueReleaseD3D11ObjectsKHR} when any of _mem_objects_ are -not currently acquired by OpenCL: - ----- -CL_D3D11_RESOURCE_NOT_ACQUIRED_KHR ----- - -[[cl_khr_d3d11_sharing-additions-to-chapter-4]] -=== Additions to Chapter 4 of the OpenCL 2.2 Specification - -In _section 4.4_, replace the description of _properties_ under -{clCreateContext} with: - -"_properties_ specifies a list of context property names and their -corresponding values. -Each property is followed immediately by the corresponding desired value. -The list is terminated with zero. 
-If a property is not specified in _properties_, then its default value -(listed in _table 4.5_) is used (it is said to be specified implicitly). -If _properties_ is `NULL` or empty (points to a list whose first value is -zero), all attributes take on their default values." - -Add the following to _table 4.5_: - -[cols="2,1,2",options="header",] -|==== -| *cl_context_properties enum* -| *Property value* -| *Description* - -| {CL_CONTEXT_D3D11_DEVICE_KHR} -| ID3D11Device * -| Specifies the ID3D11Device * to use for Direct3D 11 interoperability. - - The default value is `NULL`. - -|==== - -Add to the list of errors for {clCreateContext}: - - * {CL_INVALID_D3D11_DEVICE_KHR} if the value of the property - {CL_CONTEXT_D3D11_DEVICE_KHR} is non-`NULL` and does not specify a valid - Direct3D 11 device with which the _cl_device_ids_ against which this - context is to be created may interoperate. - * {CL_INVALID_OPERATION} if Direct3D 11 interoperability is specified by - setting {CL_CONTEXT_D3D11_DEVICE_KHR} to a non-`NULL` value, and - interoperability with another graphics API is also specified. - -Add to the list of errors for {clCreateContextFromType} the same new errors -described above for {clCreateContext}. - -Add the following row to _table 4.6_: - -[cols="2,1,2",options="header",] -|==== -| *cl_context_info* -| *Return Type* -| *Information returned in param_value* - -| {CL_CONTEXT_D3D11_PREFER_SHARED_RESOURCES_KHR} -| {cl_bool_TYPE} -| Returns {CL_TRUE} if Direct3D 11 resources created as shared by setting - _MiscFlags_ to include D3D11_RESOURCE_MISC_SHARED will perform faster when - shared with OpenCL, compared with resources which have not set this flag. - Otherwise returns {CL_FALSE}. 
-|==== - -[[cl_khr_d3d11_sharing-additions-to-chapter-5]] -=== Additions to Chapter 5 of the OpenCL 2.2 Specification - -Add to the list of errors for {clGetMemObjectInfo}: - - * {CL_INVALID_D3D11_RESOURCE_KHR} if _param_name_ is - {CL_MEM_D3D11_RESOURCE_KHR} and _memobj_ was not created by the function - {clCreateFromD3D11BufferKHR}, {clCreateFromD3D11Texture2DKHR}, or - {clCreateFromD3D11Texture3DKHR}. - -Extend _table 5.12_ to include the following entry. - -[cols="2,1,2",options="header",] -|==== -| *cl_mem_info* -| *Return type* -| *Info. returned in _param_value_* - -| {CL_MEM_D3D11_RESOURCE_KHR} -| ID3D11Resource * -| If _memobj_ was created using {clCreateFromD3D11BufferKHR}, - {clCreateFromD3D11Texture2DKHR}, or {clCreateFromD3D11Texture3DKHR}, - returns the _resource_ argument specified when _memobj_ was created. - -|==== - -Add to the list of errors for {clGetImageInfo}: - - * {CL_INVALID_D3D11_RESOURCE_KHR} if _param_name_ is - {CL_IMAGE_D3D11_SUBRESOURCE_KHR} and _image_ was not created by the function - {clCreateFromD3D11Texture2DKHR}, or {clCreateFromD3D11Texture3DKHR}. - -Extend _table 5.9_ to include the following entry. - -[cols="2,1,2",options="header",] -|==== -| *cl_image_info* -| *Return type* -| *Info. returned in _param_value_* - -| {CL_IMAGE_D3D11_SUBRESOURCE_KHR} -| UINT -| If _image_ was created using {clCreateFromD3D11Texture2DKHR}, or - {clCreateFromD3D11Texture3DKHR}, returns the _subresource_ argument - specified when _image_ was created. -|==== - -Add to _table 5.22_ in the *Info returned in param_value* column for -_cl_event_info_ = {CL_EVENT_COMMAND_TYPE}: - ----- -CL_COMMAND_ACQUIRE_D3D11_OBJECTS_KHR -CL_COMMAND_RELEASE_D3D11_OBJECTS_KHR ----- - -[[cl_khr_d3d11_sharing-sharing-memory-objects-with-direct3d-11-resources]] -=== Sharing Memory Objects with Direct3D 11 Resources - -This section discusses OpenCL functions that allow applications to use -Direct3D 11 resources as OpenCL memory objects. 
-This allows efficient sharing of data between OpenCL and Direct3D 11. -The OpenCL API may be used to execute kernels that read and/or write memory -objects that are also Direct3D 11 resources. -An OpenCL image object may be created from a Direct3D 11 texture resource. -An OpenCL buffer object may be created from a Direct3D 11 buffer resource. -OpenCL memory objects may be created from Direct3D 11 objects if and only if -the OpenCL context has been created from a Direct3D 11 device. - -[[cl_khr_d3d11_sharing-querying-opencl-devices-corresponding-to-direct3d-11-devices]] -==== Querying OpenCL Devices Corresponding to Direct3D 11 Devices - -The OpenCL devices corresponding to a Direct3D 11 device may be queried. -The OpenCL devices corresponding to a DXGI adapter may also be queried. -The OpenCL devices corresponding to a Direct3D 11 device will be a subset of -the OpenCL devices corresponding to the DXGI adapter against which the -Direct3D 11 device was created. - -The OpenCL devices corresponding to a Direct3D 11 device or a DXGI device -may be queried using the function - -include::{generated}/api/protos/clGetDeviceIDsFromD3D11KHR.txt[] - -_platform_ refers to the platform ID returned by {clGetPlatformIDs}. - -_d3d_device_source_ specifies the type of _d3d_object_, and must be one of -the values shown in the table below. - -_d3d_object_ specifies the object whose corresponding OpenCL devices are -being queried. -The type of _d3d_object_ must be as specified in the table below. - -_d3d_device_set_ specifies the set of devices to return, and must be one of -the values shown in the table below. - -_num_entries_ is the number of {cl_device_id_TYPE} entries that can be added to -_devices_. -If _devices_ is not `NULL` then _num_entries_ must be greater than zero. - -_devices_ returns a list of OpenCL devices found. -The {cl_device_id_TYPE} values returned in _devices_ can be used to identify a -specific OpenCL device. -If _devices_ is `NULL`, this argument is ignored. 
-The number of OpenCL devices returned is the minimum of the value specified -by _num_entries_ and the number of OpenCL devices corresponding to -_d3d_object_. - -_num_devices_ returns the number of OpenCL devices available that correspond -to _d3d_object_. -If _num_devices_ is `NULL`, this argument is ignored. - -{clGetDeviceIDsFromD3D11KHR} returns {CL_SUCCESS} if the function is executed -successfully. -Otherwise it may return - - * {CL_INVALID_PLATFORM} if _platform_ is not a valid platform. - * {CL_INVALID_VALUE} if _d3d_device_source_ is not a valid value, - _d3d_device_set_ is not a valid value, _num_entries_ is equal to zero - and _devices_ is not `NULL`, or if both _num_devices_ and _devices_ are - `NULL`. - * {CL_DEVICE_NOT_FOUND} if no OpenCL devices that correspond to _d3d_object_ - were found. - -[[cl_khr_d3d11_sharing-clGetDeviceIDsFromD3D11KHR-object-type]] -._Direct3D 11 object types that may be used by_ {clGetDeviceIDsFromD3D11KHR} -[cols=",",options="header",] -|==== -| {cl_d3d11_device_source_khr_TYPE} -| Type of _d3d_object_ - -| {CL_D3D11_DEVICE_KHR} -| ID3D11Device * - -| {CL_D3D11_DXGI_ADAPTER_KHR} -| IDXGIAdapter * - -|==== - -[[cl_khr_d3d11_sharing-clGetDeviceIDsFromD3D11KHR-devices]] -._Sets of devices queriable using_ {clGetDeviceIDsFromD3D11KHR} -[cols=",",options="header",] -|==== -| {cl_d3d11_device_set_khr_TYPE} -| Devices returned in _devices_ - -| {CL_PREFERRED_DEVICES_FOR_D3D11_KHR} -| The preferred OpenCL devices associated with the specified Direct3D - object. - -| {CL_ALL_DEVICES_FOR_D3D11_KHR} -| All OpenCL devices which may interoperate with the specified Direct3D - object. - Performance of sharing data on these devices may be considerably less than - on the preferred devices. - -|==== - -[[cl_khr_d3d11_sharing-lifetime-of-shared-objects]] -==== Lifetime of Shared Objects - -An OpenCL memory object created from a Direct3D 11 resource remains valid as -long as the corresponding Direct3D 11 resource has not been deleted. 
-If the Direct3D 11 resource is deleted through the Direct3D 11 API, -subsequent use of the OpenCL memory object will result in undefined -behavior, including but not limited to possible OpenCL errors, data -corruption, and program termination. - -The successful creation of a {cl_context_TYPE} against a Direct3D 11 device -specified via the context create parameter {CL_CONTEXT_D3D11_DEVICE_KHR} will -increment the internal Direct3D reference count on the specified Direct3D 11 -device. -The internal Direct3D reference count on that Direct3D 11 device will be -decremented when the OpenCL reference count on the returned OpenCL context -drops to zero. - -The OpenCL context and corresponding command-queues are dependent on the -existence of the Direct3D 11 device from which the OpenCL context was -created. -If the Direct3D 11 device is deleted through the Direct3D 11 API, subsequent -use of the OpenCL context will result in undefined behavior, including but -not limited to possible OpenCL errors, data corruption, and program -termination. - -[[cl_khr_d3d11_sharing-sharing-direct3d-11-buffer-resources-as-opencl-buffer-objects]] -==== Sharing Direct3D 11 Buffer Resources as OpenCL Buffer Objects - -The function - -include::{generated}/api/protos/clCreateFromD3D11BufferKHR.txt[] - -creates an OpenCL buffer object from a Direct3D 11 buffer. - -_context_ is a valid OpenCL context created from a Direct3D 11 device. - -_flags_ is a bit-field that is used to specify usage information. -Refer to table 5.3 for a description of _flags_. -Only {CL_MEM_READ_ONLY}, {CL_MEM_WRITE_ONLY} and {CL_MEM_READ_WRITE} values -specified in _table 5.3_ can be used. - -_resource_ is a pointer to the Direct3D 11 buffer to share. - -_errcode_ret_ will return an appropriate error code. -If _errcode_ret_ is `NULL`, no error code is returned. 
- -{clCreateFromD3D11BufferKHR} returns a valid non-zero OpenCL buffer object -and _errcode_ret_ is set to {CL_SUCCESS} if the buffer object is created -successfully. -Otherwise, it returns a `NULL` value with one of the following error values -returned in _errcode_ret_: - - * {CL_INVALID_CONTEXT} if _context_ is not a valid context. - * {CL_INVALID_VALUE} if values specified in _flags_ are not valid. - * {CL_INVALID_D3D11_RESOURCE_KHR} if _resource_ is not a Direct3D 11 buffer - resource, if _resource_ was created with the D3D11_USAGE flag - D3D11_USAGE_IMMUTABLE, if a {cl_mem_TYPE} from _resource_ has already been - created using {clCreateFromD3D11BufferKHR}, or if _context_ was not - created against the same Direct3D 11 device from which _resource_ was - created. - * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources - required by the OpenCL implementation on the host. - -The size of the returned OpenCL buffer object is the same as the size of -_resource_. -This call will increment the internal Direct3D reference count on -_resource_. -The internal Direct3D reference count on _resource_ will be decremented when -the OpenCL reference count on the returned OpenCL memory object drops to -zero. - -[[cl_khr_d3d11_sharing-sharing-direct3d-11-texture-and-resources-as-opencl-image-objects]] -==== Sharing Direct3D 11 Texture and Resources as OpenCL Image Objects - -The function - -include::{generated}/api/protos/clCreateFromD3D11Texture2DKHR.txt[] - -creates an OpenCL 2D image object from a subresource of a Direct3D 11 2D -texture. - -_context_ is a valid OpenCL context created from a Direct3D 11 device. - -_flags_ is a bit-field that is used to specify usage information. -Refer to _table 5.3_ for a description of _flags_. -Only {CL_MEM_READ_ONLY}, {CL_MEM_WRITE_ONLY} and {CL_MEM_READ_WRITE} values -specified in _table 5.3_ can be used. - -_resource_ is a pointer to the Direct3D 11 2D texture to share. 
- -_subresource_ is the subresource of _resource_ to share. - -_errcode_ret_ will return an appropriate error code. -If _errcode_ret_ is `NULL`, no error code is returned. - -{clCreateFromD3D11Texture2DKHR} returns a valid non-zero OpenCL image object -and _errcode_ret_ is set to {CL_SUCCESS} if the image object is created -successfully. -Otherwise, it returns a `NULL` value with one of the following error values -returned in _errcode_ret_: - - * {CL_INVALID_CONTEXT} if _context_ is not a valid context. - * {CL_INVALID_VALUE} if values specified in _flags_ are not valid or if - _subresource_ is not a valid subresource index for _resource_. - * {CL_INVALID_D3D11_RESOURCE_KHR} if _resource_ is not a Direct3D 11 texture - resource, if _resource_ was created with the D3D11_USAGE flag - D3D11_USAGE_IMMUTABLE, if _resource_ is a multisampled texture, if a - {cl_mem_TYPE} from subresource _subresource_ of _resource_ has already been - created using {clCreateFromD3D11Texture2DKHR}, or if _context_ was not - created against the same Direct3D 11 device from which _resource_ was - created. - * {CL_INVALID_IMAGE_FORMAT_DESCRIPTOR} if the Direct3D 11 texture format of - _resource_ is not listed in the table - <<cl_khr_d3d11_sharing-mapping-of-image-formats,Direct3D 11 formats and corresponding OpenCL image formats>> or if the Direct3D 11 texture - format of _resource_ does not map to a supported OpenCL image format. - * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources - required by the OpenCL implementation on the host. - -The width and height of the returned OpenCL 2D image object are determined -by the width and height of subresource _subresource_ of _resource_. -The channel type and order of the returned OpenCL 2D image object is -determined by the format of _resource_ by the table -<<cl_khr_d3d11_sharing-mapping-of-image-formats,Direct3D 11 formats and corresponding OpenCL image formats>>. - -This call will increment the internal Direct3D reference count on -_resource_. -The internal Direct3D reference count on _resource_ will be decremented when -the OpenCL reference count on the returned OpenCL memory object drops to -zero. 
- -The function - -include::{generated}/api/protos/clCreateFromD3D11Texture3DKHR.txt[] - -creates an OpenCL 3D image object from a subresource of a Direct3D 11 3D -texture. - -_context_ is a valid OpenCL context created from a Direct3D 11 device. - -_flags_ is a bit-field that is used to specify usage information. -Refer to _table 5.3_ for a description of _flags_. -Only {CL_MEM_READ_ONLY}, {CL_MEM_WRITE_ONLY} and {CL_MEM_READ_WRITE} values -specified in _table 5.3_ can be used. - -_resource_ is a pointer to the Direct3D 11 3D texture to share. - -_subresource_ is the subresource of _resource_ to share. - -_errcode_ret_ will return an appropriate error code. -If _errcode_ret_ is `NULL`, no error code is returned. - -{clCreateFromD3D11Texture3DKHR} returns a valid non-zero OpenCL image object -and _errcode_ret_ is set to {CL_SUCCESS} if the image object is created -successfully. -Otherwise, it returns a `NULL` value with one of the following error values -returned in _errcode_ret_: - - * {CL_INVALID_CONTEXT} if _context_ is not a valid context. - * {CL_INVALID_VALUE} if values specified in _flags_ are not valid or if - _subresource_ is not a valid subresource index for _resource_. - * {CL_INVALID_D3D11_RESOURCE_KHR} if _resource_ is not a Direct3D 11 texture - resource, if _resource_ was created with the D3D11_USAGE flag - D3D11_USAGE_IMMUTABLE, if _resource_ is a multisampled texture, if a - {cl_mem_TYPE} from subresource _subresource_ of _resource_ has already been - created using {clCreateFromD3D11Texture3DKHR}, or if _context_ was not - created against the same Direct3D 11 device from which _resource_ was - created. - * {CL_INVALID_IMAGE_FORMAT_DESCRIPTOR} if the Direct3D 11 texture format of - _resource_ is not listed in the table - <<cl_khr_d3d11_sharing-mapping-of-image-formats,Direct3D 11 formats and corresponding OpenCL image formats>> or if the Direct3D 11 texture - format of _resource_ does not map to a supported OpenCL image format. - * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources - required by the OpenCL implementation on the host. 
- -The width, height and depth of the returned OpenCL 3D image object are -determined by the width, height and depth of subresource _subresource_ of -_resource_. -The channel type and order of the returned OpenCL 3D image object is -determined by the format of _resource_ by the table -<<cl_khr_d3d11_sharing-mapping-of-image-formats,Direct3D 11 formats and corresponding OpenCL image formats>>. - -This call will increment the internal Direct3D reference count on -_resource_. -The internal Direct3D reference count on _resource_ will be decremented when -the OpenCL reference count on the returned OpenCL memory object drops to -zero. - -[[cl_khr_d3d11_sharing-mapping-of-image-formats]] -._Direct3D 11 formats and corresponding OpenCL image formats_ -[cols=",",options="header",] -|==== -| *DXGI format* -| *CL image format* - -*(channel order, channel data type)* - -| DXGI_FORMAT_R32G32B32A32_FLOAT | `CL_RGBA`, `CL_FLOAT` -| DXGI_FORMAT_R32G32B32A32_UINT | `CL_RGBA`, `CL_UNSIGNED_INT32` -| DXGI_FORMAT_R32G32B32A32_SINT | `CL_RGBA`, `CL_SIGNED_INT32` -| | -| DXGI_FORMAT_R16G16B16A16_FLOAT | `CL_RGBA`, `CL_HALF_FLOAT` -| DXGI_FORMAT_R16G16B16A16_UNORM | `CL_RGBA`, `CL_UNORM_INT16` -| DXGI_FORMAT_R16G16B16A16_UINT | `CL_RGBA`, `CL_UNSIGNED_INT16` -| DXGI_FORMAT_R16G16B16A16_SNORM | `CL_RGBA`, `CL_SNORM_INT16` -| DXGI_FORMAT_R16G16B16A16_SINT | `CL_RGBA`, `CL_SIGNED_INT16` -| | -| DXGI_FORMAT_B8G8R8A8_UNORM | `CL_BGRA`, `CL_UNORM_INT8` -| DXGI_FORMAT_R8G8B8A8_UNORM | `CL_RGBA`, `CL_UNORM_INT8` -| DXGI_FORMAT_R8G8B8A8_UINT | `CL_RGBA`, `CL_UNSIGNED_INT8` -| DXGI_FORMAT_R8G8B8A8_SNORM | `CL_RGBA`, `CL_SNORM_INT8` -| DXGI_FORMAT_R8G8B8A8_SINT | `CL_RGBA`, `CL_SIGNED_INT8` -| | -| DXGI_FORMAT_R32G32_FLOAT | `CL_RG`, `CL_FLOAT` -| DXGI_FORMAT_R32G32_UINT | `CL_RG`, `CL_UNSIGNED_INT32` -| DXGI_FORMAT_R32G32_SINT | `CL_RG`, `CL_SIGNED_INT32` -| | -| DXGI_FORMAT_R16G16_FLOAT | `CL_RG`, `CL_HALF_FLOAT` -| DXGI_FORMAT_R16G16_UNORM | `CL_RG`, `CL_UNORM_INT16` -| DXGI_FORMAT_R16G16_UINT | `CL_RG`, `CL_UNSIGNED_INT16` -| DXGI_FORMAT_R16G16_SNORM | `CL_RG`, `CL_SNORM_INT16` -| 
DXGI_FORMAT_R16G16_SINT | `CL_RG`, `CL_SIGNED_INT16` -| | -| DXGI_FORMAT_R8G8_UNORM | `CL_RG`, `CL_UNORM_INT8` -| DXGI_FORMAT_R8G8_UINT | `CL_RG`, `CL_UNSIGNED_INT8` -| DXGI_FORMAT_R8G8_SNORM | `CL_RG`, `CL_SNORM_INT8` -| DXGI_FORMAT_R8G8_SINT | `CL_RG`, `CL_SIGNED_INT8` -| | -| DXGI_FORMAT_R32_FLOAT | `CL_R`, `CL_FLOAT` -| DXGI_FORMAT_R32_UINT | `CL_R`, `CL_UNSIGNED_INT32` -| DXGI_FORMAT_R32_SINT | `CL_R`, `CL_SIGNED_INT32` -| | -| DXGI_FORMAT_R16_FLOAT | `CL_R`, `CL_HALF_FLOAT` -| DXGI_FORMAT_R16_UNORM | `CL_R`, `CL_UNORM_INT16` -| DXGI_FORMAT_R16_UINT | `CL_R`, `CL_UNSIGNED_INT16` -| DXGI_FORMAT_R16_SNORM | `CL_R`, `CL_SNORM_INT16` -| DXGI_FORMAT_R16_SINT | `CL_R`, `CL_SIGNED_INT16` -| | -| DXGI_FORMAT_R8_UNORM | `CL_R`, `CL_UNORM_INT8` -| DXGI_FORMAT_R8_UINT | `CL_R`, `CL_UNSIGNED_INT8` -| DXGI_FORMAT_R8_SNORM | `CL_R`, `CL_SNORM_INT8` -| DXGI_FORMAT_R8_SINT | `CL_R`, `CL_SIGNED_INT8` -|==== - -[[cl_khr_d3d11_sharing-querying-direct3d-properties-of-memory-objects-created-from-direct3d-11-resources]] -==== Querying Direct3D properties of memory objects created from Direct3D 11 resources - -Properties of Direct3D 11 objects may be queried using {clGetMemObjectInfo} -and {clGetImageInfo} with _param_name_ {CL_MEM_D3D11_RESOURCE_KHR} and -{CL_IMAGE_D3D11_SUBRESOURCE_KHR} respectively as described in _sections 5.4.3_ -and _5.3.6_. - -[[cl_khr_d3d11_sharing-sharing-memory-objects-created-from-direct3d-11-resources-between-direct3d-11-and-opencl-contexts]] -==== Sharing memory objects created from Direct3D 11 resources between Direct3D 11 and OpenCL contexts - -The function - -include::{generated}/api/protos/clEnqueueAcquireD3D11ObjectsKHR.txt[] - -is used to acquire OpenCL memory objects that have been created from -Direct3D 11 resources. -The Direct3D 11 objects are acquired by the OpenCL context associated with -_command_queue_ and can therefore be used by all command-queues associated -with the OpenCL context. 
- -OpenCL memory objects created from Direct3D 11 resources must be acquired -before they can be used by any OpenCL commands queued to a command-queue. -If an OpenCL memory object created from a Direct3D 11 resource is used while -it is not currently acquired by OpenCL, the behavior is undefined. -Implementations may fail the execution of commands attempting to use that -OpenCL memory object and set their associated event's execution status to -{CL_D3D11_RESOURCE_NOT_ACQUIRED_KHR}. - -If {CL_CONTEXT_INTEROP_USER_SYNC} is not specified as {CL_TRUE} during context -creation, {clEnqueueAcquireD3D11ObjectsKHR} provides the synchronization -guarantee that any Direct3D 11 calls involving the interop device(s) used in -the OpenCL context made before {clEnqueueAcquireD3D11ObjectsKHR} is called -will complete executing before _event_ reports completion and before the -execution of any subsequent OpenCL work issued in _command_queue_ begins. -If the context was created with properties specifying -{CL_CONTEXT_INTEROP_USER_SYNC} as {CL_TRUE}, the user is responsible for -guaranteeing that any Direct3D 11 calls involving the interop device(s) used -in the OpenCL context made before {clEnqueueAcquireD3D11ObjectsKHR} is -called have completed before calling {clEnqueueAcquireD3D11ObjectsKHR}. - -_command_queue_ is a valid command-queue. - -_num_objects_ is the number of memory objects to be acquired in -_mem_objects_. - -_mem_objects_ is a pointer to a list of OpenCL memory objects that were -created from Direct3D 11 resources. - -_event_wait_list_ and _num_events_in_wait_list_ specify events that need to -complete before this particular command can be executed. -If _event_wait_list_ is `NULL`, then this particular command does not wait -on any event to complete. -If _event_wait_list_ is `NULL`, _num_events_in_wait_list_ must be 0. 
-If _event_wait_list_ is not `NULL`, the list of events pointed to by -_event_wait_list_ must be valid and _num_events_in_wait_list_ must be -greater than 0. -The events specified in _event_wait_list_ act as synchronization points. - -_event_ returns an event object that identifies this command and -can be used to query or wait for this command to complete. -If _event_ is `NULL` or the enqueue is unsuccessful, no event will be -created and therefore it will not be possible to query the status of this -command or to wait for this command to complete. -If _event_wait_list_ and _event_ are not `NULL`, _event_ must not refer -to an element of the _event_wait_list_ array. - -{clEnqueueAcquireD3D11ObjectsKHR} returns {CL_SUCCESS} if the function is -executed successfully. -If _num_objects_ is 0 and _mem_objects_ is `NULL` then the function does -nothing and returns {CL_SUCCESS}. -Otherwise it returns one of the following errors: - - * {CL_INVALID_VALUE} if _num_objects_ is zero and _mem_objects_ is not a - `NULL` value or if _num_objects_ > 0 and _mem_objects_ is `NULL`. - * {CL_INVALID_MEM_OBJECT} if memory objects in _mem_objects_ are not valid - OpenCL memory objects or if memory objects in _mem_objects_ have not - been created from Direct3D 11 resources. - * {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not a valid - command-queue. - * {CL_INVALID_CONTEXT} if context associated with _command_queue_ was not - created from a Direct3D 11 device. - * {CL_D3D11_RESOURCE_ALREADY_ACQUIRED_KHR} if memory objects in - _mem_objects_ have previously been acquired using - {clEnqueueAcquireD3D11ObjectsKHR} but have not been released using - {clEnqueueReleaseD3D11ObjectsKHR}. - * {CL_INVALID_EVENT_WAIT_LIST} if _event_wait_list_ is `NULL` and - _num_events_in_wait_list_ > 0, or _event_wait_list_ is not `NULL` and - _num_events_in_wait_list_ is 0, or if event objects in _event_wait_list_ - are not valid events. 
- * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources - required by the OpenCL implementation on the host. - -The function - -include::{generated}/api/protos/clEnqueueReleaseD3D11ObjectsKHR.txt[] - -is used to release OpenCL memory objects that have been created from -Direct3D 11 resources. -The Direct3D 11 objects are released by the OpenCL context associated with -_command_queue_. - -OpenCL memory objects created from Direct3D 11 resources which have been -acquired by OpenCL must be released by OpenCL before they may be accessed by -Direct3D 11. -Accessing a Direct3D 11 resource while its corresponding OpenCL memory -object is acquired is in error and will result in undefined behavior, -including but not limited to possible OpenCL errors, data corruption, and -program termination. - -If {CL_CONTEXT_INTEROP_USER_SYNC} is not specified as {CL_TRUE} during context -creation, {clEnqueueReleaseD3D11ObjectsKHR} provides the synchronization -guarantee that any Direct3D 11 calls involving the interop -device(s) used in the OpenCL context made after the call to -{clEnqueueReleaseD3D11ObjectsKHR} will not start executing until after all -events in _event_wait_list_ are complete and all work already submitted to -_command_queue_ completes execution. -If the context was created with properties specifying -{CL_CONTEXT_INTEROP_USER_SYNC} as {CL_TRUE}, the user is responsible for -guaranteeing that any Direct3D 11 calls involving the interop device(s) used -in the OpenCL context made after {clEnqueueReleaseD3D11ObjectsKHR} will not -start executing until after the event returned by -{clEnqueueReleaseD3D11ObjectsKHR} reports completion. - -_num_objects_ is the number of memory objects to be released in -_mem_objects_. - -_mem_objects_ is a pointer to a list of OpenCL memory objects that were -created from Direct3D 11 resources. 
- -_event_wait_list_ and _num_events_in_wait_list_ specify events that need to -complete before this particular command can be executed. -If _event_wait_list_ is `NULL`, then this particular command does not wait -on any event to complete. -If _event_wait_list_ is `NULL`, _num_events_in_wait_list_ must be 0. -If _event_wait_list_ is not `NULL`, the list of events pointed to by -_event_wait_list_ must be valid and _num_events_in_wait_list_ must be -greater than 0. - -_event_ returns an event object that identifies this command and -can be used to query or wait for this command to complete. -If _event_ is `NULL` or the enqueue is unsuccessful, no event will be -created and therefore it will not be possible to query the status of this -command or to wait for this command to complete. -If _event_wait_list_ and _event_ are not `NULL`, _event_ must not refer -to an element of the _event_wait_list_ array. - -{clEnqueueReleaseD3D11ObjectsKHR} returns {CL_SUCCESS} if the function is -executed successfully. -If _num_objects_ is 0 and _mem_objects_ is `NULL` the function does nothing -and returns {CL_SUCCESS}. -Otherwise it returns one of the following errors: - - * {CL_INVALID_VALUE} if _num_objects_ is zero and _mem_objects_ is not a - `NULL` value or if _num_objects_ > 0 and _mem_objects_ is `NULL`. - * {CL_INVALID_MEM_OBJECT} if memory objects in _mem_objects_ are not valid - OpenCL memory objects or if memory objects in _mem_objects_ have not - been created from Direct3D 11 resources. - * {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not a valid - command-queue. - * {CL_INVALID_CONTEXT} if context associated with _command_queue_ was not - created from a Direct3D 11 device. - * {CL_D3D11_RESOURCE_NOT_ACQUIRED_KHR} if memory objects in _mem_objects_ - have not previously been acquired using - {clEnqueueAcquireD3D11ObjectsKHR}, or have been released using - {clEnqueueReleaseD3D11ObjectsKHR} since the last time that they were - acquired. 
- * {CL_INVALID_EVENT_WAIT_LIST} if _event_wait_list_ is `NULL` and - _num_events_in_wait_list_ > 0, or _event_wait_list_ is not `NULL` and - _num_events_in_wait_list_ is 0, or if event objects in - _event_wait_list_ are not valid events. - * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources - required by the OpenCL implementation on the host. - -[[cl_khr_d3d11_sharing-event-command-types]] -==== Event Command Types for Sharing memory objects that map to Direct3D 11 objects - -The following table describes the event command types for the OpenCL commands -to acquire and release OpenCL memory objects that have been created from -Direct3D 11 objects: - -.List of supported event command types -[width="100%",cols="2,3",options="header"] -|==== -| *Events Created By* -| *Event Command Type* - -| {clEnqueueAcquireD3D11ObjectsKHR} -| {CL_COMMAND_ACQUIRE_D3D11_OBJECTS_KHR_anchor} - -| {clEnqueueReleaseD3D11ObjectsKHR} -| {CL_COMMAND_RELEASE_D3D11_OBJECTS_KHR_anchor} - -|==== diff --git a/ext/cl_khr_depth_images.asciidoc b/ext/cl_khr_depth_images.asciidoc deleted file mode 100644 index c11c695e..00000000 --- a/ext/cl_khr_depth_images.asciidoc +++ /dev/null @@ -1,297 +0,0 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ - -[[cl_khr_depth_images]] -== Depth Images - -This section describes the *cl_khr_depth_images* extension. - -This extension adds support for depth images. - -This extension became a core feature in OpenCL 2.0. - -=== General Information - -==== Version History - -[cols="1,1,3",options="header",] -|==== -| *Date* | *Version* | *Description* -| 2020-04-21 | 1.0.0 | First assigned version. 
-|==== - -[[cl_khr_depth_images-additions-to-chapter-5]] -=== Additions to Chapter 5 of the OpenCL 1.2 Specification - -This extension adds the following new image formats for depth images to _tables 5.6 and 5.7_ of the OpenCL 1.2 specification. - -[cols="",options="header",] -|======================================================================= -|*Enum values that can be specified in channel_order* - -|{CL_DEPTH}. This format can only be used if channel data type = {CL_UNORM_INT16} or {CL_FLOAT}. - -|======================================================================= - -[cols="2,3",options="header",] -|======================================================================= -|*Image Channel Data Type* -|*Description* - -|{CL_UNORM_INT16} -|Each channel component is a normalized unsigned 16-bit integer value - -|{CL_FLOAT} -|Each channel component is a single precision floating-point value -|======================================================================= - -This extension adds the following new image format to the minimum list of supported image formats described in _table 5.8_: - -[[cl_khr_depth_images-required-image-formats]] -._Required Image Formats for_ *cl_khr_depth_images* -[cols=",,",options="header",] -|==================================================================== -|*num_channels* -|*channel_order* -|*channel_data_type* - -|1 -|{CL_DEPTH} -|{CL_UNORM_INT16} + -{CL_FLOAT} - -|==================================================================== - -NOTE: - -Depth image objects can be initialized, read and written using the appropriate CL APIs i.e. {clEnqueueReadImage}, {clEnqueueWriteImage}, {clEnqueueCopyImage}, {clEnqueueCopyImageToBuffer}, {clEnqueueCopyBufferToImage}, {clEnqueueMapImage} and {clEnqueueFillImage}. - -For {clEnqueueFillImage}, the fill color is a 4-component value where the R component refers to the depth value if the image format is {CL_DEPTH}. 
The fill color will be converted to the appropriate image channel format and order associated with image. - -Update the text that describes the arg_value argument to {clSetKernelArg} with the following: - -If the kernel argument is declared to be of type image2d_depth_t or image2d_array_depth_t, the arg_value entry will be a pointer to a depth image or depth image array object. - -Add the following error condition for {clSetKernelArg}: - -{CL_INVALID_MEM_OBJECT} for an argument declared to be a depth image or a depth image -array and the argument value specified in arg_value does not follow the rules described above -for a depth memory object or memory array object argument. - -[[cl_khr_depth_images-additions-to-chapter-6]] -=== Additions to Chapter 6 of the OpenCL 1.2 Specification - -Add the following new data types to _table 6.3_ in _section 6.1.3_ of the OpenCL 1.2 specification: - -[cols="2,3",options="header",] -|==== -|*Type* -|*Description* - -|*image2d_depth_t* -|A 2D depth image. Refer to _section 6.12.14_ for a detailed -description of the built-in functions that use this type. - -|*image2d_array_depth_t* -|A 2D depth image array. Refer to _section 6.12.14_ for a -detailed description of the built-in functions that use this -type. - -|==== - -Add the following to the bulleted list in section 6.12.14.1.1 - Determining the border color: - - * If the image channel order is {CL_DEPTH}, the border value is `0.0f`. - -Add the following built-in functions to section 6.12.14.2 - Built-in Image Read Functions: - -[cols="2,3",options="header",] -|==== -|*Function* -|*Description* - -| float *read_imagef*(read_only image2d_depth_t _image_, - sampler_t _sampler_, int2 _coord_) + - float *read_imagef*(read_only image2d_depth_t _image_, - sampler_t _sampler_, float2 _coord_) - | Use the coordinate (_coord.x_, _coord.y_) to do an element lookup in - the 2D depth image object specified by _image_. 
- - *read_imagef* returns a floating-point value in the range [0.0, 1.0] - for depth image objects created with _image_channel_data_type_ set to - {CL_UNORM_INT16} or {CL_UNORM_INT24}. - - *read_imagef* returns a floating-point value for depth image objects - created with _image_channel_data_type_ set to {CL_FLOAT}. - - The *read_imagef* calls that take integer coordinates must use a - sampler with filter mode set to `CLK_FILTER_NEAREST`, normalized - coordinates set to `CLK_NORMALIZED_COORDS_FALSE` and addressing mode - set to `CLK_ADDRESS_CLAMP_TO_EDGE`, `CLK_ADDRESS_CLAMP` or - `CLK_ADDRESS_NONE`; otherwise the values returned are undefined. - - Values returned by *read_imagef* for depth image objects with - _image_channel_data_type_ values not specified in the description - above are undefined. -| | -| float *read_imagef*(read_only image2d_array_depth_t _image_, - sampler_t _sampler_, int4 _coord_) + - float *read_imagef*(read_only image2d_array_depth_t _image_, - sampler_t _sampler_, float4 _coord_) - | Use _coord.xy_ to do an element lookup in the 2D image identified by - _coord.z_ in the 2D depth image array specified by _image_. - - *read_imagef* returns a floating-point value in the range [0.0, 1.0] - for depth image objects created with _image_channel_data_type_ set to - {CL_UNORM_INT16} or {CL_UNORM_INT24}. - - *read_imagef* returns a floating-point value for depth image objects - created with _image_channel_data_type_ set to {CL_FLOAT}. - - The *read_imagef* calls that take integer coordinates must use a - sampler with filter mode set to `CLK_FILTER_NEAREST`, normalized - coordinates set to `CLK_NORMALIZED_COORDS_FALSE` and addressing mode - set to `CLK_ADDRESS_CLAMP_TO_EDGE`, `CLK_ADDRESS_CLAMP` or - `CLK_ADDRESS_NONE`; otherwise the values returned are undefined. - - Values returned by *read_imagef* for image objects with - _image_channel_data_type_ values not specified in the description - above are undefined. 
- -|==== - -Add the following built-in functions to section 6.12.14.3 - Built-in Image Sampler-less Read Functions: - -[cols="2,3",options="header",] -|==== -|*Function* -|*Description* - -| float *read_imagef*(image2d_depth_t _image_, int2 _coord_) - | Use the coordinate (_coord.x_, _coord.y_) to do an element lookup in - the 2D depth image object specified by _image_. - - *read_imagef* returns a floating-point value in the range [0.0, 1.0] - for depth image objects created with _image_channel_data_type_ set to - {CL_UNORM_INT16} or {CL_UNORM_INT24}. - - *read_imagef* returns a floating-point value for depth image objects - created with _image_channel_data_type_ set to {CL_FLOAT}. - - Values returned by *read_imagef* for image objects with - _image_channel_data_type_ values not specified in the description - above are undefined. -| | -| float *read_imagef*(image2d_array_depth_t _image_, int4 _coord_) - | Use _coord.xy_ to do an element lookup in the 2D image identified by - _coord.z_ in the 2D depth image array specified by _image_. - - *read_imagef* returns a floating-point value in the range [0.0, 1.0] - for depth image objects created with _image_channel_data_type_ set to - {CL_UNORM_INT16} or {CL_UNORM_INT24}. - - *read_imagef* returns a floating-point value for depth image objects - created with _image_channel_data_type_ set to {CL_FLOAT}. - - Values returned by *read_imagef* for image objects with - _image_channel_data_type_ values not specified in the description - above are undefined. - -|==== - -Add the following built-in functions to section 6.12.14.4 – Built-in Image Write Functions: - -[cols="2,3",options="header",] -|==== -|*Function* -|*Description* - -| void *write_imagef*(image2d_depth_t _image_, int2 _coord_, - float _depth_) - | Write _depth_ value to location specified by _coord.xy_ in the 2D - depth image object specified by _image_. - Appropriate data format conversion to the specified image format is - done before writing the depth value. 
- _coord.x_ and _coord.y_ are considered to be unnormalized coordinates, - and must be in the range [0, image width-1], and [0, image height-1], - respectively. - - *write_imagef* can only be used with image objects created with - _image_channel_data_type_ set to {CL_UNORM_INT16}, {CL_UNORM_INT24} or - {CL_FLOAT}. - Appropriate data format conversion will be done to convert depth value - from a floating-point value to actual data format associated with the - image. - - The behavior of *write_imagef*, *write_imagei* and *write_imageui* for - image objects created with _image_channel_data_type_ values not - specified in the description above or with (_x_, _y_) coordinate - values that are not in the range [0, image width-1] and [0, image - height-1], respectively, is undefined. -| | -| void *write_imagef*(image2d_array_depth_t _image_, int4 _coord_, - float _depth_) - | Write _depth_ value to location specified by _coord.xy_ in the 2D - image identified by _coord.z_ in the 2D depth image array specified by - _image_. - Appropriate data format conversion to the specified image format is - done before writing the depth value. - _coord.x_, _coord.y_ and _coord.z_ are considered to be unnormalized - coordinates, and must be in the range [0, image width-1], [0, image - height-1], and [0, image number of layers-1], respectively. - - *write_imagef* can only be used with image objects created with - _image_channel_data_type_ set to {CL_UNORM_INT16}, {CL_UNORM_INT24} or - {CL_FLOAT}. - Appropriate data format conversion will be done to convert depth value - from a floating-point value to actual data format associated with the - image. 
- - The behavior of *write_imagef*, *write_imagei* and *write_imageui* for - image objects created with _image_channel_data_type_ values not - specified in the description above or with (_x_, _y_, _z_) coordinate - values that are not in the range [0, image width-1], [0, image - height-1], [0, image number of layers-1], respectively, is undefined. - -|==== - -Add the following built-in functions to section 6.12.14.5 – Built-in Image Query Functions: - -[cols="2,3",] -|==== -| *Function* | *Description* -| int *get_image_width*(image2d_depth_t _image_) + - int *get_image_width*(image2d_array_depth_t _image_) - | Return the image width in pixels. -| int *get_image_height*(image2d_depth_t _image_) + - int *get_image_height*(image2d_array_depth_t _image_) - | Return the image height in pixels. -| | -| int *get_image_channel_data_type*(image2d_depth_t _image_) + - int *get_image_channel_data_type*(image2d_array_depth_t _image_) - | Return the channel data type. Valid values are: - - `CLK_UNORM_INT16` + - `CLK_FLOAT` -| int *get_image_channel_order*(image2d_depth_t _image_) + - int *get_image_channel_order*(image2d_array_depth_t _image_) - | Return the image channel order. Valid values are: - - `CLK_DEPTH` -| | -| int2 *get_image_dim*(image2d_depth_t _image_) + - int2 *get_image_dim*(image2d_array_depth_t _image_) - | Return the 2D image width and height as an int2 type. - The width is returned in the _x_ component, and the height in the _y_ - component. -| | -| size_t *get_image_array_size*(image2d_array_depth_t _image_) - | Return the number of images in the 2D image array. -|==== - -Add the following text below the table in section 6.12.14.6 - Mapping image channels to color values returned by read_image -and color values passed to write_image to image channels: - -For {CL_DEPTH} images, a scalar value is returned by *read_imagef* or -supplied to *write_imagef*. 
diff --git a/ext/cl_khr_device_enqueue_local_arg_types.asciidoc b/ext/cl_khr_device_enqueue_local_arg_types.asciidoc deleted file mode 100644 index 19f34198..00000000 --- a/ext/cl_khr_device_enqueue_local_arg_types.asciidoc +++ /dev/null @@ -1,52 +0,0 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ - -[[cl_khr_device_enqueue_local_arg_types]] -== Device Enqueue Local Argument Types - -This extension allows arguments to blocks that are passed to the *enqueue_kernel* built-in -function to be pointers to any type (built-in or user-defined) in local memory, instead of -requiring arguments to blocks to be pointers to void in local memory. - -The name of this extension is *cl_khr_device_enqueue_local_arg_types*. - -=== General Information - -==== Version History - -[cols="1,1,3",options="header",] -|==== -| *Date* | *Version* | *Description* -| 2020-04-21 | 1.0.0 | First assigned version. -|==== - -[[cl_khr_device_enqueue_local_arg_types-additions-to-chapter-6-of-the-opencl-2.0-specification]] -=== Additions to Chapter 6 of the OpenCL 2.0 C Specification - -Modify the second paragraph of Section 6.13.17: Enqueuing Kernels: - -"The following table describes the list of built-in functions that can be used to enqueue a -kernel. We use the generic type name +gentype+ to indicate the built-in OpenCL C scalar or -vector integer or floating-point data types, or any user defined type built from these scalar and -vector data types, which can be used as the type of the pointee of the arguments of the kernel -enqueue functions listed in table 6.31." - -Then, replace all occurrences of +local void *+ in table 6.31 with +local gentype *+. For example: - -[source,opencl_c] ----- -int enqueue_kernel(queue_t queue, - kernel_enqueue_flags_t flags, - const ndrange_t ndrange, - void (^block)(local gentype *, ...), - uint size0, ... 
) ----- - -Additionally, replace all occurrences of +local void*+ in table 6.33 with +local gentype *+. For example: - -[source,opencl_c] ----- -uint get_kernel_work_group_size( - void (^block)(local gentype *, ...)) ----- diff --git a/ext/cl_khr_device_uuid.asciidoc b/ext/cl_khr_device_uuid.asciidoc deleted file mode 100644 index e4005dca..00000000 --- a/ext/cl_khr_device_uuid.asciidoc +++ /dev/null @@ -1,92 +0,0 @@ -// Copyright 2018-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ - -[[cl_khr_device_uuid]] -== Unique Device Identifiers - -This section describes the *cl_khr_device_uuid* extension. - -This extension adds the ability to query a universally unique identifier -(UUID) for an OpenCL driver and OpenCL device. -The UUIDs returned by the query may be used to identify drivers and devices -across processes or APIs. - -=== General Information - -==== Version History - -[cols="1,1,3",options="header",] -|==== -| *Date* | *Version* | *Description* -| 2020-08-27 | 1.0.0 | First assigned version. -|==== - -// == New API Enums -// -// Accepted value for the _param_name_ parameter to {clGetDeviceInfo}: -// -// [source,opencl] -// ---- -// #define CL_DEVICE_UUID_KHR 0x106A -// #define CL_DRIVER_UUID_KHR 0x106B -// #define CL_DEVICE_LUID_VALID_KHR 0x106C -// #define CL_DEVICE_LUID_KHR 0x106D -// #define CL_DEVICE_NODE_MASK_KHR 0x106E -// ---- -// -// Constants describing the size of the driver and device UUIDs, and the device LUID: -// -// [source,opencl] -// ---- -// #define CL_UUID_SIZE_KHR 16 -// #define CL_LUID_SIZE_KHR 8 -// ---- - -=== Additions to Chapter 4 of the OpenCL 3.0 API Specification - -Add to Table 5 - OpenCL Device Queries: - -[caption="Table 5. 
"] -.List of supported param_names by {clGetDeviceInfo} -[width="100%",cols="<33%,<17%,<50%",options="header"] -|==== -| Device Info | Return Type | Description -| {CL_DEVICE_UUID_KHR} - | {cl_uchar_TYPE}[{CL_UUID_SIZE_KHR}] - | Returns a universally unique identifier (UUID) for the device. - - Device UUIDs must be immutable for a given device across processes, driver APIs, driver versions, and system reboots. - -| {CL_DRIVER_UUID_KHR} - | {cl_uchar_TYPE}[{CL_UUID_SIZE_KHR}] - | Returns a universally unique identifier (UUID) for the software driver for the device. - -| {CL_DEVICE_LUID_VALID_KHR} - | {cl_bool_TYPE} - | Returns {CL_TRUE} if the device has a valid LUID and {CL_FALSE} otherwise. - -| {CL_DEVICE_LUID_KHR} - | {cl_uchar_TYPE}[{CL_LUID_SIZE_KHR}] - | Returns a locally unique identifier (LUID) for the device. - - It is not an error to query {CL_DEVICE_LUID_KHR} when {CL_DEVICE_LUID_VALID_KHR} returns {CL_FALSE}, but in this case the returned LUID value is undefined. - - When {CL_DEVICE_LUID_VALID_KHR} returns {CL_TRUE}, and the OpenCL device is running on the Windows operating system, the returned LUID value can be cast to an `LUID` object and must be equal to the locally unique identifier of an `IDXGIAdapter1` object that corresponds to the OpenCL device. - -| {CL_DEVICE_NODE_MASK_KHR} - | {cl_uint_TYPE} - | Returns a node mask for the device. - - It is not an error to query {CL_DEVICE_NODE_MASK_KHR} when {CL_DEVICE_LUID_VALID_KHR} returns {CL_FALSE}, but in this case the returned node mask is undefined. - - When {CL_DEVICE_LUID_VALID_KHR} returns {CL_TRUE}, the returned node mask must contain exactly one bit. - If the OpenCL device is running on an operating system that supports the Direct3D 12 API and the OpenCL device corresponds to an individual device in a linked device adapter, the returned node mask identifies the Direct3D 12 node corresponding to the OpenCL device. - Otherwise, the returned node mask must be `1`. 
- -|==== - -NOTE: While {CL_DEVICE_UUID_KHR} is specified to remain consistent across driver versions and system reboots, it is not intended to be usable as a serializable persistent identifier for a device. -It may change when a device is physically added to, removed from, or moved to a different connector in a system while that system is powered down. -Further, there is no reasonable way to verify with conformance testing that a given device retains the same UUID in a given system across all driver versions supported in that system. -While implementations should make every effort to report consistent device UUIDs across driver versions, applications should avoid relying on the persistence of this value for uses other than identifying compatible devices for external object sharing purposes. diff --git a/ext/cl_khr_dx9_media_sharing.asciidoc b/ext/cl_khr_dx9_media_sharing.asciidoc deleted file mode 100644 index a350af40..00000000 --- a/ext/cl_khr_dx9_media_sharing.asciidoc +++ /dev/null @@ -1,737 +0,0 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ - -[[cl_khr_dx9_media_sharing]] -== Creating OpenCL Memory Objects from DirectX 9 Media Surfaces - -[[cl_khr_dx9_media_sharing-overview]] -=== Overview - -This section describes the *cl_khr_dx9_media_sharing* extension. -The goal of this extension is to allow applications to use media surfaces as -OpenCL memory objects. -This allows efficient sharing of data between OpenCL and selected adapter -APIs (only DX9 for now). -If this extension is supported, an OpenCL image object can be created from a -media surface and the OpenCL API can be used to execute kernels that read -and/or write memory objects that are media surfaces. -Note that OpenCL memory objects may be created from the adapter media -surface if and only if the OpenCL context has been created from that -adapter. 
- -=== General Information - -==== Version History - -[cols="1,1,3",options="header",] -|==== -| *Date* | *Version* | *Description* -| 2020-04-21 | 1.0.0 | First assigned version. -|==== - -[[cl_khr_dx9_media_sharing-new-procedures-and-functions]] -=== New Procedures and Functions - -[source,opencl] ----- -cl_int clGetDeviceIDsFromDX9MediaAdapterKHR( - cl_platform_id platform, - cl_uint num_media_adapters, - cl_dx9_media_adapter_type_khr *media_adapters_type, - void *media_adapters, - cl_dx9_media_adapter_set_khr media_adapter_set, - cl_uint num_entries, - cl_device_id *devices, - cl_int *num_devices); - -cl_mem clCreateFromDX9MediaSurfaceKHR(cl_context context, - cl_mem_flags flags, - cl_dx9_media_adapter_type_khr adapter_type, - void *surface_info, - cl_uint plane, - cl_int *errcode_ret); - -cl_int clEnqueueAcquireDX9MediaSurfacesKHR(cl_command_queue command_queue, - cl_uint num_objects, - const cl_mem *mem_objects, - cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, - cl_event *event); - -cl_int clEnqueueReleaseDX9MediaSurfacesKHR(cl_command_queue command_queue, - cl_uint num_objects, - const cl_mem *mem_objects, - cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, - cl_event *event); ----- - -[[cl_khr_dx9_media_sharing-new-tokens]] -=== New Tokens - -Accepted by the _media_adapter_type_ parameter of -{clGetDeviceIDsFromDX9MediaAdapterKHR}: - ----- -CL_ADAPTER_D3D9_KHR -CL_ADAPTER_D3D9EX_KHR -CL_ADAPTER_DXVA_KHR ----- - -Accepted by the _media_adapter_set_ parameter of -{clGetDeviceIDsFromDX9MediaAdapterKHR}: - ----- -CL_PREFERRED_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR -CL_ALL_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR ----- - -Accepted as a property name in the _properties_ parameter of -{clCreateContext} and {clCreateContextFromType}: - ----- -CL_CONTEXT_ADAPTER_D3D9_KHR -CL_CONTEXT_ADAPTER_D3D9EX_KHR -CL_CONTEXT_ADAPTER_DXVA_KHR ----- - -Accepted as the property being queried in the _param_name_ parameter of -{clGetMemObjectInfo}: - ----- 
-CL_MEM_DX9_MEDIA_ADAPTER_TYPE_KHR -CL_MEM_DX9_MEDIA_SURFACE_INFO_KHR ----- - -Accepted as the property being queried in the _param_name_ parameter of -{clGetImageInfo}: - ----- -CL_IMAGE_DX9_MEDIA_PLANE_KHR ----- - -Returned in the _param_value_ parameter of {clGetEventInfo} when -_param_name_ is {CL_EVENT_COMMAND_TYPE}: - ----- -CL_COMMAND_ACQUIRE_DX9_MEDIA_SURFACES_KHR -CL_COMMAND_RELEASE_DX9_MEDIA_SURFACES_KHR ----- - -Returned by {clCreateContext} and {clCreateContextFromType} if the media -adapter specified for interoperability is not compatible with the devices -against which the context is to be created: - ----- -CL_INVALID_DX9_MEDIA_ADAPTER_KHR ----- - -Returned by {clCreateFromDX9MediaSurfaceKHR} when _adapter_type_ is set to a -media adapter and the _surface_info_ does not reference a media surface of -the required type, or if _adapter_type_ is set to a media adapter type and -_surface_info_ does not contain a valid reference to a media surface on that -adapter, by {clGetMemObjectInfo} when _param_name_ is a surface or handle -when the image was not created from an appropriate media surface, and from -{clGetImageInfo} when _param_name_ is {CL_IMAGE_DX9_MEDIA_PLANE_KHR} and image -was not created from an appropriate media surface. - ----- -CL_INVALID_DX9_MEDIA_SURFACE_KHR ----- - -Returned by {clEnqueueAcquireDX9MediaSurfacesKHR} when any of _mem_objects_ -are currently acquired by OpenCL: - ----- -CL_DX9_MEDIA_SURFACE_ALREADY_ACQUIRED_KHR ----- - -Returned by {clEnqueueReleaseDX9MediaSurfacesKHR} when any of _mem_objects_ -are not currently acquired by OpenCL: - ----- -CL_DX9_MEDIA_SURFACE_NOT_ACQUIRED_KHR ----- - -[[cl_khr_dx9_media_sharing-additions-to-chapter-4]] -=== Additions to Chapter 4 of the OpenCL 2.2 Specification - -In _section 4.4_, replace the description of _properties_ under -{clCreateContext} with: - -"`_properties_ specifies a list of context property names and their -corresponding values. 
-Each property is followed immediately by the corresponding desired value. -The list is terminated with zero. -If a property is not specified in _properties_, then its default value -(listed in _table 4.5_) is used (it is said to be specified implicitly). -If _properties_ is `NULL` or empty (points to a list whose first value is -zero), all attributes take on their default values.`" - -Add the following to _table 4.5_: - -[cols=",,",options="header",] -|==== -| *cl_context_properties enum* -| *Property value* -| *Description* - -| {CL_CONTEXT_ADAPTER_D3D9_KHR} -| IDirect3DDevice9 * -| Specifies an IDirect3DDevice9 to use for D3D9 interop. - -| {CL_CONTEXT_ADAPTER_D3D9EX_KHR} -| IDirect3DDeviceEx* -| Specifies an IDirect3DDevice9Ex to use for D3D9 interop. - -| {CL_CONTEXT_ADAPTER_DXVA_KHR} -| IDXVAHD_Device * -| Specifies an IDXVAHD_Device to use for DXVA interop. - -|==== - -Add to the list of errors for {clCreateContext}: - - * {CL_INVALID_DX9_MEDIA_ADAPTER_KHR} if any of the values of the properties - {CL_CONTEXT_ADAPTER_D3D9_KHR}, {CL_CONTEXT_ADAPTER_D3D9EX_KHR} or - {CL_CONTEXT_ADAPTER_DXVA_KHR} is non-`NULL` and does not specify a valid - media adapter with which the _cl_device_ids_ against which this context - is to be created may interoperate. - -Add to the list of errors for {clCreateContextFromType} the same new errors -described above for {clCreateContext}. - -[[cl_khr_dx9_media_sharing-additions-to-chapter-5]] -=== Additions to Chapter 5 of the OpenCL 2.2 Specification - -Add to the list of errors for {clGetMemObjectInfo}: - - * {CL_INVALID_DX9_MEDIA_SURFACE_KHR} if _param_name_ is - {CL_MEM_DX9_MEDIA_SURFACE_INFO_KHR} and _memobj_ was not created by the - function {clCreateFromDX9MediaSurfaceKHR} from a Direct3D9 surface. - -Extend _table 5.12_ to include the following entry: - -[cols=",,",options="header",] -|==== -| *cl_mem_info* -| *Return type* -| *Info. 
returned in _param_value_* - -| {CL_MEM_DX9_MEDIA_ADAPTER_TYPE_KHR} -| {cl_dx9_media_adapter_type_khr_TYPE} -| Returns the {cl_dx9_media_adapter_type_khr_TYPE} argument value specified when - _memobj_ is created using {clCreateFromDX9MediaSurfaceKHR}. - -| {CL_MEM_DX9_MEDIA_SURFACE_INFO_KHR} -| {cl_dx9_surface_info_khr_TYPE} -| Returns the {cl_dx9_surface_info_khr_TYPE} argument value specified when - _memobj_ is created using {clCreateFromDX9MediaSurfaceKHR}. - -|==== - -Add to the list of errors for {clGetImageInfo}: - - * {CL_INVALID_DX9_MEDIA_SURFACE_KHR} if _param_name_ is - {CL_IMAGE_DX9_MEDIA_PLANE_KHR} and _image_ was not created by the function - {clCreateFromDX9MediaSurfaceKHR}. - -Extend _table 5.9_ to include the following entry. - -[cols=",,",options="header",] -|==== -| *cl_image_info* -| *Return type* -| *Info. returned in _param_value_* - -| {CL_IMAGE_DX9_MEDIA_PLANE_KHR} -| {cl_uint_TYPE} -| Returns the _plane_ argument value specified when _memobj_ is created - using {clCreateFromDX9MediaSurfaceKHR}. - -|==== - -Add to _table 5.22_ in the *Info returned in param_value* column for -_cl_event_info_ = {CL_EVENT_COMMAND_TYPE}: - ----- -CL_COMMAND_ACQUIRE_DX9_MEDIA_SURFACES_KHR -CL_COMMAND_RELEASE_DX9_MEDIA_SURFACES_KHR ----- - -[[cl_khr_dx9_media_sharing-sharing-media-surfaces-with-opencl]] -=== Sharing Media Surfaces with OpenCL - -This section discusses OpenCL functions that allow applications to use media -surfaces as OpenCL memory objects. -This allows efficient sharing of data between OpenCL and media surface APIs. -The OpenCL API may be used to execute kernels that read and/or write memory -objects that are also media surfaces. -An OpenCL image object may be created from a media surface. -OpenCL memory objects may be created from media surfaces if and only if the -OpenCL context has been created from a media adapter. 
- -[[cl_khr_dx9_media_sharing-querying-opencl-devices-corresponding-to-media-adapters]] -==== Querying OpenCL Devices corresponding to Media Adapters - -Media adapters are an abstraction associated with devices that provide media -capabilities. - -The function - -include::{generated}/api/protos/clGetDeviceIDsFromDX9MediaAdapterKHR.txt[] - -queries a media adapter for any associated OpenCL devices. -Adapters with associated OpenCL devices can enable media surface sharing -between the two. - -_platform_ refers to the platform ID returned by {clGetPlatformIDs}. - -_num_media_adapters_ specifies the number of media adapters. - -_media_adapters_type_ is an array of _num_media_adapters_ entries. -Each entry specifies the type of media adapter and must be one of the values -described in the table below. - -[[cl_khr_dx9_media_sharing-media-adapter-types]] -.DirectX 9 object types that may be used by {clGetDeviceIDsFromDX9MediaAdapterKHR} -[cols=",",options="header",] -|==== -| {cl_dx9_media_adapter_type_khr_TYPE} -| Type of media adapter - -| {CL_ADAPTER_D3D9_KHR} -| IDirect3DDevice9 * - -| {CL_ADAPTER_D3D9EX_KHR} -| IDirect3DDevice9Ex * - -| {CL_ADAPTER_DXVA_KHR} -| IDXVAHD_Device * - -|==== - -[[cl_khr_dx9_media_sharing-media-adapter-sets]] -.Sets of devices queriable using {clGetDeviceIDsFromDX9MediaAdapterKHR} -[cols=",",options="header",] -|==== -| {cl_dx9_media_adapter_set_khr_TYPE} -| Description - -| {CL_PREFERRED_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR} -| The preferred OpenCL devices associated with the media adapter. - -| {CL_ALL_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR} -| All OpenCL devices that may interoperate with the media adapter -|==== - -_media_adapters_ is an array of _num_media_adapters_ entries. -Each entry specifies the actual adapter whose type is specified by -_media_adapter_type_. -The _media_adapters_ must be one of the types described in the table -<>. 
-_media_adapter_set_ specifies the set of adapters to return and must be one -of the values described in the table -<<cl_khr_dx9_media_sharing-media-adapter-sets,_cl_dx9_media_adapter_set_khr -values_>>. - -_num_entries_ is the number of {cl_device_id_TYPE} entries that can be added to -_devices_. -If _devices_ is not `NULL`, the _num_entries_ must be greater than zero. - -_devices_ returns a list of OpenCL devices found that support the list of -media adapters specified. -The {cl_device_id_TYPE} values returned in _devices_ can be used to identify a -specific OpenCL device. -If _devices_ argument is `NULL`, this argument is ignored. -The number of OpenCL devices returned is the minimum of the value specified -by _num_entries_ or the number of OpenCL devices whose type matches -_device_type_. - -_num_devices_ returns the number of OpenCL devices. -If _num_devices_ is `NULL`, this argument is ignored. - -{clGetDeviceIDsFromDX9MediaAdapterKHR} returns {CL_SUCCESS} if the function is -executed successfully. -Otherwise, it returns one of the following errors: - - * {CL_INVALID_PLATFORM} if _platform_ is not a valid platform. - * {CL_INVALID_VALUE} if _num_media_adapters_ is zero or if - _media_adapters_type_ is `NULL` or if _media_adapters_ is `NULL`. - * {CL_INVALID_VALUE} if any of the entries in _media_adapters_type_ or - _media_adapters_ is not a valid value. - * {CL_INVALID_VALUE} if _media_adapter_set_ is not a valid value. - * {CL_INVALID_VALUE} if _num_entries_ is equal to zero and _devices_ is not - `NULL` or if both _num_devices_ and _devices_ are `NULL`. - * {CL_DEVICE_NOT_FOUND} if no OpenCL devices that correspond to adapters - specified in _media_adapters_ and _media_adapters_type_ were found. - * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required - by the OpenCL implementation on the device. - * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources - required by the OpenCL implementation on the host. 
- -[[cl_khr_dx9_media_sharing-creating-media-resources-as-opencl-image-objects]] -==== Creating Media Resources as OpenCL Image Objects - -The function - -include::{generated}/api/protos/clCreateFromDX9MediaSurfaceKHR.txt[] - -creates an OpenCL image object from a media surface. - -_context_ is a valid OpenCL context created from a media adapter. - -_flags_ is a bit-field that is used to specify usage information. -Refer to _table 5.3_ for a description of flags. -Only {CL_MEM_READ_ONLY}, {CL_MEM_WRITE_ONLY} and {CL_MEM_READ_WRITE} values -specified in _table 5.3_ can be used. - -_adapter_type_ is a value from enumeration of supported adapters described -in the table -<<cl_khr_dx9_media_sharing-media-adapter-types,_cl_dx9_media_adapter_type_khr values_>>. -The type of _surface_info_ is determined by the adapter type. -The implementation does not need to support all adapter types. -This approach provides flexibility to support additional adapter types in -the future. -Supported adapter types are {CL_ADAPTER_D3D9_KHR}, {CL_ADAPTER_D3D9EX_KHR} and -{CL_ADAPTER_DXVA_KHR}. - -If _adapter_type_ is {CL_ADAPTER_D3D9_KHR}, {CL_ADAPTER_D3D9EX_KHR} or -{CL_ADAPTER_DXVA_KHR}, the _surface_info_ points to the following structure: - -include::{generated}/api/structs/cl_dx9_surface_info_khr.txt[] - -For DX9 surfaces, we need both the handle to the resource and the resource -itself to have a sufficient amount of information to eliminate a copy of the -surface for sharing in cases where this is possible. -Elimination of the copy is driver dependent. -_shared_handle_ may be `NULL` and this may result in sub-optimal -performance. - -_surface_info_ is a pointer to one of the structures defined in the -_adapter_type_ description above passed in as a void *. - -_plane_ is the plane of resource to share for planar surface formats. -For planar formats, we use the plane parameter to obtain a handle to the -specific plane (Y, U or V for example). -For non-planar formats used by media, _plane_ must be 0. - -_errcode_ret_ will return an appropriate error code. 
-If _errcode_ret_ is `NULL`, no error code is returned. - -{clCreateFromDX9MediaSurfaceKHR} returns a valid non-zero 2D image object -and _errcode_ret_ is set to {CL_SUCCESS} if the 2D image object is created -successfully. -Otherwise it returns a `NULL` value with one of the following error values -returned in _errcode_ret_: - - * {CL_INVALID_CONTEXT} if _context_ is not a valid context. - * {CL_INVALID_VALUE} if values specified in _flags_ are not valid or if - _plane_ is not a valid plane of _resource_ specified in _surface_info_. - * {CL_INVALID_DX9_MEDIA_SURFACE_KHR} if _resource_ specified in - _surface_info_ is not a valid resource or is not associated with - _adapter_type_ (e.g., _adapter_type_ is set to {CL_ADAPTER_D3D9_KHR} and - _resource_ is not a Direct3D 9 surface created in D3DPOOL_DEFAULT). - * {CL_INVALID_DX9_MEDIA_SURFACE_KHR} if _shared_handle_ specified in - _surface_info_ is not `NULL` or a valid handle value. - * {CL_INVALID_IMAGE_FORMAT_DESCRIPTOR} if the texture format of _resource_ - is not listed in <> or - <>. - * {CL_INVALID_OPERATION} if there are no devices in _context_ that support - _adapter_type_. - * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required - by the OpenCL implementation on the device. - * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources - required by the OpenCL implementation on the host. - -The width and height of the returned OpenCL 2D image object are determined -by the width and height of the plane of resource. -The channel type and order of the returned image object is determined by the -format and plane of resource and are described in the table -<> or -<>. - -This call will increment the internal media surface count on _resource_. -The internal media surface reference count on _resource_ will be decremented -when the OpenCL reference count on the returned OpenCL memory object drops -to zero. 
- -[[cl_khr_dx9_media_sharing-querying-media-surface-properties-of-memory-objects-created-from-media-surfaces]] -==== Querying Media Surface Properties of Memory Objects created from Media Surfaces - -Properties of media surface objects may be queried using -{clGetMemObjectInfo} and {clGetImageInfo} with _param_name_ -{CL_MEM_DX9_MEDIA_ADAPTER_TYPE_KHR}, {CL_MEM_DX9_MEDIA_SURFACE_INFO_KHR} and -{CL_IMAGE_DX9_MEDIA_PLANE_KHR} as described in _sections 5.4.3_ and _5.3.6_. - -[[cl_khr_dx9_media_sharing-sharing-memory-objects-created-from-media-surfaces-between-a-media-adapter-and-opencl]] -==== Sharing Memory Objects created from Media Surfaces between a Media Adapter and OpenCL - -The function - -include::{generated}/api/protos/clEnqueueAcquireDX9MediaSurfacesKHR.txt[] - -is used to acquire OpenCL memory objects that have been created from a media -surface. -The media surfaces are acquired by the OpenCL context associated with -_command_queue_ and can therefore be used by all command-queues associated -with the OpenCL context. - -OpenCL memory objects created from media surfaces must be acquired before -they can be used by any OpenCL commands queued to a command-queue. -If an OpenCL memory object created from a media surface is used while it is -not currently acquired by OpenCL, the call attempting to use that OpenCL -memory object will return {CL_DX9_MEDIA_SURFACE_NOT_ACQUIRED_KHR}. - -If {CL_CONTEXT_INTEROP_USER_SYNC} is not specified as {CL_TRUE} during context -creation, {clEnqueueAcquireDX9MediaSurfacesKHR} provides the synchronization -guarantee that any media adapter API calls involving the interop device(s) -used in the OpenCL context made before {clEnqueueAcquireDX9MediaSurfacesKHR} -is called will complete executing before _event_ reports completion and -before the execution of any subsequent OpenCL work issued in _command_queue_ -begins. 
-If the context was created with properties specifying -{CL_CONTEXT_INTEROP_USER_SYNC} as {CL_TRUE}, the user is responsible for -guaranteeing that any media adapter API calls involving the interop -device(s) used in the OpenCL context made before -{clEnqueueAcquireDX9MediaSurfacesKHR} is called have completed before -calling {clEnqueueAcquireDX9MediaSurfacesKHR}. - -_command_queue_ is a valid command-queue. - -_num_objects_ is the number of memory objects to be acquired in -_mem_objects_. - -_mem_objects_ is a pointer to a list of OpenCL memory objects that were -created from media surfaces. - -_event_wait_list_ and _num_events_in_wait_list_ specify events that need to -complete before this particular command can be executed. -If _event_wait_list_ is `NULL`, then this particular command does not wait -on any event to complete. -If _event_wait_list_ is `NULL`, _num_events_in_wait_list_ must be 0. -If _event_wait_list_ is not `NULL`, the list of events pointed to by -_event_wait_list_ must be valid and _num_events_in_wait_list_ must be -greater than 0. -The events specified in _event_wait_list_ act as synchronization points. - -_event_ returns an event object that identifies this command and -can be used to query or wait for this command to complete. -If _event_ is `NULL` or the enqueue is unsuccessful, no event will be -created and therefore it will not be possible to query the status of this -command or to wait for this command to complete. -If _event_wait_list_ and _event_ are not `NULL`, _event_ must not refer -to an element of the _event_wait_list_ array. - -{clEnqueueAcquireDX9MediaSurfacesKHR} returns {CL_SUCCESS} if the function is -executed successfully. -If _num_objects_ is 0 and _mem_objects_ is `NULL` then the function does -nothing and returns {CL_SUCCESS}. -Otherwise it returns one of the following errors: - - * {CL_INVALID_VALUE} if _num_objects_ is zero and _mem_objects_ is not a - `NULL` value or if _num_objects_ > 0 and _mem_objects_ is `NULL`.
- * {CL_INVALID_MEM_OBJECT} if memory objects in _mem_objects_ are not valid - OpenCL memory objects or if memory objects in _mem_objects_ have not - been created from media surfaces. - * {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not a valid - command-queue. - * {CL_INVALID_CONTEXT} if context associated with _command_queue_ was not - created from a device that can share the media surface referenced by - _mem_objects_. - * {CL_DX9_MEDIA_SURFACE_ALREADY_ACQUIRED_KHR} if memory objects in - _mem_objects_ have previously been acquired using - {clEnqueueAcquireDX9MediaSurfacesKHR} but have not been released using - {clEnqueueReleaseDX9MediaSurfacesKHR}. - * {CL_INVALID_EVENT_WAIT_LIST} if _event_wait_list_ is `NULL` and - _num_events_in_wait_list_ > 0, or _event_wait_list_ is not `NULL` and - _num_events_in_wait_list_ is 0, or if event objects in _event_wait_list_ - are not valid events. - * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources - required by the OpenCL implementation on the host. - -The function - -include::{generated}/api/protos/clEnqueueReleaseDX9MediaSurfacesKHR.txt[] - -is used to release OpenCL memory objects that have been created from media -surfaces. -The media surfaces are released by the OpenCL context associated with -_command_queue_. - -OpenCL memory objects created from media surfaces which have been acquired -by OpenCL must be released by OpenCL before they may be accessed by the -media adapter API. -Accessing a media surface while its corresponding OpenCL memory object is -acquired is in error and will result in undefined behavior, including but -not limited to possible OpenCL errors, data corruption, and program -termination. 
- -If {CL_CONTEXT_INTEROP_USER_SYNC} is not specified as {CL_TRUE} during context -creation, {clEnqueueReleaseDX9MediaSurfacesKHR} provides the synchronization -guarantee that any calls to media adapter APIs involving the interop -device(s) used in the OpenCL context made after the call to -{clEnqueueReleaseDX9MediaSurfacesKHR} will not start executing until after -all events in _event_wait_list_ are complete and all work already submitted -to _command_queue_ completes execution. -If the context was created with properties specifying -{CL_CONTEXT_INTEROP_USER_SYNC} as {CL_TRUE}, the user is responsible for -guaranteeing that any media adapter API calls involving the interop -device(s) used in the OpenCL context made after -{clEnqueueReleaseDX9MediaSurfacesKHR} will not start executing until after -event returned by {clEnqueueReleaseDX9MediaSurfacesKHR} reports completion. - -_num_objects_ is the number of memory objects to be released in -_mem_objects_. - -_mem_objects_ is a pointer to a list of OpenCL memory objects that were -created from media surfaces. - -_event_wait_list_ and _num_events_in_wait_list_ specify events that need to -complete before this particular command can be executed. -If _event_wait_list_ is `NULL`, then this particular command does not wait -on any event to complete. -If _event_wait_list_ is `NULL`, _num_events_in_wait_list_ must be 0. -If _event_wait_list_ is not `NULL`, the list of events pointed to by -_event_wait_list_ must be valid and _num_events_in_wait_list_ must be -greater than 0. - -_event_ returns an event object that identifies this command and -can be used to query or wait for this command to complete. -If _event_ is `NULL` or the enqueue is unsuccessful, no event will be -created and therefore it will not be possible to query the status of this -command or to wait for this command to complete. -If _event_wait_list_ and _event_ are not `NULL`, _event_ must not refer -to an element of the _event_wait_list_ array. 
- -{clEnqueueReleaseDX9MediaSurfacesKHR} returns {CL_SUCCESS} if the function is -executed successfully. -If _num_objects_ is 0 and _mem_objects_ is `NULL` then the function does -nothing and returns {CL_SUCCESS}. -Otherwise it returns one of the following errors: - - * {CL_INVALID_VALUE} if _num_objects_ is zero and _mem_objects_ is not a - `NULL` value or if _num_objects_ > 0 and _mem_objects_ is `NULL`. - * {CL_INVALID_MEM_OBJECT} if memory objects in _mem_objects_ are not valid - OpenCL memory objects or if memory objects in _mem_objects_ have not - been created from valid media surfaces. - * {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not a valid - command-queue. - * {CL_INVALID_CONTEXT} if context associated with _command_queue_ was not - created from a media object. - * {CL_DX9_MEDIA_SURFACE_NOT_ACQUIRED_KHR} if memory objects in _mem_objects_ - have not previously been acquired using - {clEnqueueAcquireDX9MediaSurfacesKHR}, or have been released using - {clEnqueueReleaseDX9MediaSurfacesKHR} since the last time that they were - acquired. - * {CL_INVALID_EVENT_WAIT_LIST} if _event_wait_list_ is `NULL` and - _num_events_in_wait_list_ > 0, or _event_wait_list_ is not `NULL` and - _num_events_in_wait_list_ is 0, or if event objects in - _event_wait_list_ are not valid events. - * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources - required by the OpenCL implementation on the host.
- -[[cl_khr_dx9_media_sharing-event-command-types]] -==== Event Command Types for Sharing Memory Objects created from Media Surfaces - -The following table describes the event command types for the OpenCL commands -to acquire and release OpenCL memory objects that have been created from -media surfaces: - -.List of supported event command types -[width="100%",cols="2,3",options="header"] -|==== -| *Events Created By* -| *Event Command Type* - -| {clEnqueueAcquireDX9MediaSurfacesKHR} -| {CL_COMMAND_ACQUIRE_DX9_MEDIA_SURFACES_KHR_anchor} - -| {clEnqueueReleaseDX9MediaSurfacesKHR} -| {CL_COMMAND_RELEASE_DX9_MEDIA_SURFACES_KHR_anchor} - -|==== - -[[cl_khr_dx9_media_sharing-surface-formats-for-media-surface-sharing]] -==== Surface formats for Media Surface Sharing - -This section includes the D3D surface formats that are supported when the -adapter type is one of the Direct 3D lineage. -Using a D3D surface format not listed here is an error. -To extend the use of this extension to support media adapters beyond -DirectX 9, tables similar to the ones in this section will need to be defined -for the surface formats supported by the new media adapter. -All implementations that support this extension are required to support the -NV12 surface format; the other surface formats supported are the same -surface formats that the adapter you are sharing with supports as long as -they are listed in the table -<<cl_khr_dx9_media_sharing-fourcc-image-formats>> or in the table -<<cl_khr_dx9_media_sharing-d3d-image-formats>>.
- -[[cl_khr_dx9_media_sharing-fourcc-image-formats]] -._YUV FourCC codes and corresponding OpenCL image format_ -[cols=",",options="header",] -|==== -| *FOUR CC code* -| *CL image format* - -*(channel order, channel data type)* - -| FOURCC('N','V','1','2'), Plane 0 | `CL_R`, `CL_UNORM_INT8` -| FOURCC('N','V','1','2'), Plane 1 | `CL_RG`, `CL_UNORM_INT8` -| FOURCC('Y','V','1','2'), Plane 0 | `CL_R`, `CL_UNORM_INT8` -| FOURCC('Y','V','1','2'), Plane 1 | `CL_R`, `CL_UNORM_INT8` -| FOURCC('Y','V','1','2'), Plane 2 | `CL_R`, `CL_UNORM_INT8` -|==== - -In the table <> above, NV12 Plane 0 -corresponds to the luminance (Y) channel and Plane 1 corresponds to the UV -channels. -The YV12 Plane 0 corresponds to the Y channel, Plane 1 corresponds to the V -channel and Plane 2 corresponds to the U channel. -Note that the YUV formats map to `CL_R` and `CL_RG` but do not perform any YUV -to RGB conversion and vice-versa. - -[[cl_khr_dx9_media_sharing-d3d-image-formats]] -._Direct3D formats and corresponding OpenCL image formats_ -[cols=",",options="header",] -|==== -| *D3D format* -| *CL image format* + -*(channel order, channel data type)* - -| D3DFMT_R32F | `CL_R`, `CL_FLOAT` -| D3DFMT_R16F | `CL_R`, `CL_HALF_FLOAT` -| D3DFMT_L16 | `CL_R`, `CL_UNORM_INT16` -| D3DFMT_A8 | `CL_A`, `CL_UNORM_INT8` -| D3DFMT_L8 | `CL_R`, `CL_UNORM_INT8` -| | -| D3DFMT_G32R32F | `CL_RG`, `CL_FLOAT` -| D3DFMT_G16R16F | `CL_RG`, `CL_HALF_FLOAT` -| D3DFMT_G16R16 | `CL_RG`, `CL_UNORM_INT16` -| D3DFMT_A8L8 | `CL_RG`, `CL_UNORM_INT8` -| | -| D3DFMT_A32B32G32R32F | `CL_RGBA`, `CL_FLOAT` -| D3DFMT_A16B16G16R16F | `CL_RGBA`, `CL_HALF_FLOAT` -| D3DFMT_A16B16G16R16 | `CL_RGBA`, `CL_UNORM_INT16` -| D3DFMT_A8B8G8R8 | `CL_RGBA`, `CL_UNORM_INT8` -| D3DFMT_X8B8G8R8 | `CL_RGBA`, `CL_UNORM_INT8` -| D3DFMT_A8R8G8B8 | `CL_BGRA`, `CL_UNORM_INT8` -| D3DFMT_X8R8G8B8 | `CL_BGRA`, `CL_UNORM_INT8` -|==== - -Note: The D3D9 format names in the table above seem to imply that the -order of the color channels are switched relative 
to OpenCL but this is -not the case. -For example, the layout of channels for each pixel for D3DFMT_A32FB32FG32FR32F -is the same as `CL_RGBA`, `CL_FLOAT`. diff --git a/ext/cl_khr_egl_event.asciidoc b/ext/cl_khr_egl_event.asciidoc deleted file mode 100644 index d04fb0d8..00000000 --- a/ext/cl_khr_egl_event.asciidoc +++ /dev/null @@ -1,211 +0,0 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ - -[[cl_khr_egl_event]] -== Creating OpenCL Event Objects from EGL Sync Objects - -[[cl_khr_egl_event-overview]] -=== Overview - -This section describes the *cl_khr_egl_event* extension. -This extension allows creating OpenCL event objects linked to EGL fence sync -objects, potentially improving efficiency of sharing images and buffers -between the two APIs. -The companion *EGL_KHR_cl_event* extension provides the complementary -functionality of creating an EGL sync object from an OpenCL event object. - -=== General Information - -==== Version History - -[cols="1,1,3",options="header",] -|==== -| *Date* | *Version* | *Description* -| 2020-04-21 | 1.0.0 | First assigned version. 
-|==== - -[[cl_khr_egl_event-new-procedures-and-functions]] -=== New Procedures and Functions - -[source,opencl] ----- -cl_event clCreateEventFromEGLSyncKHR(cl_context context, - CLeglSyncKHR sync, - CLeglDisplayKHR display, - cl_int *errcode_ret); ----- - -[[cl_khr_egl_event-new-tokens]] -=== New Tokens - -Returned by clCreateEventFromEGLSyncKHR if _sync_ is not a valid EGLSyncKHR -handle created with respect to EGLDisplay _display_: - ----- -CL_INVALID_EGL_OBJECT_KHR ----- - -Returned by *clGetEventInfo* when _param_name_ is CL_EVENT_COMMAND_TYPE: - ----- -CL_COMMAND_EGL_FENCE_SYNC_OBJECT_KHR ----- - -[[cl_khr_egl_event-additions-to-chapter-5]] -=== Additions to Chapter 5 of the OpenCL 2.2 Specification - -Add following to the fourth paragraph of _section 5.11_ (prior to the -description of *clWaitForEvents*): - -"`Event objects can also be used to reflect the status of an EGL fence sync -object. -The sync object in turn refers to a fence command executing in an EGL client -API command stream. -This provides another method of coordinating sharing of EGL / EGL client API -objects with OpenCL. -Completion of EGL / EGL client API commands may be determined by placing an -EGL fence command after commands using eglCreateSyncKHR, creating an event -from the resulting EGL sync object using clCreateEventFromEGLSyncKHR and -then specifying it in the _event_wait_list_ of a clEnqueueAcquire*** -command. -This method may be considerably more efficient than calling operations like -glFinish, and is referred to as _explicit synchronization_. -The application is responsible for ensuring the command stream associated -with the EGL fence is flushed to ensure the CL queue is submitted to the -device. 
-Explicit synchronization is most useful when an EGL client API context bound -to another thread is accessing the memory objects.`" - -Add CL_COMMAND_EGL_FENCE_SYNC_OBJECT_KHR to the valid _param_value_ values -returned by *clGetEventInfo* for _param_name_ CL_EVENT_COMMAND_TYPE (in the -third row and third column of _table 5.22_). - -Add new _subsection 5.11.2_: - -"`*5.11.2 Linking Event Objects to EGL Synchronization Objects* - -An event object may be created by linking to an EGL *sync object*. -Completion of such an event object is equivalent to waiting for completion -of the fence command associated with the linked EGL sync object. - -The function -indexterm:[clCreateEventFromEGLSyncKHR] -[source,opencl] ----- -cl_event clCreateEventFromEGLSyncKHR(cl_context context, - CLeglSyncKHR sync, - CLeglDisplayKHR display, - cl_int *errcode_ret) ----- - -creates a linked event object. - -_context_ is a valid OpenCL context created from an OpenGL context or share -group, using the *cl_khr_gl_sharing* extension. - -_sync_ is the name of a sync object of type EGL_SYNC_FENCE_KHR created with -respect to EGLDisplay _display_. - -*clCreateEventFromEGLSyncKHR* returns a valid OpenCL event object and -_errcode_ret_ is set to CL_SUCCESS if the event object is created -successfully. -Otherwise, it returns a `NULL` value with one of the following error values -returned in _errcode_ret_: - - * CL_INVALID_CONTEXT if _context_ is not a valid context, or was not - created from a GL context. - * CL_INVALID_EGL_OBJECT_KHR if _sync_ is not a valid EGLSyncKHR object of - type EGL_SYNC_FENCE_KHR created with respect to EGLDisplay _display_. - -The parameters of an event object linked to an EGL sync object will return -the following values when queried with *clGetEventInfo*: - - * The CL_EVENT_COMMAND_QUEUE of a linked event is `NULL`, because the - event is not associated with any OpenCL command-queue. 
- * The CL_EVENT_COMMAND_TYPE of a linked event is - CL_COMMAND_EGL_FENCE_SYNC_OBJECT_KHR, indicating that the event is - associated with a EGL sync object, rather than an OpenCL command. - * The CL_EVENT_COMMAND_EXECUTION_STATUS of a linked event is either - CL_SUBMITTED, indicating that the fence command associated with the sync - object has not yet completed, or CL_COMPLETE, indicating that the fence - command has completed. - -*clCreateEventFromEGLSyncKHR* performs an implicit *clRetainEvent* on the -returned event object. -Creating a linked event object also places a reference on the linked EGL -sync object. -When the event object is deleted, the reference will be removed from the EGL -sync object. - -Events returned from *clCreateEventFromEGLSyncKHR* may only be consumed by -*clEnqueueAcquire**** commands. -Passing such events to any other CL API that enqueues commands will generate -a CL_INVALID_EVENT error.`" - -[[cl_khr_egl_event-additions-to-extension-specification]] -=== Additions to the OpenCL Extension Specification - -Replace the second paragraph of -<> with: - -"`Prior to calling *clEnqueueAcquireGLObjects*, the application must ensure -that any pending EGL or EGL client API operations which access the objects -specified in _mem_objects_ have completed. - -If the *cl_khr_egl_event* extension is supported and the EGL context in -question supports fence sync objects, _explicit synchronization_ can be -achieved as set out in _section 5.7.1_. - -If the *cl_khr_egl_event* extension is not supported, completion of EGL -client API commands may be determined by issuing and waiting for completion -of commands such as glFinish or vgFinish on all client API contexts with -pending references to these objects. -Some implementations may offer other efficient synchronization methods. -If such methods exist they will be described in platform-specific -documentation. 
- -Note that no synchronization methods other than glFinish and vgFinish are -portable between all EGL client API implementations and all OpenCL -implementations. -While this is the only way to ensure completion that is portable to all -platforms, these are expensive operations and their use should be avoided if -the cl_khr_egl_event extension is supported on a platform.`" - -[[cl_khr_egl_event-issues]] -=== Issues - -Most issues are shared with *cl_khr_gl_event* and are resolved as described -in that extension. - - . Should we support implicit synchronization? -+ --- -RESOLVED: No, as this may be very difficult since the synchronization would -not be with EGL, it would be with currently bound EGL client APIs. -It would be necessary to know which client APIs might be bound, to validate -that they're associated with the EGLDisplay associated with the OpenCL -context, and to reach into each such context. --- - - . Do we need to have typedefs to use EGL handles in OpenCL? -+ --- -RESOLVED: Using typedefs for EGL handles. --- - - . Should we restrict which CL APIs can be used with this cl_event? -+ --- -RESOLVED: Use is limited to clEnqueueAcquire*** calls only. --- - - . What is the desired behaviour for this extension when EGLSyncKHR is of a - type other than EGL_SYNC_FENCE_KHR? -+ --- -RESOLVED: This extension only requires support for EGL_SYNC_FENCE_KHR. -Support of other types is an implementation choice, and will result in -CL_INVALID_EGL_OBJECT_KHR if unsupported. --- diff --git a/ext/cl_khr_egl_image.asciidoc b/ext/cl_khr_egl_image.asciidoc deleted file mode 100644 index da56b9ea..00000000 --- a/ext/cl_khr_egl_image.asciidoc +++ /dev/null @@ -1,432 +0,0 @@ -// Copyright 2017-2024 The Khronos Group.
This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ - -[[cl_khr_egl_image]] -== Creating OpenCL Memory Objects from EGL Images - -[[cl_khr_egl_image-overview]] -=== Overview - -This section describes the *cl_khr_egl_image* extension. -This extension provides a mechanism for creating OpenCL memory objects -from EGLImages. - -=== General Information - -==== Version History - -[cols="1,1,3",options="header",] -|==== -| *Date* | *Version* | *Description* -| 2020-04-21 | 1.0.0 | First assigned version. -|==== - -[[cl_khr_egl_image-new-procedures-and-functions]] -=== New Procedures and Functions - -[source,opencl] ----- -cl_mem clCreateFromEGLImageKHR(cl_context context, - CLeglDisplayKHR display, - CLeglImageKHR image, - cl_mem_flags flags, - const cl_egl_image_properties_khr *properties, - cl_int *errcode_ret); - -cl_int clEnqueueAcquireEGLObjectsKHR(cl_command_queue command_queue, - cl_uint num_objects, - const cl_mem *mem_objects, - cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, - cl_event *event); - -cl_int clEnqueueReleaseEGLObjectsKHR(cl_command_queue command_queue, - cl_uint num_objects, - const cl_mem *mem_objects, - cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, - cl_event *event); ----- - -[[cl_khr_egl_image-new-tokens]] -=== New Tokens - -New error codes: - ----- -CL_EGL_RESOURCE_NOT_ACQUIRED_KHR -CL_INVALID_EGL_OBJECT_KHR ----- - -New command types: - ----- -CL_COMMAND_ACQUIRE_EGL_OBJECTS_KHR -CL_COMMAND_RELEASE_EGL_OBJECTS_KHR ----- - -[[cl_khr_egl_image-additions-to-chapter-5]] -=== Additions to Chapter 5 of the OpenCL 2.2 Specification - -In section 5.2.4, add the following text after the paragraph defining -clCreateImage: - -"`The function -indexterm:[clCreateFromEGLImageKHR] -[source,opencl] ----- -cl_mem clCreateFromEGLImageKHR(cl_context context, - CLeglDisplayKHR display, - CLeglImageKHR image, - cl_mem_flags flags, -
const cl_egl_image_properties_khr *properties, - cl_int *errcode_ret); ----- - -creates an EGLImage target of type cl_mem from the EGLImage source provided -as _image_. - -_display_ should be of type EGLDisplay, cast into the type CLeglDisplayKHR. - -_image_ should be of type EGLImageKHR, cast into the type CLeglImageKHR. -Assuming no errors are generated in this function, the resulting image -object will be an EGLImage target of the specified EGLImage _image_. -The resulting cl_mem is an image object which may be used normally by all -OpenCL operations. -This maps to an image2d_t type in OpenCL kernel code. - -_flags_ is a bit-field that is used to specify usage information about the -memory object being created. - -The possible values for _flags_ are: CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY and -CL_MEM_READ_WRITE. - -For OpenCL 1.2 _flags_ also accepts: CL_MEM_HOST_WRITE_ONLY, -CL_MEM_HOST_READ_ONLY or CL_MEM_HOST_NO_ACCESS. - -This extension only requires support for CL_MEM_READ_ONLY, and for OpenCL -1.2 CL_MEM_HOST_NO_ACCESS. -For OpenCL 1.1, a CL_INVALID_OPERATION will be returned for images which do -not support host mapping. - -If the value passed in _flags_ is not supported by the OpenCL implementation -it will return CL_INVALID_VALUE. -The accepted _flags_ may be dependent upon the texture format used. - -_properties_ specifies a list of property names and their corresponding -values. -Each property name is immediately followed by the corresponding desired -value. -The list is terminated with 0. -No properties are currently supported with this version of the extension. -_properties_ can be `NULL`. - -*clCreateFromEGLImageKHR* returns a valid non-zero OpenCL image object and -_errcode_ret_ is set to CL_SUCCESS if the image object is created -successfully. -Otherwise, it returns a `NULL` value with one of the following error values -returned in _errcode_ret_: - - * CL_INVALID_CONTEXT if _context_ is not a valid OpenCL context.
- * CL_INVALID_VALUE if _properties_ contains invalid values, if _display_ - is not a valid display object or if _flags_ are not in the set defined - above. - * CL_INVALID_EGL_OBJECT_KHR if _image_ is not a valid EGLImage object. - * CL_IMAGE_FORMAT_NOT_SUPPORTED if the OpenCL implementation is not able - to create a cl_mem compatible with the provided CLeglImageKHR for an - implementation-dependent reason (this could be caused by, but not - limited to, reasons such as unsupported texture formats, etc). - * CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources - required by the OpenCL implementation on the host. - * CL_OUT_OF_RESOURCES if there is a failure to allocate resources required - by the OpenCL implementation on the device. - * CL_INVALID_OPERATION if there are no devices in _context_ that support - images (i.e. CL_DEVICE_IMAGE_SUPPORT specified in table 4.3 is CL_FALSE) - or if the flags passed are not supported for that image type.`" - -[[cl_khr_egl_image-lifetime-of-shared-objects]] -==== Lifetime of Shared Objects - -An OpenCL memory object created from an EGL image remains valid according to -the lifetime behavior as described in EGL_KHR_image_base. - -"`Any EGLImage siblings exist in any client API context`" - -For OpenCL this means that while the application retains a reference on the -cl_mem (the EGL sibling), the image remains valid. - -[[cl_khr_egl_image-synchronizing-opengl-and-egl-access-to-shared-objects]] -==== Synchronizing OpenCL and EGL Access to Shared Objects - -In order to ensure data integrity, the application is responsible for -synchronizing access to shared CL/EGL objects by their respective APIs. -Failure to provide such synchronization may result in race conditions and -other undefined behavior including non-portability between implementations. - -Prior to calling clEnqueueAcquireEGLObjectsKHR, the application must ensure -that any pending operations which access the objects specified in -mem_objects have completed. 
-This may be accomplished in a portable way by ceasing all client operations -on the resource, and issuing and waiting for completion of a glFinish -command on all GL contexts with pending references to these objects. -Implementations may offer more efficient synchronization methods, such as -synchronization primitives or fence operations. - -Similarly, after calling clEnqueueReleaseEGLObjectsKHR, the application is -responsible for ensuring that any pending OpenCL operations which access the -objects specified in mem_objects have completed prior to executing -subsequent commands in other APIs which reference these objects. -This may be accomplished in a portable way by calling clWaitForEvents with -the event object returned by clEnqueueReleaseEGLObjectsKHR, or by calling -clFinish. -As above, some implementations may offer more efficient methods. - -Attempting to access the data store of an EGLImage object after it has been -acquired by OpenCL and before it has been released will result in undefined -behavior. -Similarly, attempting to access a shared EGLImage object from OpenCL before -it has been acquired by the OpenCL command-queue or after it has been -released, will result in undefined behavior. - -[[cl_khr_egl_image-sharing-memory-objects-created-from-egl-resources-between-egldisplays-and-opencl-contexts]] -==== Sharing memory objects created from EGL resources between EGLDisplays and OpenCL contexts - -The function -indexterm:[clEnqueueAcquireEGLObjectsKHR] -[source,opencl] ----- -cl_int clEnqueueAcquireEGLObjectsKHR(cl_command_queue command_queue, - cl_uint num_objects, - const cl_mem *mem_objects, - cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, - cl_event *event) ----- - -is used to acquire OpenCL memory objects that have been created from EGL -resources. -The EGL objects are acquired by the OpenCL context associated with -_command_queue_ and can therefore be used by all command-queues associated -with the OpenCL context.
- -OpenCL memory objects created from EGL resources must be acquired before -they can be used by any OpenCL commands queued to a command-queue. If an -OpenCL memory object created from an EGL resource is used while it is not -currently acquired by OpenCL, the behavior is undefined. Implementations -may fail the execution of commands attempting to use that OpenCL memory -object and set their associated event's execution status to -{CL_EGL_RESOURCE_NOT_ACQUIRED_KHR}. - - -_command_queue_ is a valid command-queue. - -_num_objects_ is the number of memory objects to be acquired in -_mem_objects_. - -_mem_objects_ is a pointer to a list of OpenCL memory objects that were -created from EGL resources, within the context associated with _command_queue_. - -_event_wait_list_ and _num_events_in_wait_list_ specify events that need to -complete before this particular command can be executed. -If _event_wait_list_ is `NULL`, then this particular command does not wait -on any event to complete. -If _event_wait_list_ is `NULL`, _num_events_in_wait_list_ must be 0. -If _event_wait_list_ is not `NULL`, the list of events pointed to by -_event_wait_list_ must be valid and _num_events_in_wait_list_ must be -greater than 0. -The events specified in _event_wait_list_ act as synchronization points. - -_event_ returns an event object that identifies this command and -can be used to query or wait for this command to complete. -If _event_ is `NULL` or the enqueue is unsuccessful, no event will be -created and therefore it will not be possible to query the status of this -command or to wait for this command to complete. -If _event_wait_list_ and _event_ are not `NULL`, _event_ must not refer -to an element of the _event_wait_list_ array. - -*clEnqueueAcquireEGLObjectsKHR* returns CL_SUCCESS if the function is -executed successfully. -If _num_objects_ is 0 and _mem_objects_ is `NULL` then the function does -nothing and returns CL_SUCCESS.
-Otherwise it returns one of the following errors: - - * CL_INVALID_VALUE if _num_objects_ is zero and _mem_objects_ is not a - `NULL` value or if _num_objects_ > 0 and _mem_objects_ is `NULL`. - * CL_INVALID_MEM_OBJECT if memory objects in _mem_objects_ are not valid - OpenCL memory objects in the context associated with _command_queue_. - * CL_INVALID_EGL_OBJECT_KHR if memory objects in _mem_objects_ have not - been created from EGL resources. - * CL_INVALID_COMMAND_QUEUE if _command_queue_ is not a valid - command-queue. - * CL_INVALID_EVENT_WAIT_LIST if _event_wait_list_ is `NULL` and - _num_events_in_wait_list_ > 0, or _event_wait_list_ is not `NULL` and - _num_events_in_wait_list_ is 0, or if event objects in _event_wait_list_ - are not valid events. - * CL_OUT_OF_RESOURCES if there is a failure to allocate resources required - by the OpenCL implementation on the device. - * CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources - required by the OpenCL implementation on the host. - -The function -indexterm:[clEnqueueReleaseEGLObjectsKHR] -[source,opencl] ----- -cl_int clEnqueueReleaseEGLObjectsKHR(cl_command_queue command_queue, - cl_uint num_objects, - const cl_mem *mem_objects, - cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, - cl_event *event) ----- - -is used to release OpenCL memory objects that have been created from EGL -resources. -The EGL objects are released by the OpenCL context associated with -_command_queue_. - -OpenCL memory objects created from EGL resources which have been acquired by -OpenCL must be released by OpenCL before they may be accessed by EGL or by -EGL client APIs. -Accessing an EGL resource while its corresponding OpenCL memory object is -acquired is in error and will result in undefined behavior, including but -not limited to possible OpenCL errors, data corruption, and program -termination. - -_command_queue_ is a valid command-queue.
- -_num_objects_ is the number of memory objects to be released in -_mem_objects_. - -_mem_objects_ is a pointer to a list of OpenCL memory objects that were -created from EGL resources, within the context associated with _command_queue_. - -_event_wait_list_ and _num_events_in_wait_list_ specify events that need to -complete before this particular command can be executed. -If _event_wait_list_ is `NULL`, then this particular command does not wait -on any event to complete. -If _event_wait_list_ is `NULL`, _num_events_in_wait_list_ must be 0. -If _event_wait_list_ is not `NULL`, the list of events pointed to by -_event_wait_list_ must be valid and _num_events_in_wait_list_ must be -greater than 0. -The events specified in _event_wait_list_ act as synchronization points. - -_event_ returns an event object that identifies this command and -can be used to query or wait for this command to complete. -If _event_ is `NULL` or the enqueue is unsuccessful, no event will be -created and therefore it will not be possible to query the status of this -command or to wait for this command to complete. -If _event_wait_list_ and _event_ are not `NULL`, _event_ must not refer -to an element of the _event_wait_list_ array. - -*clEnqueueReleaseEGLObjectsKHR* returns CL_SUCCESS if the function is -executed successfully. -If _num_objects_ is 0 and _mem_objects_ is `NULL` then the function does -nothing and returns CL_SUCCESS. -Otherwise it returns one of the following errors: - - * CL_INVALID_VALUE if _num_objects_ is zero and _mem_objects_ is not a - `NULL` value or if _num_objects_ > 0 and _mem_objects_ is `NULL`. - * CL_INVALID_MEM_OBJECT if memory objects in _mem_objects_ are not valid - OpenCL memory objects in the context associated with _command_queue_. - * CL_INVALID_EGL_OBJECT_KHR if memory objects in _mem_objects_ have not - been created from EGL resources. - * CL_INVALID_COMMAND_QUEUE if _command_queue_ is not a valid - command-queue.
- * CL_INVALID_EVENT_WAIT_LIST if _event_wait_list_ is `NULL` and - _num_events_in_wait_list_ > 0, or _event_wait_list_ is not `NULL` and - _num_events_in_wait_list_ is 0, or if event objects in _event_wait_list_ - are not valid events. - * CL_OUT_OF_RESOURCES if there is a failure to allocate resources required - by the OpenCL implementation on the device. - * CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources - required by the OpenCL implementation on the host. - -[[cl_khr_egl_image-event-command-types]] -==== Event Command Types for Sharing memory objects created from EGL resources - -The following table describes the event command types for the OpenCL commands -to acquire and release OpenCL memory objects that have been created from -EGL resources: - -.List of supported event command types -[width="100%",cols="2,3",options="header"] -|==== -| *Events Created By* -| *Event Command Type* - -| {clEnqueueAcquireEGLObjectsKHR} -| {CL_COMMAND_ACQUIRE_EGL_OBJECTS_KHR_anchor} - -| {clEnqueueReleaseEGLObjectsKHR} -| {CL_COMMAND_RELEASE_EGL_OBJECTS_KHR_anchor} - -|==== - -[[cl_khr_egl_image-issues]] -=== Issues - - . This extension does not support reference counting of the images, so the - onus is on the application to behave sensibly and not release the - underlying cl_mem object while the EGLImage is still being used. - . In order to ensure data integrity, the application is responsible for - synchronizing access to shared CL/EGL image objects by their respective - APIs. - Failure to provide such synchronization may result in race conditions - and other undefined behavior. - This may be accomplished by calling clWaitForEvents with the event - objects returned by any OpenCL commands which use the shared image - object or by calling clFinish. - . Currently CL_MEM_READ_ONLY is the only supported flag for _flags_. 
-+ --- -RESOLVED: Implementation will now return an error if writing to a shared -object that is not supported rather than disallowing it entirely. --- - . Currently restricted to 2D image objects. - . What should happen for YUV color-space conversion, multi plane images, - and chroma-siting, and channel mapping? -+ --- -RESOLVED: YUV is no longer explicitly described in this extension. -Before this removal the behavior was dependent on the platform. -This extension explicitly leaves the YUV layout to the platform and EGLImage -source extension (i.e. is implementation specific). -Colorspace conversion must be applied by the application using a color -conversion matrix. - -The expected extension path if YUV color-space conversion is to be supported -is to introduce a YUV image type and provide overloaded versions of the -read_image built-in functions. - -Getting image information for a YUV image should return the original image -size (non quantized size) when all of Y U and V are present in the image. -If the planes have been separated then the actual dimensionality of the -separated plane should be reported. -For example with YUV 4:2:0 (NV12) with a YUV image of 256x256, the Y only -image would return 256x256 whereas the UV only image would return 128x128. --- - . Should an attribute list be used instead? -+ --- -RESOLVED: function has been changed to use an attribute list. --- - . What should happen for EGLImage extensions which introduce formats - without a mapping to an OpenCL image channel data type or channel order? -+ --- -RESOLVED: This extension does not define those formats. -It is expected that as additional EGL extensions are added to create EGL -images from other sources, an extension to CL will be introduced where -needed to represent those image types. --- - . What are the guarantees to synchronization behavior provided by the - implementation? -+ --- -The basic portable form of synchronization is to use a clFinish, as is the -case for GL interop. 
-In addition implementations which support the synchronization extensions -cl_khr_egl_event and EGL_KHR_cl_event can interoperate more efficiently as -described in those extensions. --- diff --git a/ext/cl_khr_expect_assume.asciidoc b/ext/cl_khr_expect_assume.asciidoc deleted file mode 100644 index 274d73b1..00000000 --- a/ext/cl_khr_expect_assume.asciidoc +++ /dev/null @@ -1,70 +0,0 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ - -[[cl_khr_expect_assume]] -== Kernel Optimization Hints - -This extension adds mechanisms to provide information to the compiler that may improve the performance of some kernels. -Specifically, this extension adds the ability to: - -* Tell the compiler the _expected_ value of a variable. -* Allow the compiler to _assume_ a condition is true. - -These functions are not required for functional correctness. - -The initial version of this extension extends the OpenCL SPIR-V environment to support new instructions for offline compilation tool chains. -Similar functionality may be provided by some OpenCL C online compilation tool chains, but formal support in OpenCL C is not required by the initial version of the extension. - -=== General Information - -==== Name Strings - -`cl_khr_expect_assume` - -==== Version History - -[cols="1,1,3",options="header",] -|==== -| *Date* | *Version* | *Description* -| 2021-11-10 | 1.0.0 | First assigned version. -|==== - -==== Dependencies - -This extension is written against the OpenCL Specifications Version V3.0.8. - -The initial version of this extension extends the OpenCL SPIR-V environment to support new instructions. -Please refer to the OpenCL SPIR-V Environment Specification that describes how this extension modifies the OpenCL SPIR-V environment. 
- -=== Sample Code - -Although this extension does not formally extend OpenCL C, the ability to provide _expect_ and _assume_ information is supported by many OpenCL C compiler tool chains. -The sample code below describes how to test for and provide _expect_ and _assume_ information to compilers based on Clang: - -[source,opencl_c] ----- -// __has_builtin is an optional compiler feature that is supported by Clang. -// If this feature is not supported, we will assume the builtin is not present. -#ifndef __has_builtin -#define __has_builtin(x) 0 -#endif - -kernel void test(global int* dst, global int* src) -{ - int value = src[get_global_id(0)]; - - // Tell the compiler that the most likely source value is zero. -#if __has_builtin(__builtin_expect) - value = __builtin_expect(value, 0); -#endif - - // Tell the compiler that the source value is non-negative. - // Behavior is undefined if the source value is actually negative. -#if __has_builtin(__builtin_assume) - __builtin_assume(value >= 0); -#endif - - dst[get_global_id(0)] = value % 4; -} ----- diff --git a/ext/cl_khr_extended_async_copies.asciidoc b/ext/cl_khr_extended_async_copies.asciidoc deleted file mode 100644 index 9bde9244..00000000 --- a/ext/cl_khr_extended_async_copies.asciidoc +++ /dev/null @@ -1,182 +0,0 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ - -[[cl_khr_extended_async_copies]] -== Extended Async Copies - -This section describes the *cl_khr_extended_async_copies* extension. -This extension augments built-in asynchronous copy functions to OpenCL C -to support more patterns: - -1. for async copy between 2D source and 2D destination. -2. for async copy between 3D source and 3D destination. 
- -=== General Information - -==== Version History - -[cols="1,1,3",options="header",] -|==== -| *Date* | *Version* | *Description* -| 2020-04-21 | 0.9.0 | First assigned version (provisional). -| 2021-09-06 | 0.9.1 | Elements-based proposal update. -| 2021-11-10 | 1.0.0 | First non-provisional version. -|==== - -[[cl_khr_extended_async_copies-additions-to-chapter-6-of-the-opencl-specification]] -=== Additions to Chapter 6 of the OpenCL C Specification - -The following new built-in functions are added to the _Async Copies from Global to -Local Memory, Local to Global Memory, and Prefetch_ functions described in _section 6.12.10_ -and _section 6.13.10_ of the OpenCL 1.2 and OpenCL 2.0 C specifications. - -Note that *async_work_group_strided_copy* is a special case of -*async_work_group_copy_2D2D*, namely one which copies a single column to a -single line or vice versa. -For example: + -`async_work_group_strided_copy(dst, src, num_gentypes, src_stride, event)` is equal to -`async_work_group_copy_2D2D(dst, 0, src, 0, sizeof(gentype), 1, num_gentypes, src_stride, 1, event)` - -The async copy built-in functions described in this section support arbitrary -`gentype`-based buffers by casting pointers to `void*`. - -These async copy built-in functions do not perform any implicit synchronization -of source data such as using a *barrier* before performing the copy. - -These async copy built-in functions are performed by all work-items in a -work-group and must therefore be encountered by all work-items in a work-group -executing the kernel with the same argument values; otherwise the results are -undefined. - -The _src_offset_, _dst_offset_, _src_total_line_length_, -_dst_total_line_length_, _src_total_plane_area_ and _dst_total_plane_area_ -function arguments are expressed in elements. - -Both _src_total_line_length_ and _dst_total_line_length_ describe the number of -elements between the beginning of the current line and the beginning of the next -line. 
- -Both _src_total_plane_area_ and _dst_total_plane_area_ describe the number of -elements between the beginning of the current plane and the beginning of the -next plane. - -These async copy built-in functions return an event object that can be used by -*wait_group_events* to wait for the async copy to finish. The _event_ argument -can also be used to associate the async copy with a previous async copy allowing -an event to be shared by multiple async copies; otherwise _event_ should be -zero. If the _event_ argument is non-zero, the event object supplied as the -_event_ argument will be returned. - -[cols="1a,1",options="header",] -|======================================================================= -|*Function* |*Description* -|[source,opencl_c] ----- -event_t async_work_group_copy_2D2D( - __local void *dst, - size_t dst_offset, - const __global void *src, - size_t src_offset, - size_t num_bytes_per_element, - size_t num_elements_per_line, - size_t num_lines, - size_t src_total_line_length, - size_t dst_total_line_length, - event_t event) - -event_t async_work_group_copy_2D2D( - __global void *dst, - size_t dst_offset, - const __local void *src, - size_t src_offset, - size_t num_bytes_per_element, - size_t num_elements_per_line, - size_t num_lines, - size_t src_total_line_length, - size_t dst_total_line_length, - event_t event) ----- -| Perform an async copy of (_num_elements_per_line_ * _num_lines_) elements -of size _num_bytes_per_element_ from -(_src_ + (_src_offset_ * _num_bytes_per_element_)) to -(_dst_ + (_dst_offset_ * _num_bytes_per_element_)). All pointer arithmetic -is performed with implicit casting to `char*` by the implementation. -Each line contains _num_elements_per_line_ elements of size -_num_bytes_per_element_. -After each line of transfer, the _src_ address is incremented by -_src_total_line_length_ elements -(i.e. 
_src_total_line_length_ * _num_bytes_per_element_ bytes), -and the _dst_ address is incremented by _dst_total_line_length_ elements -(i.e. _dst_total_line_length_ * _num_bytes_per_element_ bytes), -for the next line of transfer. - -The behavior of *async_work_group_copy_2D2D* is undefined if the -source or destination addresses exceed the upper bounds of the address space -during the copy. - -The behavior of *async_work_group_copy_2D2D* is also undefined if the -_src_total_line_length_ or _dst_total_line_length_ values are smaller -than _num_elements_per_line_, i.e. overlapping of lines is undefined. - -|[source,opencl_c] ----- -event_t async_work_group_copy_3D3D( - __local void *dst, - size_t dst_offset, - const __global void *src, - size_t src_offset, - size_t num_bytes_per_element, - size_t num_elements_per_line, - size_t num_lines, - size_t num_planes, - size_t src_total_line_length, - size_t src_total_plane_area, - size_t dst_total_line_length, - size_t dst_total_plane_area, - event_t event) - -event_t async_work_group_copy_3D3D( - __global void *dst, - size_t dst_offset, - const __local void *src, - size_t src_offset, - size_t num_bytes_per_element, - size_t num_elements_per_line, - size_t num_lines, - size_t num_planes, - size_t src_total_line_length, - size_t src_total_plane_area, - size_t dst_total_line_length, - size_t dst_total_plane_area, - event_t event) ----- -| Perform an async copy of \((_num_elements_per_line_ * _num_lines_) * _num_planes_) elements -of size _num_bytes_per_element_ from -(_src_ + (_src_offset_ * _num_bytes_per_element_)) to -(_dst_ + (_dst_offset_ * _num_bytes_per_element_)), -arranged in _num_planes_ planes. All pointer arithmetic -is performed with implicit casting to `char*` by the implementation. -Each plane contains _num_lines_ lines. -Each line contains _num_elements_per_line_ elements. -After each line of transfer, the _src_ address is incremented by -_src_total_line_length_ elements -(i.e. 
_src_total_line_length_ * _num_bytes_per_element_ bytes), -and the _dst_ address is incremented by _dst_total_line_length_ elements -(i.e. _dst_total_line_length_ * _num_bytes_per_element_ bytes), -for the next line of transfer. - -The behavior of *async_work_group_copy_3D3D* is undefined if the -source or destination addresses exceed the upper bounds of the address space -during the copy. - -The behavior of *async_work_group_copy_3D3D* is also undefined if the -_src_total_line_length_ or _dst_total_line_length_ values are smaller -than _num_elements_per_line_, i.e. overlapping of lines is undefined. - -The behavior of *async_work_group_copy_3D3D* is also undefined if -_src_total_plane_area_ is smaller than (_num_lines_ * _src_total_line_length_), -or _dst_total_plane_area_ is smaller than (_num_lines_ * _dst_total_line_length_), -i.e. overlapping of planes is undefined. - -|======================================================================= diff --git a/ext/cl_khr_extended_bit_ops.asciidoc b/ext/cl_khr_extended_bit_ops.asciidoc deleted file mode 100644 index b65194cf..00000000 --- a/ext/cl_khr_extended_bit_ops.asciidoc +++ /dev/null @@ -1,131 +0,0 @@ -// Copyright 2018-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ - -[[cl_khr_extended_bit_ops]] -== Extended Bit Operations - -This extension adds OpenCL C functions for performing extended bit operations. -Specifically, the following functions are added: - -* bitfield insert: insert bits from one source operand into another source operand. -* bitfield extract: extract bits from a source operand, with sign- or zero-extension. -* bit reverse: reverse the bits of a source operand. 
- -=== General Information - -==== Name Strings - -`cl_khr_extended_bit_ops` - -==== Version History - -[cols="1,1,3",options="header",] -|==== -| *Date* | *Version* | *Description* -| 2021-04-22 | 1.0.0 | Initial version. -|==== - -==== Dependencies - -This extension is written against the OpenCL 3.0 C Language Specification and the OpenCL SPIR-V Environment Specification Version V3.0.6. - -This extension requires OpenCL 1.0. - -=== New OpenCL C Functions - -[source,opencl_c] ----- -gentype bitfield_insert( gentype base, gentype insert, uint offset, uint count ) -igentype bitfield_extract_signed( gentype base, uint offset, uint count ) -ugentype bitfield_extract_unsigned( gentype base, uint offset, uint count ) -gentype bit_reverse( gentype base ) ----- - -=== Modifications to the OpenCL C Specification - -==== Modify Section 6.15.3. Integer Functions: - -Add a new Section 6.15.3.X. Extended Bit Operations: :: -+ --- -The functions described in the following table can be used with built-in scalar or vector integer types to perform extended bit operations. -The functions that operate on vector types operate component-wise. -The description is per-component. - -In the table below, the generic type name `gentype` refers to the built-in integer types `char`, `char__n__`, `uchar`, `uchar__n__`, `short`, `short__n__`, `ushort`, `ushort__n__`, `int`, `int__n__`, `uint`, `uint__n__`, `long`, `long__n__`, `ulong`, and `ulong__n__`. -The generic type name `igentype` refers to the built-in signed integer types `char`, `char__n__`, `short`, `short__n__`, `int`, `int__n__`, `long`, and `long__n__`. -The generic type name `ugentype` refers to the built-in unsigned integer types `uchar`, `uchar__n__`, `ushort`, `ushort__n__`, `uint`, `uint__n__`, `ulong`, and `ulong__n__`. -_n_ is 2, 3, 4, 8, or 16. 
- -.Built-in Scalar and Vector Extended Bit Operations -[cols="1a,1", options="header"] -|=== -|*Function* -|*Description* - -|[source,opencl_c] ----- -gentype bitfield_insert( - gentype base, gentype insert, - uint offset, uint count) ----- - -|Returns a copy of _base_, with a modified bitfield that comes from _insert_. - -Any bits of the result value numbered outside [_offset_, _offset_ + _count_ - 1] (inclusive) will come from the corresponding bits in _base_. - -Any bits of the result value numbered inside [_offset_, _offset_ + _count_ - 1] (inclusive) will come from the bits numbered [0, _count_ - 1] (inclusive) of _insert_. - -_count_ is the number of bits to be modified. -If _count_ equals 0, the return value will be equal to _base_. - -If _count_ or _offset_ or _offset_ + _count_ is greater than number of bits in `gentype` (for scalar types) or components of `gentype` (for vector types), the result is undefined. - -|[source,opencl_c] ----- -igentype bitfield_extract_signed( - gentype base, - uint offset, uint count) ----- - -|Returns an extracted bitfield from _base_ with sign extension. -The type of the return value is always a signed type. - -The bits of _base_ numbered in [_offset_, _offset_ + _count_ - 1] (inclusive) are returned as the bits numbered in [0, _count_ - 1] (inclusive) of the result. -The remaining bits in the result will be sign extended by replicating the bit numbered _offset_ + _count_ - 1 of _base_. - -_count_ is the number of bits to be extracted. -If _count_ equals 0, the result is 0. - -If the _count_ or _offset_ or _offset_ + _count_ is greater than number of bits in `gentype` (for scalar types) or components of `gentype` (for vector types), the result is undefined. - -|[source,opencl_c] ----- -ugentype bitfield_extract_unsigned( - gentype base, - uint offset, uint count) ----- - -|Returns an extracted bitfield from _base_ with zero extension. -The type of the return value is always an unsigned type. 
- -The bits of _base_ numbered in [_offset_, _offset_ + _count_ - 1] (inclusive) are returned as the bits numbered in [0, _count_ - 1] (inclusive) of the result. -The remaining bits in the result will be zero. - -_count_ is the number of bits to be extracted. -If _count_ equals 0, the result is 0. - -If the _count_ or _offset_ or _offset_ + _count_ is greater than number of bits in `gentype` (for scalar types) or components of `gentype` (for vector types), the result is undefined. - -|[source,opencl_c] ----- -gentype bit_reverse( - gentype base) ----- - -|Returns the value of _base_ with reversed bits. -That is, the bit numbered _n_ of the result value will be taken from the bit numbered _width_ - _n_ - 1 of _base_ (for scalar types) or a component of _base_ (for vector types), where _width_ is number of bits of `gentype` (for scalar types) or components of `gentype` (for vector types). - -|=== --- diff --git a/ext/cl_khr_extended_versioning.asciidoc b/ext/cl_khr_extended_versioning.asciidoc deleted file mode 100644 index 115f5ce7..00000000 --- a/ext/cl_khr_extended_versioning.asciidoc +++ /dev/null @@ -1,283 +0,0 @@ -// Copyright 2019-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ - -[[cl_khr_extended_versioning]] -== Extended versioning - -This extension introduces new platform and device queries that return detailed -version information to applications. It makes it possible to return the exact -revision of the specification or intermediate languages supported by an -implementation. It also enables implementations to communicate a version -number for each of the extensions they support and remove the requirement -for applications to process strings to test for the presence of an extension or -intermediate language or built-in kernel. 
- -Extended versioning was promoted to a core feature in OpenCL 3.0, however note -that the query for {CL_DEVICE_OPENCL_C_NUMERIC_VERSION_KHR} was replaced by the -query for {CL_DEVICE_OPENCL_C_ALL_VERSIONS}. - -=== General Information - -==== Name Strings - -`cl_khr_extended_versioning` - -==== Contributors - -Kévin Petit, Arm Ltd. + -Ben Ashbaugh, Intel + -Alastair Murray, Codeplay Software Ltd. + -Einar Hov, Arm Ltd. - -==== Version History - -[cols="1,1,3",options="header",] -|==== -| *Date* | *Version* | *Description* -| 2020-02-12 | 1.0.0 | Initial version. -|==== - -==== Dependencies - -This extension is written against the OpenCL Specification -Version 2.2, Revision 11. - -This extension requires OpenCL 1.0. - -=== New API Types - -==== Version - -This extension introduces a new scheme to encode detailed -(major, minor, patch/revision) version information into a single 32-bit unsigned -integer: - -* The major version is using bits 31-22 -* The minor version is using bits 21-12 -* The patch version is using bits 11-0 - -This scheme enables two versions to be ordered using the standard C/C++ operators. -Macros are provided to extract individual fields or compose a full version -from the individual fields. 
- -[source,opencl] ----- - -typedef cl_uint cl_version_khr; - -#define CL_VERSION_MAJOR_BITS_KHR (10) -#define CL_VERSION_MINOR_BITS_KHR (10) -#define CL_VERSION_PATCH_BITS_KHR (12) - -#define CL_VERSION_MAJOR_MASK_KHR ((1 << CL_VERSION_MAJOR_BITS_KHR) - 1) -#define CL_VERSION_MINOR_MASK_KHR ((1 << CL_VERSION_MINOR_BITS_KHR) - 1) -#define CL_VERSION_PATCH_MASK_KHR ((1 << CL_VERSION_PATCH_BITS_KHR) - 1) - -#define CL_VERSION_MAJOR_KHR(version) \ - ((version) >> (CL_VERSION_MINOR_BITS_KHR + CL_VERSION_PATCH_BITS_KHR)) -#define CL_VERSION_MINOR_KHR(version) \ - (((version) >> CL_VERSION_PATCH_BITS_KHR) & CL_VERSION_MINOR_MASK_KHR) -#define CL_VERSION_PATCH_KHR(version) ((version) & CL_VERSION_PATCH_MASK_KHR) - -#define CL_MAKE_VERSION_KHR(major, minor, patch) \ - ((((major) & CL_VERSION_MAJOR_MASK_KHR) << (CL_VERSION_MINOR_BITS_KHR + CL_VERSION_PATCH_BITS_KHR)) | \ - (((minor) & CL_VERSION_MINOR_MASK_KHR) << CL_VERSION_PATCH_BITS_KHR) | \ - ((patch) & CL_VERSION_PATCH_MASK_KHR)) ----- - -==== Name and version - -This extension adds a structure that can be used to describe a combination of a -name alongside a version number: - -[source,opencl] ----- -#define CL_NAME_VERSION_MAX_NAME_SIZE_KHR 64 - -typedef struct _cl_name_version_khr { - cl_version_khr version; - char name[CL_NAME_VERSION_MAX_NAME_SIZE_KHR]; -} cl_name_version_khr; ----- - -The `name` field is an array of {CL_NAME_VERSION_MAX_NAME_SIZE_KHR} bytes used as -storage for a NUL-terminated string whose maximum length is therefore -{CL_NAME_VERSION_MAX_NAME_SIZE_KHR} `- 1`. 
- -=== New API Enums - -Accepted value for the _param_name_ parameter to {clGetPlatformInfo}: - -[source,opencl] ----- -CL_PLATFORM_NUMERIC_VERSION_KHR -CL_PLATFORM_EXTENSIONS_WITH_VERSION_KHR ----- - -Accepted value for the _param_name_ parameter to {clGetDeviceInfo}: - -[source,opencl] ----- -CL_DEVICE_NUMERIC_VERSION_KHR -CL_DEVICE_OPENCL_C_NUMERIC_VERSION_KHR -CL_DEVICE_EXTENSIONS_WITH_VERSION_KHR -CL_DEVICE_ILS_WITH_VERSION_KHR -CL_DEVICE_BUILT_IN_KERNELS_WITH_VERSION_KHR ----- - -=== Modifications to the OpenCL API Specification - -(Modify Section 4.1, *Querying Platform Info*) :: -+ --- - -(Add the following to Table 3, _List of supported param_names by {clGetPlatformInfo}_) :: -+ -[cols="3,2,3",options="header"] -|==== -| Platform Info -| Return Type -| Description - -| {CL_PLATFORM_NUMERIC_VERSION_KHR} -| {cl_version_khr_TYPE} -| Returns detailed (major, minor, patch) numeric version information. The major - and minor version numbers returned must match those returned via - {CL_PLATFORM_VERSION}. - -| {CL_PLATFORM_EXTENSIONS_WITH_VERSION_KHR} -| {cl_name_version_khr_TYPE}[] -| Returns an array of description (name and version) structures. The same - extension name must not be reported more than once. The list of extensions - reported must match the list reported via {CL_PLATFORM_EXTENSIONS}. - -|==== --- - -(Modify Section 4.2, *Querying Devices*) :: -+ --- - -(Add the following to Table 5, _List of supported param_names by {clGetDeviceInfo}_) :: -+ -[cols="3,2,3",options="header"] -|==== -| Device Info -| Return Type -| Description - -| {CL_DEVICE_NUMERIC_VERSION_KHR} -| {cl_version_khr_TYPE} -| Returns detailed (major, minor, patch) numeric version information. The major - and minor version numbers returned must match those returned via - {CL_DEVICE_VERSION}. - -| {CL_DEVICE_OPENCL_C_NUMERIC_VERSION_KHR} -| {cl_version_khr_TYPE} -| Returns detailed (major, minor, patch) numeric version information. 
The major - and minor version numbers returned must match those returned via - {CL_DEVICE_OPENCL_C_VERSION}. - -| {CL_DEVICE_EXTENSIONS_WITH_VERSION_KHR} -| {cl_name_version_khr_TYPE}[] -| Returns an array of description (name and version) structures. The same - extension name must not be reported more than once. The list of extensions - reported must match the list reported via {CL_DEVICE_EXTENSIONS}. - -| {CL_DEVICE_ILS_WITH_VERSION_KHR} -| {cl_name_version_khr_TYPE}[] -| Returns an array of descriptions (name and version) for all supported - Intermediate Languages. Intermediate Languages with the same name may be - reported more than once but each name and major/minor version combination - may only be reported once. The list of intermediate languages reported must - match the list reported via {CL_DEVICE_IL_VERSION}. - -| {CL_DEVICE_BUILT_IN_KERNELS_WITH_VERSION_KHR} -| {cl_name_version_khr_TYPE}[] -| Returns an array of descriptions for the built-in kernels supported by the device. - Each built-in kernel may only be reported once. The list of reported kernels must - match the list returned via {CL_DEVICE_BUILT_IN_KERNELS}. - -|==== --- - -=== Conformance tests - -. Each of the new queries described in this extension must be attempted and - succeed. -. It must be verified that the information returned by all queries that - extend existing queries is consistent with the information returned - by existing queries. -. Some of the queries introduced by this extension impose uniqueness constraints - on the list of returned values. It must be verified that these constraints are - satisfied. - -=== Issues - -. What compatibility policy should we define? e.g. a _revision_ has to be - backwards-compatible with previous ones -+ --- -*RESOLVED*: No general rules as that wouldn't be testable. Here's a recommended policy: - -- Patch version bump: only clarifications and small/obvious bugfixes. -- Minor version bump: backwards-compatible changes only. 
-- Major version bump: backwards compatibility may break. - --- - -. Do we want versioning for built-in kernels as returned by {CL_DEVICE_BUILT_IN_KERNELS}? -+ --- -*RESOLVED*: No immediate use-case for versioning but being able to get a list of - individual kernels without parsing a string is desirable. Adding - {CL_DEVICE_BUILT_IN_KERNELS_WITH_VERSION_KHR}. --- - -. What is the behaviour of the queries that return an array of structures when -there are no elements to return? -+ --- -*RESOLVED*: The query succeeds and the size returned is zero. --- - -. What value should be returned when version information is not available? -+ --- -*RESOLVED*: If a patch version is not available, it should be reported as 0. - If no version information is available, 0.0.0 should be reported. - These values have been chosen as they are guaranteed to be lower - than or equal to any other version. --- - -. Should we add a query to report SPIR-V extended instruction sets? -+ --- -*RESOLVED*: It is unlikely that we will introduce many SPIR-V extended - instruction sets without an accompanying API extension. Decided - not to do this. --- - -. Should the queries for which the old-style query doesn't exist in a given -OpenCL version be present (e.g. {CL_DEVICE_BUILT_IN_KERNELS_WITH_VERSION_KHR} -prior to OpenCL 2.1 or without support for `cl_khr_il_program` or -{CL_DEVICE_OPENCL_C_NUMERIC_VERSION_KHR} on OpenCL 1.0)? -+ --- -*RESOLVED*: All the queries are always present. - {CL_DEVICE_BUILT_IN_KERNELS_WITH_VERSION_KHR} returns an empty set - when Intermediate Languages are not supported. - {CL_DEVICE_OPENCL_C_NUMERIC_VERSION_KHR} always returns 1.0 on an - OpenCL 1.0 platform. --- - -. Is reporting multiple Intermediate Languages with the same name and major/minor -versions but differing patch versions allowed? -+ --- -*RESOLVED*: No. 
This isn't aligned with the intended use for patch versions and - makes it harder for implementations to guarantee consistency with - the existing IL queries. --- - diff --git a/ext/cl_khr_external_memory.asciidoc b/ext/cl_khr_external_memory.asciidoc deleted file mode 100644 index 3eeae890..00000000 --- a/ext/cl_khr_external_memory.asciidoc +++ /dev/null @@ -1,608 +0,0 @@ -// Copyright 2021-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ - -[[cl_khr_external_memory]] -== External Memory (Provisional) - -This extension defines a generic mechanism to share buffer and image objects between OpenCL and many other APIs. - -In particular, the `cl_khr_external_memory` extension defines: - -* Optional properties to import external memory exported by other APIs into OpenCL for a set of devices. - -* Routines to explicitly hand off memory ownership between OpenCL and other APIs. - -Other related extensions define specific external memory types that may be imported into OpenCL. - -=== General Information - -==== Name Strings - -`cl_khr_external_memory` + -`cl_khr_external_memory_dma_buf` + -`cl_khr_external_memory_dx` + -`cl_khr_external_memory_opaque_fd` + -`cl_khr_external_memory_win32` - -==== Version History - -[cols="1,1,3",options="header",] -|==== -| *Date* | *Version* | *Description* -| 2021-09-10 | 0.9.0 | Initial version (provisional). -| 2023-05-04 | 0.9.1 | Clarified device handle list enum cannot be specified without an external memory handle (provisional). -| 2023-08-01 | 0.9.2 | Changed device handle list enum to the memory-specific {CL_MEM_DEVICE_HANDLE_LIST_KHR} (provisional). -| 2023-08-29 | 0.9.3 | Added query for {CL_DEVICE_EXTERNAL_MEMORY_IMPORT_ASSUME_LINEAR_IMAGES_HANDLE_TYPES_KHR} (provisional). 
-|==== - -include::provisional_notice.asciidoc[] - -==== Dependencies - -This extension is written against the OpenCL Specification Version 3.0.8. - -Because this extension adds new properties for {clCreateBufferWithProperties} -and {clCreateImageWithProperties} this extension requires OpenCL 3.0. - -==== Contributors - -// spell-checker: disable -Ajit Hakke-Patil, NVIDIA + -Amit Rao, NVIDIA + -Balaji Calidas, QUALCOMM + -Ben Ashbaugh, INTEL + -Carsten Rohde, NVIDIA + -Christoph Kubisch, NVIDIA + -Debalina Bhattacharjee, NVIDIA + -Faith Ekstrand, INTEL + -James Jones, NVIDIA + -Jeremy Kemp, IMAGINATION + -Joshua Kelly, QUALCOMM + -Karthik Raghavan Ravi, NVIDIA + -Kedar Patil, NVIDIA + -Kevin Petit, ARM + -Nikhil Joshi, NVIDIA + -Sharan Ashwathnarayan, NVIDIA + -Vivek Kini, NVIDIA + -// spell-checker: enable - -=== New Types - -[source] ----- -typedef cl_uint cl_external_memory_handle_type_khr; ----- - -=== New API Functions - -[source] ----- -cl_int clEnqueueAcquireExternalMemObjectsKHR( - cl_command_queue command_queue, - cl_uint num_mem_objects, - const cl_mem *mem_objects, - cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, - cl_event *event); - -cl_int clEnqueueReleaseExternalMemObjectsKHR( - cl_command_queue command_queue, - cl_uint num_mem_objects, - const cl_mem *mem_objects, - cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, - cl_event *event); ----- - -=== New API Enums - -Accepted value for the _param_name_ parameter to {clGetPlatformInfo} to query external memory handle types that may be imported by all devices in an OpenCL platform: - -[source] ----- -CL_PLATFORM_EXTERNAL_MEMORY_IMPORT_HANDLE_TYPES_KHR 0x2044 ----- - -Accepted value for the _param_name_ parameter to {clGetDeviceInfo} to query external memory handle types that may be imported by an OpenCL device: - -[source] ----- -CL_DEVICE_EXTERNAL_MEMORY_IMPORT_HANDLE_TYPES_KHR 0x204F -CL_DEVICE_EXTERNAL_MEMORY_IMPORT_ASSUME_LINEAR_IMAGES_HANDLE_TYPES_KHR 0x2052 
----- - -New properties accepted as _properties_ to {clCreateBufferWithProperties} and {clCreateImageWithProperties}: - -[source] ----- -CL_MEM_DEVICE_HANDLE_LIST_KHR 0x2051 -CL_MEM_DEVICE_HANDLE_LIST_END_KHR 0 ----- - -New return values from {clGetEventInfo} when _param_name_ is {CL_EVENT_COMMAND_TYPE}: - -[source] ----- -CL_COMMAND_ACQUIRE_EXTERNAL_MEM_OBJECTS_KHR 0x2047 -CL_COMMAND_RELEASE_EXTERNAL_MEM_OBJECTS_KHR 0x2048 ----- - -External memory handle type added by `cl_khr_external_memory_dma_buf`: - -[source] ----- -CL_EXTERNAL_MEMORY_HANDLE_DMA_BUF_KHR 0x2067 ----- - -External memory handle types added by `cl_khr_external_memory_dx`: - -[source] ----- -CL_EXTERNAL_MEMORY_HANDLE_D3D11_TEXTURE_KHR 0x2063 -CL_EXTERNAL_MEMORY_HANDLE_D3D11_TEXTURE_KMT_KHR 0x2064 -CL_EXTERNAL_MEMORY_HANDLE_D3D12_HEAP_KHR 0x2065 -CL_EXTERNAL_MEMORY_HANDLE_D3D12_RESOURCE_KHR 0x2066 ----- - -External memory handle type added by `cl_khr_external_memory_opaque_fd`: - -[source] ----- -CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_FD_KHR 0x2060 ----- - -External memory handle types added by `cl_khr_external_memory_win32`: - -[source] ----- -CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_WIN32_KHR 0x2061 -CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_WIN32_KMT_KHR 0x2062 ----- - -=== Modifications to existing APIs added by this spec - -Following new enums are added to the list of supported _param_names_ by {clGetPlatformInfo}: - -.List of supported param_names by clGetPlatformInfo -[width="100%",cols="<33%,<17%,<50%",options="header"] -|==== -| Platform Info | Return Type | Description -| {CL_PLATFORM_EXTERNAL_MEMORY_IMPORT_HANDLE_TYPES_KHR} - | {cl_external_memory_handle_type_khr_TYPE}[] - | Returns the list of importable external memory handle types supported by all devices in _platform_. -|==== - -{clGetPlatformInfo} when called with _param_name_ {CL_PLATFORM_EXTERNAL_MEMORY_IMPORT_HANDLE_TYPES_KHR} must return a common list of external memory handle types supported by all devices in the platform. 
- -Following new enums are added to the list of supported _param_names_ by {clGetDeviceInfo}: - -.List of supported param_names by clGetDeviceInfo -[width="100%",cols="<33%,<17%,<50%",options="header"] -|==== -| Device Info | Return Type | Description -| {CL_DEVICE_EXTERNAL_MEMORY_IMPORT_HANDLE_TYPES_KHR} - | {cl_external_memory_handle_type_khr_TYPE}[] - | Returns the list of importable external memory handle types supported by _device_. -| {CL_DEVICE_EXTERNAL_MEMORY_IMPORT_ASSUME_LINEAR_IMAGES_HANDLE_TYPES_KHR} - | {cl_external_memory_handle_type_khr_TYPE}[] - | Returns the list of importable external memory handle types supported by _device_, that are assumed to apply linear layout to imported images when no other tiling information is provided. -|==== - -{clGetDeviceInfo} when called with param_name {CL_DEVICE_EXTERNAL_MEMORY_IMPORT_HANDLE_TYPES_KHR} must return a non-empty list of external memory handle types for at least one of the devices in the platform. - -{clGetDeviceInfo} when called with param_name {CL_DEVICE_EXTERNAL_MEMORY_IMPORT_ASSUME_LINEAR_IMAGES_HANDLE_TYPES_KHR} returns a list of external memory handle types that are assumed to have a linear memory layout when no other tiling information is provided. This list contains a subset of {CL_DEVICE_EXTERNAL_MEMORY_IMPORT_HANDLE_TYPES_KHR}. The returned list may be empty. - -External memory handle types not in {CL_DEVICE_EXTERNAL_MEMORY_IMPORT_ASSUME_LINEAR_IMAGES_HANDLE_TYPES_KHR} may have any memory layout. The layout interpretation of images imported with these handle types is implementation defined. - -Following new properties are added to the list of supported properties by {clCreateBufferWithProperties} and {clCreateImageWithProperties}. 
- -[[external-memory-properties-table]] -.List of supported buffer and image creation properties -[width="100%",cols="<33%,<17%,<50%",options="header"] -|==== -| Property | Property Value | Description -| {CL_MEM_DEVICE_HANDLE_LIST_KHR} - | {cl_device_id_TYPE}[] - | Specifies the list of OpenCL devices (terminated with {CL_MEM_DEVICE_HANDLE_LIST_END_KHR}) to associate with the external memory handle. -|==== - -If {CL_MEM_DEVICE_HANDLE_LIST_KHR} is not specified as part of _properties_, the memory object created by {clCreateBufferWithProperties} or {clCreateImageWithProperties} is by default accessible to all devices in the _context_. - -The properties used to create a buffer or image from an external memory handle are described by related extensions. -When a buffer or image is created from an external memory handle, the _flags_ used to specify usage information for the buffer or image must not include {CL_MEM_USE_HOST_PTR}, {CL_MEM_ALLOC_HOST_PTR}, or {CL_MEM_COPY_HOST_PTR}, and the _host_ptr_ argument must be `NULL`. -When images are created from an external memory handle, implementations may acquire information about image attributes such as format and layout at the time of creation. When such information is acquired at image creation time, it is used for the lifetime of the image object. - -Add to the list of error conditions for {clCreateBufferWithProperties} and {clCreateImageWithProperties}: - -* {CL_INVALID_DEVICE} - ** if a device identified by the property {CL_MEM_DEVICE_HANDLE_LIST_KHR} is not a valid device or is not associated with _context_, or - ** if a device identified by property {CL_MEM_DEVICE_HANDLE_LIST_KHR} cannot import the requested external memory object type, or - ** if {CL_MEM_DEVICE_HANDLE_LIST_KHR} is not specified as part of _properties_ and one or more devices in _context_ cannot import the requested external memory object type. 
-* {CL_INVALID_VALUE} - ** if _properties_ includes a supported external memory handle and _flags_ includes {CL_MEM_USE_HOST_PTR}, {CL_MEM_ALLOC_HOST_PTR}, or {CL_MEM_COPY_HOST_PTR}. -* {CL_INVALID_HOST_PTR} - ** if _properties_ includes a supported external memory handle and _host_ptr_ is not `NULL`. -* {CL_INVALID_PROPERTY} - ** if _properties_ does not include a supported external memory handle and {CL_MEM_DEVICE_HANDLE_LIST_KHR} is specified as part of _properties_. - -Add images created from an external memory handle to the description of `image_row_pitch` and `image_slice_pitch` for {cl_image_desc_TYPE}: - -* `image_row_pitch` is the scan-line pitch in bytes. -The `image_row_pitch` must be zero if _host_ptr_ is `NULL`, the image is not a -2D image created from a buffer, and the image is not an image created from an -external memory handle. -If `image_row_pitch` is zero and _host_ptr_ is not `NULL` then the image row -pitch is calculated as `image_width` {times} the size of an image element in -bytes. -If `image_row_pitch` is zero and the image is created from an external memory -handle then the image row pitch is implementation-defined. -The image row pitch must be {geq} `image_width` {times} the size of an image -element in bytes and must be a multiple of the size of an image element in -bytes. -For a 2D image created from a buffer the image row pitch must also be a multiple -of the maximum of the {CL_DEVICE_IMAGE_PITCH_ALIGNMENT} value for all devices in -the context that support images. - -* `image_slice_pitch` is the size in bytes of each 2D slice in a 3D image or the -size in bytes of each image in a 1D or 2D image array. -The `image_slice_pitch` must be zero if _host_ptr_ is `NULL` and the image is -not created from an external memory handle. 
-If `image_slice_pitch` is zero and _host_ptr_ is not `NULL` then the image slice -pitch is calculated as the image row pitch {times} `image_height` for a 2D image -array or a 3D image, and as the image row pitch for a 1D image array. -If `image_slice_pitch` is zero and the image is created from an external memory -handle then the image slice pitch is implementation-defined. -The image slice pitch must be {geq} the image row pitch {times} -`image_height` for a 2D image array or a 3D image, must be {geq} the image row -pitch for a 1D image array, and must be a multiple of the image row pitch. - -=== Description of new types added by this spec - -The following new APIs are added as part of this spec. The details of each are described below: - -==== Acquiring and Releasing External Memory Objects - -To enqueue a command to acquire OpenCL memory objects created from external memory handles, call the function - -include::{generated}/api/protos/clEnqueueAcquireExternalMemObjectsKHR.txt[] - -_command_queue_ specifies a valid command-queue. - -_num_mem_objects_ specifies the number of memory objects to acquire. - -_mem_objects_ points to a list of valid memory objects. - -_num_events_in_wait_list_ specifies the number of events in _event_wait_list_. - -_event_wait_list_ points to the list of events that need to complete before {clEnqueueAcquireExternalMemObjectsKHR} can be executed. -If _event_wait_list_ is `NULL`, then {clEnqueueAcquireExternalMemObjectsKHR} does not explicitly wait on any event to complete. -If _event_wait_list_ is `NULL`, _num_events_in_wait_list_ must be 0. -If _event_wait_list_ is not `NULL`, the list of events pointed to by _event_wait_list_ must be valid and _num_events_in_wait_list_ must be greater than 0. -The events specified in _event_wait_list_ act as synchronization points. -The context associated with events in _event_wait_list_ and that of _command_queue_ must be the same.
- -_event_ returns an event object that identifies this particular command and can be used to query or queue a wait for this particular command to complete. -_event_ can be `NULL` in which case it will not be possible for the application to query the status of this command or queue a wait for this command to complete. - -Applications must acquire the memory objects that are created using external handles before they can be used by any OpenCL commands queued to a command-queue. -Behavior is undefined if a memory object created from an external memory handle is used by an OpenCL command queued to a command-queue without being acquired. -This is to guarantee that the state of the memory objects is up-to-date and they are accessible to OpenCL. -See "Example with Acquire / Release" provided in <<cl_khr_external_memory-Sample-Code>> for more details on how to use this API. - -If _num_mem_objects_ is 0 and _mem_objects_ is `NULL`, the command will trivially succeed after its event dependencies are satisfied and will update its completion event. - -{clEnqueueAcquireExternalMemObjectsKHR} returns {CL_SUCCESS} if the function is executed successfully. -Otherwise, it returns one of the following errors: - -* {CL_INVALID_VALUE} if _num_mem_objects_ is zero and _mem_objects_ is not a `NULL` value or if _num_mem_objects_ is greater than 0 and _mem_objects_ is `NULL`. -* {CL_INVALID_MEM_OBJECT} if any of the memory objects in _mem_objects_ is not a valid OpenCL memory object created using an external memory handle. -* {CL_INVALID_COMMAND_QUEUE} -** if _command_queue_ is not a valid command-queue, or -** if the device associated with _command_queue_ is not one of the devices specified by {CL_MEM_DEVICE_HANDLE_LIST_KHR} at the time of creating one or more of _mem_objects_, or -** if one or more of _mem_objects_ belong to a context that does not contain a device associated with _command_queue_.
-* {CL_INVALID_EVENT_WAIT_LIST} - ** if _event_wait_list_ is `NULL` and _num_events_in_wait_list_ is not 0, or - ** if _event_wait_list_ is not `NULL` and _num_events_in_wait_list_ is 0, or - ** if event objects in _event_wait_list_ are not valid events. -* {CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST} if the execution status of any of the events in _event_wait_list_ is a negative integer value. -* {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required by the OpenCL implementation on the device. -* {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources required by the OpenCL implementation on the host. - -To enqueue a command to release OpenCL memory objects created from external memory handles, call the function - -include::{generated}/api/protos/clEnqueueReleaseExternalMemObjectsKHR.txt[] - -_command_queue_ specifies a valid command-queue. - -_num_mem_objects_ specifies the number of memory objects to release. - -_mem_objects_ points to a list of valid memory objects. - -_num_events_in_wait_list_ specifies the number of events in _event_wait_list_. - -_event_wait_list_ points to the list of events that need to complete before {clEnqueueReleaseExternalMemObjectsKHR} can be executed. -If _event_wait_list_ is `NULL`, then {clEnqueueReleaseExternalMemObjectsKHR} does not wait on any event to complete. -If _event_wait_list_ is `NULL`, _num_events_in_wait_list_ must be 0. -If _event_wait_list_ is not `NULL`, the list of events pointed to by _event_wait_list_ must be valid and _num_events_in_wait_list_ must be greater than 0. -The events specified in _event_wait_list_ act as synchronization points. -The context associated with events in _event_wait_list_ and that of _command_queue_ must be the same. - -_event_ returns an event object that identifies this particular command and can be used to query or queue a wait for this particular command to complete. 
-_event_ can be `NULL` in which case it will not be possible for the application to query the status of this command or queue a wait for this command to complete. - -Applications must use {clEnqueueReleaseExternalMemObjectsKHR} to release acquired memory objects before using them through any commands in the other API. -This is to guarantee that the state of memory objects is up-to-date and they are accessible to the other API. -See "Example with Acquire / Release" provided in <<cl_khr_external_memory-Sample-Code>> for more details on how to use this API. - -If _num_mem_objects_ is 0 and _mem_objects_ is `NULL`, the command will trivially succeed after its event dependencies are satisfied and will update its completion event. - -{clEnqueueReleaseExternalMemObjectsKHR} returns {CL_SUCCESS} if the function is executed successfully. -Otherwise, it returns one of the following errors: - -* {CL_INVALID_VALUE} if _num_mem_objects_ is zero and _mem_objects_ is not a `NULL` value or if _num_mem_objects_ is greater than 0 and _mem_objects_ is `NULL`. -* {CL_INVALID_MEM_OBJECT} if any of the memory objects in _mem_objects_ is not a valid OpenCL memory object created using an external memory handle. -* {CL_INVALID_COMMAND_QUEUE} -** if _command_queue_ is not a valid command-queue, or -** if the device associated with _command_queue_ is not one of the devices specified by {CL_MEM_DEVICE_HANDLE_LIST_KHR} at the time of creating one or more of _mem_objects_, or -** if one or more of _mem_objects_ belong to a context that does not contain a device associated with _command_queue_. -* {CL_INVALID_EVENT_WAIT_LIST} - ** if _event_wait_list_ is `NULL` and _num_events_in_wait_list_ is not 0, or - ** if _event_wait_list_ is not `NULL` and _num_events_in_wait_list_ is 0, or - ** if event objects in _event_wait_list_ are not valid events. -* {CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST} if the execution status of any of the events in _event_wait_list_ is a negative integer value.
-* {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required by the OpenCL implementation on the device. -* {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources required by the OpenCL implementation on the host. - -=== Descriptions of External Memory Handle Types - -This section describes the external memory handle types that are added by related extensions. - -Applications can import the same payload into multiple OpenCL contexts and multiple times into a given OpenCL context. In all cases, each import operation must create a distinct memory object. - -==== File Descriptor Handle Types - -The `cl_khr_external_memory_opaque_fd` extension extends {cl_external_memory_handle_type_khr_TYPE} to support the following new types of handles, and adds as a property that may be specified when creating a buffer or an image memory object from an external handle: - --- - * {CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_FD_KHR} specifies a POSIX file descriptor handle that has only limited valid usage outside of OpenCL and other compatible APIs. It must be compatible with the POSIX system calls dup, dup2, close, and the non-standard system call dup3. Additionally, it must be transportable over a socket using an SCM_RIGHTS control message. It owns a reference to the underlying memory resource represented by its memory object. --- - -The `cl_khr_external_memory_dma_buf` extension extends {cl_external_memory_handle_type_khr_TYPE} to support the following types of handles, and adds as a property that may be specified when creating a buffer or an image memory object from an external handle: - --- - * {CL_EXTERNAL_MEMORY_HANDLE_DMA_BUF_KHR} is a file descriptor for a Linux dma_buf. It owns a reference to the underlying memory resource represented by its memory object. --- - -For these extensions, importing memory from a file descriptor transfers ownership of the file descriptor from the application to the OpenCL implementation. 
The application must not perform any operations on the file descriptor after a successful import. The imported memory object holds a reference to its payload. - -==== NT Handle Types - -The `cl_khr_external_memory_dx` extension extends {cl_external_memory_handle_type_khr_TYPE} to support the following new types of handles, and adds as a property that may be specified when creating a buffer or an image memory object from an external handle: - --- - * {CL_EXTERNAL_MEMORY_HANDLE_D3D11_TEXTURE_KHR} specifies an NT handle returned by IDXGIResource1::CreateSharedHandle referring to a Direct3D 10 or 11 texture resource. It owns a reference to the memory used by the Direct3D resource. - - * {CL_EXTERNAL_MEMORY_HANDLE_D3D11_TEXTURE_KMT_KHR} specifies a global share handle returned by IDXGIResource::GetSharedHandle referring to a Direct3D 10 or 11 texture resource. It does not own a reference to the underlying Direct3D resource, and will therefore become invalid when all memory objects and Direct3D resources associated with it are destroyed. - - * {CL_EXTERNAL_MEMORY_HANDLE_D3D12_HEAP_KHR} specifies an NT handle returned by ID3D12Device::CreateSharedHandle referring to a Direct3D 12 heap resource. It owns a reference to the resources used by the Direct3D heap. - - * {CL_EXTERNAL_MEMORY_HANDLE_D3D12_RESOURCE_KHR} specifies an NT handle returned by ID3D12Device::CreateSharedHandle referring to a Direct3D 12 committed resource. It owns a reference to the memory used by the Direct3D resource. --- - -The `cl_khr_external_memory_win32` extension extends {cl_external_memory_handle_type_khr_TYPE} to support the following new types of handles, and adds as a property that may be specified when creating a buffer or an image memory object from an external handle: - --- - * {CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_WIN32_KHR} specifies an NT handle that has only limited valid usage outside of OpenCL and other compatible APIs. 
It must be compatible with the functions DuplicateHandle, CloseHandle, CompareObjectHandles, GetHandleInformation, and SetHandleInformation. It owns a reference to the underlying memory resource represented by its memory object. - * {CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_WIN32_KMT_KHR} specifies a global share handle that has only limited valid usage outside of OpenCL and other compatible APIs. It is not compatible with any native APIs. It does not own a reference to the underlying memory resource represented by its memory object, and will therefore become invalid when all memory objects associated with it are destroyed. --- - -For these extensions, importing memory object payloads from Windows handles does not transfer ownership of the handle to the OpenCL implementation. For handle types defined as NT handles, the application must release handle ownership using the CloseHandle system call when the handle is no longer needed. For handle types defined as NT handles, the imported memory object holds a reference to its payload. - -Note: Non-NT handle import operations do not add a reference to their associated payload. If the original object owning the payload is destroyed, all resources and handles sharing that payload will become invalid. - -[[cl_khr_external_memory-Sample-Code]] -=== Sample Code - -. Example for creating a CL buffer from an exported external buffer in a single device context. -+ --- -[source] ----- -// Get cl_devices of the platform. -clGetDeviceIDs(..., &devices, &deviceCount); - -// Create cl_context with just first device -clCreateContext(..., 1, devices, ...); - -// Obtain fd/win32 or similar handle for external memory to be imported -// from other API. -int fd = getFdForExternalMemory(); - -// Create extMemBuffer of type cl_mem from fd. 
-cl_mem_properties_khr extMemProperties[] = -{ - (cl_mem_properties_khr)CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_FD_KHR, - (cl_mem_properties_khr)fd, - 0 -}; - -cl_mem extMemBuffer = clCreateBufferWithProperties(/*context*/ clContext, - /*properties*/ extMemProperties, - /*flags*/ 0, - /*size*/ size, - /*host_ptr*/ NULL, - /*errcode_ret*/ &errcode_ret); ----- --- -. Example for creating a CL Image from an exported external Image for single device usage in a multi-device context -+ --- -[source] ----- -// Get cl_devices of the platform. -clGetDeviceIDs(..., &devices, &deviceCount); - -// Create cl_context with first two devices -clCreateContext(..., 2, devices, ...); - -// Create img of type cl_mem usable only on devices[0] - -// Create img of type cl_mem. -// Obtain fd/win32 or similar handle for external memory to be imported -// from other API. -int fd = getFdForExternalMemory(); - -// Set cl_image_format based on external image info -cl_image_format clImgFormat = { }; -clImageFormat.image_channel_order = CL_RGBA; -clImageFormat.image_channel_data_type = CL_UNORM_INT8; - -// Set cl_image_desc based on external image info -size_t clImageFormatSize; -cl_image_desc image_desc = { }; -image_desc.image_type = CL_MEM_OBJECT_IMAGE2D_ARRAY; -image_desc.image_width = width; -image_desc.image_height = height; -image_desc.image_depth = depth; -image_desc.image_array_size = num_slices; -image_desc.image_row_pitch = width * 8 * 4; // May need alignment -image_desc.image_slice_pitch = image_desc.image_row_pitch * height; -image_desc.num_mip_levels = 1; -image_desc.num_samples = 0; -image_desc.buffer = NULL; - -cl_mem_properties_khr extMemProperties[] = { - (cl_mem_properties_khr)CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_FD_KHR, - (cl_mem_properties_khr)fd, - (cl_mem_properties_khr)CL_MEM_DEVICE_HANDLE_LIST_KHR, - (cl_mem_properties_khr)devices[0], - CL_MEM_DEVICE_HANDLE_LIST_END_KHR, - 0 -}; - -cl_mem img = clCreateImageWithProperties(/*context*/ clContext, - /*properties*/ extMemProperties, - 
/*flags*/ 0, - /*image_format*/ &clImgFormat, - /*image_desc*/ &image_desc, - /*errcode_ret*/ &errcode_ret); - -// Use clGetImageInfo to get cl_image_format details. -size_t clImageFormatSize; -clGetImageInfo(img, - CL_IMAGE_FORMAT, - sizeof(cl_image_format), - &clImageFormat, - &clImageFormatSize); ----- --- -. Example for synchronization using Wait and Signal -+ --- -[source] ----- -// Start the main rendering loop - -// Create extSem of type cl_semaphore_khr using clCreateSemaphoreWithPropertiesKHR - -// Create extMem of type cl_mem using clCreateBufferWithProperties or clCreateImageWithProperties - -while (true) { - // (not shown) Signal the semaphore from the other API - - // Wait for the semaphore in OpenCL, by calling clEnqueueWaitSemaphoresKHR on 'extSem' - clEnqueueWaitSemaphoresKHR(/*command_queue*/ command_queue, - /*num_sema_objects*/ 1, - /*sema_objects*/ &extSem, - /*sema_payload_list*/ NULL, - /*num_events_in_wait_list*/ 0, - /*event_wait_list*/ NULL, - /*event*/ NULL); - - // Launch kernel that accesses extMem - clEnqueueNDRangeKernel(command_queue, ...); - - // Signal the semaphore in OpenCL - clEnqueueSignalSemaphoresKHR(/*command_queue*/ command_queue, - /*num_sema_objects*/ 1, - /*sema_objects*/ &extSem, - /*sema_payload_list*/ NULL, - /*num_events_in_wait_list*/ 0, - /*event_wait_list*/ NULL, - /*event*/ NULL); - - // (not shown) Launch work in other API that waits on 'extSem' -} ----- --- -. Example with memory sharing using acquire/release -+ --- -[source] ----- -// Create extSem of type cl_semaphore_khr using -// clCreateSemaphoreWithPropertiesKHR with CL_SEMAPHORE_HANDLE_*_KHR. - -// Create extMem1 and extMem2 of type cl_mem using clCreateBufferWithProperties -// or clCreateImageWithProperties - -while (true) { - // (not shown) Signal the semaphore from the other API. 
Wait for the - // semaphore in OpenCL, by calling clEnqueueWaitForSemaphore on extSem - clEnqueueWaitSemaphoresKHR(/*command_queue*/ cq1, - /*num_sema_objects*/ 1, - /*sema_objects*/ &extSem, - /*sema_payload_list*/ NULL, - /*num_events_in_wait_list*/ 0, - /*event_wait_list*/ NULL, - /*event*/ NULL); - - // Get explicit ownership of extMem1 - clEnqueueAcquireExternalMemObjectsKHR(/*command_queue*/ cq1, - /*num_mem_objects*/ 1, - /*mem_objects*/ extMem1, - /*num_events_in_wait_list*/ 0, - /*event_wait_list*/ NULL, - /*event*/ NULL); - - // Launch kernel that accesses extMem1 on cq1 on cl_device1 - clEnqueueNDRangeKernel(cq1, ..., &event1); - - // Launch kernel that accesses both extMem1 and extMem2 on cq2 on cl_device2 - // Migration of extMem1 and extMem2 handles through regular CL memory - // migration. - clEnqueueNDRangeKernel(cq2, ..., &event1, &event2); - - // Give up ownership of extMem1 before you signal the semaphore. Handle - // memory migration here. - clEnqueueReleaseExternalMemObjectsKHR(/*command_queue*/ cq2 - /*num_mem_objects*/ 1, - /*mem_objects*/ &extMem1, - /*num_events_in_wait_list*/ 0, - /*event_wait_list*/ NULL, - /*event*/ NULL); - - // Signal the semaphore from OpenCL - clEnqueueSignalSemaphoresKHR(/*command_queue*/ cq2, - /*num_sema_objects*/ 1, - /*sema_objects*/ &extSem, - /*sema_payload_list*/ NULL, - /*num_events_in_wait_list*/ 0, - /*event_wait_list*/ NULL, - /*event*/ NULL); - - // (not shown) Launch work in other API that waits on 'extSem' - // Other API accesses ext1, but not ext2 on device-1 -} ----- --- - - -=== Issues - -. How should the import of images that are created in external APIs with non-linear tiling be robustly handled? -+ --- -*UNRESOLVED* --- diff --git a/ext/cl_khr_external_semaphore.asciidoc b/ext/cl_khr_external_semaphore.asciidoc deleted file mode 100644 index 064ebd75..00000000 --- a/ext/cl_khr_external_semaphore.asciidoc +++ /dev/null @@ -1,662 +0,0 @@ -// Copyright 2021-2024 The Khronos Group. 
This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ - -[[cl_khr_external_semaphore]] -== External Semaphores (Provisional) - -`cl_khr_semaphore` introduced semaphores as a new type along with a set of APIs for create, release, retain, wait and signal operations on it. -This extension defines APIs and mechanisms to share semaphores created in an external API by importing into and exporting from OpenCL. - -This extension defines: - -* New attributes that can be passed as part of {cl_semaphore_properties_khr_TYPE} for specifying properties of external semaphores to be imported or exported. - -* New attributes that can be passed as part of {cl_semaphore_info_khr_TYPE} for specifying properties of external semaphores to be exported. - -* An extension to {clCreateSemaphoreWithPropertiesKHR} to accept external semaphore properties, allowing an external semaphore to be imported into or exported from OpenCL. - -* Semaphore handle types required for importing and exporting semaphores. - -* Modifications to Wait and Signal API behavior when dealing with external semaphores created from different handle types. - -* An API to query exportable semaphore handles using a specified handle type. - -Other related extensions define specific external semaphores that may be imported into or exported from OpenCL. - -=== General Information - -==== Name Strings - -`cl_khr_external_semaphore` + -`cl_khr_external_semaphore_dx_fence` + -`cl_khr_external_semaphore_opaque_fd` + -`cl_khr_external_semaphore_sync_fd` + -`cl_khr_external_semaphore_win32` - -==== Version History - -[cols="1,1,3",options="header",] -|==== -| *Date* | *Version* | *Description* -| 2021-09-10 | 0.9.0 | Initial version (provisional). -| 2023-11-16 | 0.9.1 | Added CL_SEMAPHORE_EXPORTABLE_KHR.
-| 2023-11-21 | 0.9.2 | Added re-import function call to cl_khr_external_semaphore_sync_fd -|==== - -include::provisional_notice.asciidoc[] - -==== Dependencies - -This extension is written against the OpenCL Specification Version 3.0.8. - -This extension requires OpenCL 1.2. - -The `cl_khr_semaphore` extension is required as it defines semaphore objects as well as the wait and signal operations on semaphores. - -For OpenCL to be able to import external semaphores from other APIs using this extension, the other API is required to provide the mechanisms below: - -* Ability to export semaphore handles -* Ability to query a semaphore handle in the form of one of the handle types supported by OpenCL. - -The other APIs that want to use semaphores exported by OpenCL using this extension are required to provide the mechanism below: - -* Ability to import semaphore handles using handle types exported by OpenCL. - -==== Contributors - -// spell-checker: disable -Ajit Hakke-Patil, NVIDIA + -Amit Rao, NVIDIA + -Balaji Calidas, QUALCOMM + -Ben Ashbaugh, INTEL + -Carsten Rohde, NVIDIA + -Christoph Kubisch, NVIDIA + -Debalina Bhattacharjee, NVIDIA + -Faith Ekstrand, INTEL + -James Jones, NVIDIA + -Jeremy Kemp, IMAGINATION + -Joshua Kelly, QUALCOMM + -Karthik Raghavan Ravi, NVIDIA + -Kedar Patil, NVIDIA + -Kevin Petit, ARM + -Nikhil Joshi, NVIDIA + -Sharan Ashwathnarayan, NVIDIA + -Vivek Kini, NVIDIA + -// spell-checker: enable - -=== New Types - -[source] ----- -typedef cl_uint cl_external_semaphore_handle_type_khr; ----- - -The `cl_khr_external_semaphore_sync_fd` extension adds: - -[source] ----- -typedef cl_properties cl_semaphore_reimport_properties_khr; ----- - -=== New API Functions - -[source] ----- -cl_int clGetSemaphoreHandleForTypeKHR( - cl_semaphore_khr sema_object, - cl_device_id device, - cl_external_semaphore_handle_type_khr handle_type, - size_t handle_size, - void *handle_ptr, - size_t *handle_size_ret); ----- - -The `cl_khr_external_semaphore_sync_fd` extension adds: - ----- 
-cl_int clReImportSemaphoreSyncFdKHR( - cl_semaphore_khr sema_object, - cl_semaphore_reimport_properties_khr *reimport_props, - int fd); ----- - -=== New API Enums - -Accepted value for the _param_name_ parameter to {clGetPlatformInfo} to query external semaphore handle types that may be imported or exported by all devices in an OpenCL platform: - -[source] ----- -CL_PLATFORM_SEMAPHORE_IMPORT_HANDLE_TYPES_KHR 0x2037 -CL_PLATFORM_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR 0x2038 ----- - -Accepted value for the _param_name_ parameter to {clGetDeviceInfo} to query external semaphore handle types that may be imported or exported by an OpenCL device: - -[source] ----- -CL_DEVICE_SEMAPHORE_IMPORT_HANDLE_TYPES_KHR 0x204D -CL_DEVICE_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR 0x204E ----- - -Following new attributes can be passed as part of {cl_semaphore_properties_khr_TYPE} and {cl_semaphore_info_khr_TYPE}: - -[source] ----- -CL_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR 0x203F -CL_SEMAPHORE_EXPORT_HANDLE_TYPES_LIST_END_KHR 0 ----- - -The following new attribute that can be passed as part of {cl_semaphore_info_khr_TYPE}: - -[source] ----- -CL_SEMAPHORE_EXPORTABLE_KHR 0x2054 ----- - -External semaphore handle type added by `cl_khr_external_semaphore_dx_fence`: - -[source] ----- -CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR 0x2059 ----- - -External semaphore handle type added by `cl_khr_external_semaphore_opaque_fd`: - -[source] ----- -CL_SEMAPHORE_HANDLE_OPAQUE_FD_KHR 0x2055 ----- - -External semaphore handle type added by `cl_khr_external_semaphore_sync_fd`: - -[source] ----- -CL_SEMAPHORE_HANDLE_SYNC_FD_KHR 0x2058 ----- - -External semaphore handle types added by `cl_khr_external_semaphore_win32`: - -[source] ----- -CL_SEMAPHORE_HANDLE_OPAQUE_WIN32_KHR 0x2056 -CL_SEMAPHORE_HANDLE_OPAQUE_WIN32_KMT_KHR 0x2057 ----- - -=== Modifications to existing APIs added by this spec - -Following new enums are added to the list of supported _param_names_ by {clGetPlatformInfo}: - -.List of supported param_names by 
{clGetPlatformInfo} -[width="100%",cols="<33%,<17%,<50%",options="header"] -|==== -| Platform Info | Return Type | Description -| {CL_PLATFORM_SEMAPHORE_IMPORT_HANDLE_TYPES_KHR} - | {cl_external_semaphore_handle_type_khr_TYPE}[] - | Returns the list of importable external semaphore handle types supported by all devices in _platform_. - The size of this query may be 0 if no importable external semaphore handle types are supported by all devices in _platform_. -| {CL_PLATFORM_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR} - | {cl_external_semaphore_handle_type_khr_TYPE}[] - | Returns the list of exportable external semaphore handle types supported by all devices in _platform_. - The size of this query may be 0 if no exportable external semaphore handle types are supported by all devices in _platform_. -|==== - -{clGetPlatformInfo} when called with _param_name_ {CL_PLATFORM_SEMAPHORE_IMPORT_HANDLE_TYPES_KHR} returns a common list of external semaphore handle types supported for importing by all devices in the platform. - -{clGetPlatformInfo} when called with _param_name_ {CL_PLATFORM_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR} returns a common list of external semaphore handle types supported for exporting by all devices in the platform. - -Following new enums are added to the list of supported _param_names_ by {clGetDeviceInfo}: - -.List of supported param_names by {clGetDeviceInfo} -[width="100%",cols="<33%,<17%,<50%",options="header"] -|==== -| Device Info | Return Type | Description -| {CL_DEVICE_SEMAPHORE_IMPORT_HANDLE_TYPES_KHR} - | {cl_external_semaphore_handle_type_khr_TYPE}[] - | Returns the list of importable external semaphore handle types supported by _device_. - The size of this query may be 0 indicating that the device does not support importing semaphores. -| {CL_DEVICE_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR} - | {cl_external_semaphore_handle_type_khr_TYPE}[] - | Returns the list of exportable external semaphore handle types supported by _device_. 
- The size of this query may be 0 indicating that the device does not support exporting semaphores. -|==== - -{clGetDeviceInfo} when called with _param_name_ {CL_DEVICE_SEMAPHORE_IMPORT_HANDLE_TYPES_KHR} returns a list of external semaphore handle types supported for importing. - -{clGetDeviceInfo} when called with _param_name_ {CL_DEVICE_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR} returns a list of external semaphore handle types supported for exporting. - -One of the above two queries {CL_DEVICE_SEMAPHORE_IMPORT_HANDLE_TYPES_KHR} and {CL_DEVICE_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR} must return a non-empty list indicating support for at least one of the valid semaphore handle types either for import or for export or both. - -Following new properties are added to the list of possible supported properties by {clCreateSemaphoreWithPropertiesKHR}: - -.List of supported semaphore creation properties by {clCreateSemaphoreWithPropertiesKHR} -[width="100%",cols="<33%,<17%,<50%",options="header"] -|==== -| Semaphore Property | Property Value | Description -| {CL_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR} - | {cl_external_semaphore_handle_type_khr_TYPE}[] - | Specifies the list of semaphore handle type properties terminated with - {CL_SEMAPHORE_EXPORT_HANDLE_TYPES_LIST_END_KHR} that can be used to export - the semaphore being created. -|==== - -Add to the list of error conditions for {clCreateSemaphoreWithPropertiesKHR}: - -* {CL_INVALID_DEVICE} if one or more devices identified by properties {CL_SEMAPHORE_DEVICE_HANDLE_LIST_KHR} can not import the requested external semaphore handle type. -* {CL_INVALID_VALUE} if more than one semaphore handle type is specified in the {CL_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR} list. -* {CL_INVALID_OPERATION} if _props_list_ specifies a {cl_external_semaphore_handle_type_khr_TYPE} followed by a handle as well as -{CL_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR}. 
Exporting a semaphore handle from a semaphore that was created by importing -an external semaphore handle is not permitted. - -Add to the list of supported _param_names_ by {clGetSemaphoreInfoKHR}: - -.List of supported param_names by {clGetSemaphoreInfoKHR} -[width="100%",cols="<33%,<17%,<50%",options="header"] -|==== -| Semaphore Info | Return Type | Description -| {CL_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR} - | {cl_external_semaphore_handle_type_khr_TYPE}[] - | Returns the list of external semaphore handle types that may be used for - exporting. The size of this query may be 0 indicating that this - semaphore does not support any handle types for exporting. -| {CL_SEMAPHORE_EXPORTABLE_KHR} - | {cl_bool_TYPE} - | Returns {CL_TRUE} if the semaphore is exportable and {CL_FALSE} otherwise. -|==== - -=== Exporting semaphore external handles - -To export an external handle from a semaphore, call the function - -include::{generated}/api/protos/clGetSemaphoreHandleForTypeKHR.txt[] - -_sema_object_ specifies a valid semaphore object with exportable properties. - -_device_ specifies a valid device for which a semaphore handle is being requested. - -_handle_type_ specifies the type of semaphore handle that should be returned for this exportable _sema_object_ and must be one of the values specified when _sema_object_ was created. - -_handle_size_ specifies the size of memory pointed to by _handle_ptr_. - -_handle_ptr_ is a pointer to memory where the exported external handle is returned. -If _handle_ptr_ is `NULL`, it is ignored. - -_handle_size_ret_ returns the actual size in bytes for the external handle. -If _handle_size_ret_ is `NULL`, it is ignored. - -{clGetSemaphoreHandleForTypeKHR} returns {CL_SUCCESS} if the semaphore handle is queried successfully. -Otherwise, it returns one of the following errors: - -* {CL_INVALID_SEMAPHORE_KHR} -** if _sema_object_ is not a valid semaphore -// This is redundant with the error below. 
-** if _sema_object_ is not exportable -* {CL_INVALID_DEVICE} -** if _device_ is not a valid device, or -** if _sema_object_ belongs to a context that is not associated with _device_, or -** if _sema_object_ can not be shared with _device_. -* {CL_INVALID_VALUE} if the requested external semaphore handle type was not specified when _sema_object_ was created. -* {CL_INVALID_VALUE} if _handle_size_ is less than the size needed to store the returned handle. -// I don't think this can happen. This would have been checked when the semaphore was created. -// ** if CL_SEMAPHORE_HANDLE_*_KHR is specified as one of the _sema_props_ and -// the property CL_SEMAPHORE_HANDLE_*_KHR does not identify a valid external -// memory handle property reported by -// CL_PLATFORM_SEMAPHORE_IMPORT_HANDLE_TYPES_KHR or -// CL_DEVICE_SEMAPHORE_IMPORT_HANDLE_TYPES_KHR queries. -* {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required by the OpenCL implementation on the device. -* {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources required by the OpenCL implementation on the host. - -=== Importing semaphore external handles - -Applications can import a semaphore payload into an existing semaphore using an -external semaphore handle. The effects of the import operation will be either -temporary or permanent, as specified by the application. If the import is -temporary, the implementation must restore the semaphore to its prior permanent -state after submitting the next semaphore wait operation. Performing a -subsequent temporary import on a semaphore before performing a semaphore wait -has no effect on this requirement; the next wait submitted on the semaphore must -still restore its last permanent state. A permanent payload import behaves as if -the target semaphore was destroyed, and a new semaphore was created with the -same handle but the imported payload. 
Because importing a semaphore payload -temporarily or permanently detaches the existing payload from a semaphore, -similar usage restrictions to those applied to {clReleaseSemaphoreKHR} are -applied to any command that imports a semaphore payload. Which of these import -types is used is referred to as the import operation's permanence. Each handle -type supports either one or both types of permanence. - -The implementation must perform the import operation by either referencing or -copying the payload referred to by the specified external semaphore handle, -depending on the handle's type. The import method used is referred to as the -handle type's transference. When using handle types with reference transference, -importing a payload to a semaphore adds the semaphore to the set of all -semaphores sharing that payload. This set includes the semaphore from which the -payload was exported. Semaphore signaling and waiting operations performed on -any semaphore in the set must behave as if the set were a single semaphore. -Importing a payload using handle types with copy transference creates a -duplicate copy of the payload at the time of import, but makes no further -reference to it. Semaphore signaling and waiting operations performed on the -target of copy imports must not affect any other semaphore or payload. - -Export operations have the same transference as the specified handle type's -import operations. Additionally, exporting a semaphore payload to a handle with -copy transference has the same side effects on the source semaphore's payload as -executing a semaphore wait operation. If the semaphore was using a temporarily -imported payload, the semaphore's prior permanent payload will be restored. - -Please refer to handle specific specifications for more details on transference and -permanence requirements specific to handle type. 
- -=== Descriptions of External Semaphore Handle Types - -This section describes the external semaphore handle types that are added by related extensions. - -Applications can import the same semaphore payload into multiple OpenCL contexts, into the same context from which it was exported, and multiple times into a given OpenCL context. -In all cases, each import operation must create a distinct semaphore object. - -==== File Descriptor Handle Types - -The `cl_khr_external_semaphore_opaque_fd` extension extends {cl_external_semaphore_handle_type_khr_TYPE} to support the following new types of handles, and adds as a property that may be specified when creating a semaphore from an external handle: - --- - * {CL_SEMAPHORE_HANDLE_OPAQUE_FD_KHR} specifies a POSIX file descriptor handle that has only limited valid usage outside of OpenCL and other compatible APIs. It must be compatible with the POSIX system calls dup, dup2, close, and the non-standard system call dup3. Additionally, it must be transportable over a socket using an SCM_RIGHTS control message. It owns a reference to the underlying synchronization primitive represented by its semaphore object. --- - -Transference and permanence properties for handle types added by `cl_khr_external_semaphore_opaque_fd`: - -.Transference and Permanence Properties for `cl_khr_external_semaphore_opaque_fd` handles -[width="100%",cols="60%,<20%,<20%",options="header"] -|==== -| Handle Type | Transference | Permanence -| {CL_SEMAPHORE_HANDLE_OPAQUE_FD_KHR} - | Reference - | Temporary, Permanent -|==== - -The `cl_khr_external_semaphore_sync_fd` extension extends {cl_external_semaphore_handle_type_khr_TYPE} to support the following new types of handles, and adds as a property that may be specified when creating a semaphore from an external handle: - --- - * {CL_SEMAPHORE_HANDLE_SYNC_FD_KHR} specifies a POSIX file descriptor handle to a Linux Sync File or Android Fence object. 
It can be used with any native API accepting a valid sync file or fence as input. It owns a reference to the underlying synchronization primitive associated with the file descriptor. Implementations which support importing this handle type must accept any type of sync or fence FD supported by the native system they are running on. --- - -The special value -1 for fd is treated like a valid sync file descriptor referring to an object that has already signaled. The import operation will succeed and the semaphore will have a temporarily imported payload as if a valid file descriptor had been provided. - -Note: This special behavior for importing an invalid sync file descriptor allows easier interoperability with other system APIs which use the convention that an invalid sync file descriptor represents work that has already completed and does not need to be waited for. It is consistent with the option for implementations to return a -1 file descriptor when exporting a {CL_SEMAPHORE_HANDLE_SYNC_FD_KHR} from a {cl_semaphore_khr_TYPE} which is signaled. - -Transference and permanence properties for handle types added by `cl_khr_external_semaphore_sync_fd`: - -.Transference and Permanence Properties for `cl_khr_external_semaphore_sync_fd` handles -[width="100%",cols="60%,<20%,<20%",options="header"] -|==== -| Handle Type | Transference | Permanence -| {CL_SEMAPHORE_HANDLE_SYNC_FD_KHR} - | Copy - | Temporary -|==== - -For these extensions, importing a semaphore payload from a file descriptor transfers ownership of the file descriptor from the application to the OpenCL implementation. The application must not perform any operations on the file descriptor after a successful import. - -A handle of type {CL_SEMAPHORE_HANDLE_SYNC_FD_KHR} may be re-imported into an existing semaphore using {clReImportSemaphoreSyncFdKHR}: - -include::{generated}/api/protos/clReImportSemaphoreSyncFdKHR.txt[] - -_sema_object_ specifies a valid semaphore object with importable properties. 
- -_reimport_props_ Must be `NULL`. Reserved for future use. - -_fd_ external file descriptor handle to import - -Calling {clReImportSemaphoreSyncFdKHR} is equivalent to destroying _sema_object_ and re-creating it with the original _sema_props_ -from {clCreateSemaphoreWithPropertiesKHR}, except a handle specified by _fd_ will be imported. -The semaphore _sema_object_ must have originally imported an external handle of type {CL_SEMAPHORE_HANDLE_SYNC_FD_KHR}. - -* {CL_INVALID_SEMAPHORE_KHR} -** if _sema_object_ is not a valid semaphore -* {CL_INVALID_SEMAPHORE_KHR} if a {CL_SEMAPHORE_HANDLE_SYNC_FD_KHR} handle was not imported when _sema_object_ was created. -* {CL_INVALID_VALUE} if _fd_ is invalid. -* {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources required by the OpenCL implementation on the host. -* {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required by the OpenCL implementation on the device. - -==== NT Handle Types - -The `cl_khr_external_semaphore_dx_fence` extension extends {cl_external_semaphore_handle_type_khr_TYPE} to support the following new types of handles, and adds as a property that may be specified when creating a semaphore from an external handle: - --- - * {CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR} specifies an NT handle returned by ID3D12Device::CreateSharedHandle referring to a Direct3D 12 fence, or ID3D11Device5::CreateFence referring to a Direct3D 11 fence. It owns a reference to the underlying synchronization primitive associated with the Direct3D fence. --- - -When waiting on semaphores using {clEnqueueWaitSemaphoresKHR} or signaling semaphores using {clEnqueueSignalSemaphoresKHR}, the semaphore payload must be provided for semaphores created from {CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR}. 
- - -* If _sema_objects_ list has a mix of semaphores obtained from {CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR} and other handle types, -then the _sema_payload_list_ should point to a list of _num_sema_objects_ payload values for each semaphore in _sema_objects_. -However, the payload values corresponding to semaphores with type {CL_SEMAPHORE_TYPE_BINARY_KHR} can be set to 0 or will be ignored. - -{clEnqueueWaitSemaphoresKHR} and {clEnqueueSignalSemaphoresKHR} may return {CL_INVALID_VALUE} if _sema_objects_ list has one or more semaphores obtained from {CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR} and _sema_payload_list_ is NULL. - - - -Transference and permanence properties for handle types added by `cl_khr_external_semaphore_dx_fence`: - --- -.Transference and Permanence Properties for `cl_khr_external_semaphore_dx_fence` handles -[width="100%",cols="60%,<20%,<20%",options="header"] -|==== -| Handle Type | Transference | Permanence -| {CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR} - | Reference - | Temporary, Permanent -|==== --- - -The `cl_khr_external_semaphore_win32` extension extends {cl_external_semaphore_handle_type_khr_TYPE} to support the following new types of handles, and adds as a property that may be specified when creating a semaphore from an external handle: - --- - * {CL_SEMAPHORE_HANDLE_OPAQUE_WIN32_KHR} specifies an NT handle that has only limited valid usage outside of OpenCL and other compatible APIs. It must be compatible with the functions DuplicateHandle, CloseHandle, CompareObjectHandles, GetHandleInformation, and SetHandleInformation. It owns a reference to the underlying synchronization primitive represented by its semaphore object. - * {CL_SEMAPHORE_HANDLE_OPAQUE_WIN32_KMT_KHR} specifies a global share handle that has only limited valid usage outside of OpenCL and other compatible APIs. It is not compatible with any native APIs. 
It does not own a reference to the underlying synchronization primitive represented by its semaphore object, and will therefore become invalid when all semaphore objects associated with it are destroyed. --- - -Transference and permanence properties for handle types added by `cl_khr_external_semaphore_win32`: - -.Transference and Permanence Properties for `cl_khr_external_semaphore_win32` handles -[width="100%",cols="60%,<20%,<20%",options="header"] -|==== -| Handle Type | Transference | Permanence -| {CL_SEMAPHORE_HANDLE_OPAQUE_WIN32_KHR} - | Reference - | Temporary, Permanent -| {CL_SEMAPHORE_HANDLE_OPAQUE_WIN32_KMT_KHR} - | Reference - | Temporary, Permanent -|==== - -For these extensions, importing a semaphore payload from Windows handles does not transfer ownership of the handle to the OpenCL implementation. For handle types defined as NT handles, the application must release ownership using the CloseHandle system call when the handle is no longer needed. - -[[cl_khr_external_semaphore-Sample-Code]] -=== Sample Code - -. Example for importing a semaphore created by another API in OpenCL in a single-device context. -+ --- -[source] ----- -// Get cl_devices of the platform. -clGetDeviceIDs(..., &devices, &deviceCount); - -// Create cl_context with just first device -clCreateContext(..., 1, devices, ...); - -// Obtain fd/win32 or similar handle for external semaphore to be imported -// from the other API. -int fd = getFdForExternalSemaphore(); - -// Create clSema of type cl_semaphore_khr usable on the only available device -// assuming the semaphore was imported from the same device. 
- -cl_semaphore_properties_khr sema_props[] = - {(cl_semaphore_properties_khr)CL_SEMAPHORE_TYPE_KHR, - (cl_semaphore_properties_khr)CL_SEMAPHORE_TYPE_BINARY_KHR, - (cl_semaphore_properties_khr)CL_SEMAPHORE_HANDLE_OPAQUE_FD_KHR, - (cl_semaphore_properties_khr)fd, - 0}; - - -int errcode_ret = 0; -cl_semaphore_khr clSema = clCreateSemaphoreWithPropertiesKHR(context, - sema_props, - &errcode_ret); - ----- --- -. Example for importing a semaphore created by another API in OpenCL in a multi-device context for single device usage. -+ --- -[source] ----- -// Get cl_devices of the platform. -clGetDeviceIDs(..., &devices, &deviceCount); - -// Create cl_context with first two devices -clCreateContext(..., 2, devices, ...); - -// Obtain fd/win32 or similar handle for external semaphore to be imported -// from the other API. -int fd = getFdForExternalSemaphore(); - -// Create clSema of type cl_semaphore_khr usable only on device 1 -// assuming the semaphore was imported from the same device. -cl_semaphore_properties_khr sema_props[] = { - (cl_semaphore_properties_khr)CL_SEMAPHORE_TYPE_KHR, - (cl_semaphore_properties_khr)CL_SEMAPHORE_TYPE_BINARY_KHR, - (cl_semaphore_properties_khr)CL_SEMAPHORE_HANDLE_OPAQUE_FD_KHR, - (cl_semaphore_properties_khr)fd, - (cl_semaphore_properties_khr)CL_SEMAPHORE_DEVICE_HANDLE_LIST_KHR, - (cl_semaphore_properties_khr)devices[1], - CL_SEMAPHORE_DEVICE_HANDLE_LIST_END_KHR, - 0 -}; - -int errcode_ret = 0; -cl_semaphore_khr clSema = clCreateSemaphoreWithPropertiesKHR(context, - sema_props, - &errcode_ret); - ----- --- -. Example for synchronization using a semaphore created by another API and imported in OpenCL -+ --- -[source] ----- -// Create clSema using one of the above examples of external semaphore creation. 
- -int errcode_ret = 0; -cl_semaphore_khr clSema = clCreateSemaphoreWithPropertiesKHR(context, - sema_props, - &errcode_ret); - -// Start the main loop - -while (true) { - // (not shown) Signal the semaphore from the other API - - // Wait for the semaphore in OpenCL - clEnqueueWaitSemaphoresKHR(/*command_queue*/ command_queue, - /*num_sema_objects*/ 1, - /*sema_objects*/ &clSema, - /*num_events_in_wait_list*/ 0, - /*event_wait_list*/ NULL, - /*event*/ NULL); - - // Launch kernel - clEnqueueNDRangeKernel(command_queue, ...); - - // Signal the semaphore in OpenCL - clEnqueueSignalSemaphoresKHR(/*command_queue*/ command_queue, - /*num_sema_objects*/ 1, - /*sema_objects*/ &clSema, - /*num_events_in_wait_list*/ 0, - /*event_wait_list*/ NULL, - /*event*/ NULL); - - // (not shown) Launch work in the other API that waits on 'clSema' - -} ----- --- -. Example for synchronization using semaphore exported by OpenCL -+ --- -[source] ----- - -// Get cl_devices of the platform. -clGetDeviceIDs(..., &devices, &deviceCount); - -// Create cl_context with first two devices -clCreateContext(..., 2, devices, ...); - -// Create clSema of type cl_semaphore_khr usable only on device 1 -cl_semaphore_properties_khr sema_props[] = { - (cl_semaphore_properties_khr)CL_SEMAPHORE_TYPE_KHR, - (cl_semaphore_properties_khr)CL_SEMAPHORE_TYPE_BINARY_KHR, - (cl_semaphore_properties_khr)CL_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR, - (cl_semaphore_properties_khr)CL_SEMAPHORE_HANDLE_OPAQUE_FD_KHR, - CL_SEMAPHORE_EXPORT_HANDLE_TYPES_LIST_END_KHR, - (cl_semaphore_properties_khr)CL_SEMAPHORE_DEVICE_HANDLE_LIST_KHR, - (cl_semaphore_properties_khr)devices[1], - CL_SEMAPHORE_DEVICE_HANDLE_LIST_END_KHR, - 0 -}; - -int errcode_ret = 0; -cl_semaphore_khr clSema = clCreateSemaphoreWithPropertiesKHR(context, - sema_props, - &errcode_ret); - -// Application queries handle-type and the exportable handle associated with the semaphore. 
-clGetSemaphoreInfoKHR(clSema, - CL_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR, - sizeof(cl_external_semaphore_handle_type_khr), - &handle_type, - &handle_type_size); - -// The other API or process can use the exported semaphore handle -// to import -int fd = -1; -if (handle_type == CL_SEMAPHORE_HANDLE_OPAQUE_FD_KHR) { - clGetSemaphoreHandleForTypeKHR(clSema, - device, - CL_SEMAPHORE_HANDLE_OPAQUE_FD_KHR, - sizeof(int), - &fd, - NULL); -} - -// Start the main rendering loop - -while (true) { - // (not shown) Signal the semaphore from the other API - - // Wait for the semaphore in OpenCL - clEnqueueWaitSemaphoresKHR(/*command_queue*/ command_queue, - /*num_sema_objects*/ 1, - /*sema_objects*/ &clSema, - /*num_events_in_wait_list*/ 0, - /*event_wait_list*/ NULL, - /*event*/ NULL); - - // Launch kernel - clEnqueueNDRangeKernel(command_queue, ...); - - // Signal the semaphore in OpenCL - clEnqueueSignalSemaphoresKHR(/*command_queue*/ command_queue, - /*num_sema_objects*/ 1, - /*sema_objects*/ &clSema, - /*num_events_in_wait_list*/ 0, - /*event_wait_list*/ NULL, - /*event*/ NULL); - - // (not shown) Launch work in the other API that waits on 'clSema' -} ----- --- diff --git a/ext/cl_khr_fp16.asciidoc b/ext/cl_khr_fp16.asciidoc deleted file mode 100644 index 595ff95c..00000000 --- a/ext/cl_khr_fp16.asciidoc +++ /dev/null @@ -1,1928 +0,0 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ - -[[cl_khr_fp16]] -== Half Precision Floating-Point - -This section describes the *cl_khr_fp16* extension. -This extension adds support for half scalar and vector types as built-in -types that can be used for arithmetic operations, conversions etc. - -=== General Information - -==== Version History - -[cols="1,1,3",options="header",] -|==== -| *Date* | *Version* | *Description* -| 2020-04-21 | 1.0.0 | First assigned version. 
-|==== - -[[cl_khr_fp16-additions-to-chapter-6-of-the-opencl-2.0-specification]] -=== Additions to Chapter 6 of the OpenCL 2.0 C Specification - -The list of built-in scalar, and vector data types defined in _tables 6.1_, -and _6.2_ are extended to include the following: - -[cols="1,3",options="header",] -|==== -| *Type* | *Description* -| *half2* | A 2-component half-precision floating-point vector. -| *half3* | A 3-component half-precision floating-point vector. -| *half4* | A 4-component half-precision floating-point vector. -| *half8* | A 8-component half-precision floating-point vector. -| *half16* | A 16-component half-precision floating-point vector. -|==== - -The built-in vector data types for `halfn` are also declared as appropriate -types in the OpenCL API (and header files) that can be used by an -application. -The following table describes the built-in vector data types for `halfn` as -defined in the OpenCL C programming language and the corresponding data type -available to the application: - -[cols=",",options="header",] -|==== -| *Type in OpenCL Language* | *API type for application* -| *half2* | *cl_half2* -| *half3* | *cl_half3* -| *half4* | *cl_half4* -| *half8* | *cl_half8* -| *half16* | *cl_half16* -|==== - -The relational, equality, logical and logical unary operators described in -_section 6.3_ can be used with `half` scalar and `halfn` vector types and -shall produce a scalar `int` and vector `shortn` result respectively. - -The OpenCL compiler accepts an h and H suffix on floating-point literals, -indicating the literal is typed as a half. - -[[cl_khr_fp16-conversions]] -==== Conversions - -The implicit conversion rules specified in _section 6.2.1_ now include the -`half` scalar and `halfn` vector data types. - -The explicit casts described in _section 6.2.2_ are extended to take a -`half` scalar data type and a `halfn` vector data type. 
- -The explicit conversion functions described in _section 6.2.3_ are extended -to take a `half` scalar data type and a `halfn` vector data type. - -The `as_typen()` function for re-interpreting types as described in _section -6.2.4.2_ is extended to allow conversion-free casts between `shortn`, -`ushortn`, and `halfn` scalar and vector data types. - -[[cl_khr_fp16-math-functions]] -==== Math Functions - -The built-in math functions defined in _table 6.8_ (also listed below) are -extended to include appropriate versions of functions that take `half` and -`half{2|3|4|8|16}` as arguments and return values. -`gentype` now also includes `half`, `half2`, `half3`, `half4`, `half8`, and -`half16`. - -For any specific use of a function, the actual type has to be the same for -all arguments and the return type. - -._Half Precision Built-in Math Functions_ -[cols=",",options="header",] -|==== -| *Function* -| *Description* - -| gentype *acos* (gentype _x_) -| Arc cosine function. - -| gentype *acosh* (gentype _x_) -| Inverse hyperbolic cosine. - -| gentype *acospi* (gentype _x_) -| Compute *acos* (_x_) / {pi}. - -| gentype *asin* (gentype _x_) -| Arc sine function. - -| gentype *asinh* (gentype _x_) -| Inverse hyperbolic sine. - -| gentype *asinpi* (gentype _x_) -| Compute *asin* (_x_) / {pi}. - -| gentype *atan* (gentype _y_over_x_) -| Arc tangent function. - -| gentype *atan2* (gentype _y_, gentype _x_) -| Arc tangent of _y_ / _x_. - -| gentype *atanh* (gentype _x_) -| Hyperbolic arc tangent. - -| gentype *atanpi* (gentype _x_) -| Compute *atan* (_x_) / {pi}. - -| gentype *atan2pi* (gentype _y_, gentype _x_) -| Compute *atan2* (_y_, _x_) / {pi}. - -| gentype *cbrt* (gentype _x_) -| Compute cube-root. - -| gentype *ceil* (gentype _x_) -| Round to integral value using the round to positive infinity rounding - mode. - -| gentype *copysign* (gentype _x_, gentype _y_) -| Returns _x_ with its sign changed to match the sign of _y_. 
- -| gentype *cos* (gentype _x_) -| Compute cosine. - -| gentype *cosh* (gentype _x_) -| Compute hyperbolic cosine. - -| gentype *cospi* (gentype _x_) -| Compute *cos* ({pi} _x_). - -| gentype *erfc* (gentype _x_) -| Complementary error function. - -| gentype *erf* (gentype _x_) -| Error function encountered in integrating the normal distribution. - -| gentype *exp* (gentype _x_) -| Compute the base- e exponential of _x_. - -| gentype *exp2* (gentype _x_) -| Exponential base 2 function. - -| gentype *exp10* (gentype _x_) -| Exponential base 10 function. - -| gentype *expm1* (gentype _x_) -| Compute _e^x^_- 1.0. - -| gentype *fabs* (gentype _x_) -| Compute absolute value of a floating-point number. - -| gentype *fdim* (gentype _x_, gentype _y_) -| _x_ - _y_ if _x_ > _y_, +0 if x is less than or equal to y. - -| gentype *floor* (gentype _x_) -| Round to integral value using the round to negative infinity rounding - mode. - -| gentype *fma* (gentype _a_, gentype _b_, gentype _c_) -| Returns the correctly rounded floating-point representation of the sum of - _c_ with the infinitely precise product of _a_ and _b_. - Rounding of intermediate products shall not occur. - Edge case behavior is per the IEEE 754-2008 standard. - -| gentype *fmax* (gentype _x_, gentype _y_) + - gentype *fmax* (gentype _x_, half _y_) -| Returns _y_ if _x_ < _y_, otherwise it returns _x_. - If one argument is a NaN, *fmax()* returns the other argument. - If both arguments are NaNs, *fmax()* returns a NaN. - -| gentype *fmin* (gentype _x_, gentype _y_) + - gentype *fmin* (gentype _x_, half _y_) -| Returns _y_ if _y_ < _x_, otherwise it returns _x_. - If one argument is a NaN, *fmin()* returns the other argument. - If both arguments are NaNs, *fmin()* returns a NaN. - -| gentype *fmod* (gentype _x_, gentype _y_) -| Modulus. - Returns _x_ - _y_ * *trunc* (_x_/_y_) . 
- -| gentype **fract** (gentype _x_, {global} gentype *_iptr_) + - gentype **fract** (gentype _x_, {local} gentype *_iptr_) + - gentype **fract** (gentype _x_, {private} gentype *_iptr_) + - - For OpenCL C 2.0 or with the `+__opencl_c_generic_address_space+` - feature macro: + - - gentype **fract** (gentype _x_, gentype *_iptr_) -| Returns *fmin*( _x_ - *floor* (_x_), 0x1.ffcp-1f ). - - *floor*(x) is returned in _iptr_. - -| half__n__ **frexp** (half__n x__, {global} int__n__ *exp) + - half **frexp** (half _x_, {global} int *exp) + - - half__n__ **frexp** (half__n x__, {local} int__n__ *exp) + - half **frexp** (half _x_, {local} int *exp) + - - half__n__ **frexp** (half__n x__, {private} int__n__ *exp) + - half **frexp** (half _x_, {private} int *exp) + - - For OpenCL C 2.0 or with the `+__opencl_c_generic_address_space+` - feature macro: + - - half__n__ **frexp** (half__n__ _x_, int__n__ *exp) + - half **frexp** (half _x_, int *exp) -| Extract mantissa and exponent from _x_. - For each component the mantissa returned is a `half` with magnitude - in the interval [1/2, 1) or 0. - Each component of _x_ equals mantissa returned * 2__^exp^__. - -| gentype *hypot* (gentype _x_, gentype _y_) -| Compute the value of the square root of __x__^2^+ __y__^2^ without undue - overflow or underflow. - -| int__n__ *ilogb* (half__n__ _x_) + - int *ilogb* (half _x_) -| Return the exponent as an integer value. - -| half__n__ *ldexp* (half__n__ _x_, int__n__ _k_) + - half__n__ *ldexp* (half__n__ _x_, int _k_) + - half *ldexp* (half _x_, int _k_) -| Multiply _x_ by 2 to the power _k_. 
- -| gentype **lgamma** (gentype _x_) + - - half__n__ **lgamma_r** (half__n__ _x_, {global} int__n__ *_signp_) + - half **lgamma_r** (half _x_, {global} int *_signp_) + - - half__n__ **lgamma_r** (half__n__ _x_, {local} int__n__ *_signp_) + - half **lgamma_r** (half _x_, {local} int *_signp_) + - - half__n__ **lgamma_r** (half__n__ _x_, {private} int__n__ *_signp_) + - half **lgamma_r** (half _x_, {private} int *_signp_) + - - For OpenCL C 2.0 or with the `+__opencl_c_generic_address_space+` - feature macro: + - - half__n__ **lgamma_r** (half__n__ _x_, int__n__ *_signp_) + - half **lgamma_r** (half _x_, int *_signp_) -| Log gamma function. - Returns the natural logarithm of the absolute value of the gamma function. - The sign of the gamma function is returned in the _signp_ argument of - *lgamma_r*. - -| gentype *log* (gentype _x_) -| Compute natural logarithm. - -| gentype *log2* (gentype _x_) -| Compute a base 2 logarithm. - -| gentype *log10* (gentype _x_) -| Compute a base 10 logarithm. - -| gentype *log1p* (gentype _x_) -| Compute log~e~(1.0 + _x_) . - -| gentype *logb* (gentype _x_) -| Compute the exponent of _x_, which is the integral part of - log__~r~__\|_x_\|. - -| gentype *mad* (gentype _a_, gentype _b_, gentype _c_) -| *mad* computes _a_ * _b_ + _c_. - The function may compute _a_ * _b_ + _c_ with reduced accuracy - in the embedded profile. See the OpenCL SPIR-V Environment Specification - for details. On some hardware the mad instruction may provide better - performance than expanded computation of _a_ * _b_ + _c_. - - Note: For some usages, e.g. *mad*(a, b, -a*b), the half precision - definition of *mad*() is loose enough that almost any result is allowed - from *mad*() for some values of a and b. - -| gentype *maxmag* (gentype _x_, gentype _y_) -| Returns _x_ if \|_x_\| > \|_y_\|, _y_ if \|_y_\| > \|_x_\|, otherwise - *fmax*(_x_, _y_). 
- -| gentype *minmag* (gentype _x_, gentype _y_) -| Returns _x_ if \|_x_\| < \|_y_\|, _y_ if \|_y_\| < \|_x_\|, otherwise - *fmin*(_x_, _y_). - -| gentype **modf** (gentype _x_, {global} gentype *_iptr_) + - gentype **modf** (gentype _x_, {local} gentype *_iptr_) + - gentype **modf** (gentype _x_, {private} gentype *_iptr_) + - - For OpenCL C 2.0 or with the `+__opencl_c_generic_address_space+` - feature macro: + - - gentype **modf** (gentype _x_, gentype *_iptr_) -| Decompose a floating-point number. - The *modf* function breaks the argument _x_ into integral and fractional - parts, each of which has the same sign as the argument. - It stores the integral part in the object pointed to by _iptr_. - -| half__n__ *nan* (ushort__n__ _nancode_) + - half *nan* (ushort _nancode_) -| Returns a quiet NaN. - The _nancode_ may be placed in the significand of the resulting NaN. - -| gentype *nextafter* (gentype _x_, gentype _y_) -| Computes the next representable half-precision floating-point value - following _x_ in the direction of _y_. - Thus, if _y_ is less than _x_, *nextafter*() returns the largest - representable floating-point number less than _x_. - -| gentype *pow* (gentype _x_, gentype _y_) -| Compute _x_ to the power _y_. - -| half__n__ *pown* (half__n__ _x_, int__n__ _y_) + - half *pown* (half _x_, int _y_) -| Compute _x_ to the power _y_, where _y_ is an integer. - -| gentype *powr* (gentype _x_, gentype _y_) -| Compute _x_ to the power _y_, where _x_ is >= 0. - -| gentype *remainder* (gentype _x_, gentype _y_) -| Compute the value _r_ such that _r_ = _x_ - _n_*_y_, where _n_ is the - integer nearest the exact value of _x_/_y_. - If there are two integers closest to _x_/_y_, _n_ shall be the even one. - If _r_ is zero, it is given the same sign as _x_. 
- -| half__n__ **remquo** (half__n__ _x_, half__n__ _y_, {global} int__n__ *_quo_) + - half **remquo** (half _x_, half _y_, {global} int *_quo_) + - - half__n__ **remquo** (half__n__ _x_, half__n__ _y_, {local} int__n__ *_quo_) + - half **remquo** (half _x_, half _y_, {local} int *_quo_) + - - half__n__ **remquo** (half__n__ _x_, half__n__ _y_, {private} int__n__ *_quo_) + - half **remquo** (half _x_, half _y_, {private} int *_quo_) + - - For OpenCL C 2.0 or with the `+__opencl_c_generic_address_space+` - feature macro: + - - half__n__ **remquo** (half__n__ _x_, half__n__ _y_, int__n__ *_quo_) + - half **remquo** (half _x_, half _y_, int *_quo_) -| The *remquo* function computes the value r such that _r_ = _x_ - _k_*_y_, - where _k_ is the integer nearest the exact value of _x_/_y_. - If there are two integers closest to _x_/_y_, _k_ shall be the even one. - If _r_ is zero, it is given the same sign as _x_. - This is the same value that is returned by the *remainder* function. - *remquo* also calculates the lower seven bits of the integral quotient - _x_/_y_, and gives that value the same sign as _x_/_y_. - It stores this signed value in the object pointed to by _quo_. - -| gentype *rint* (gentype _x_) -| Round to integral value (using round to nearest even rounding mode) in - floating-point format. - Refer to section 7.1 for description of rounding modes. - -| half__n__ *rootn* (half__n__ _x_, int__n__ _y_) + - half *rootn* (half _x_, int _y_) -| Compute _x_ to the power 1/_y_. - -| gentype *round* (gentype _x_) -| Return the integral value nearest to _x_ rounding halfway cases away from - zero, regardless of the current rounding direction. - -| gentype *rsqrt* (gentype _x_) -| Compute inverse square root. - -| gentype *sin* (gentype _x_) -| Compute sine. 
- -| gentype **sincos** (gentype _x_, {global} gentype *_cosval_) + - gentype **sincos** (gentype _x_, {local} gentype *_cosval_) + - gentype **sincos** (gentype _x_, {private} gentype *_cosval_) + - - For OpenCL C 2.0 or with the `+__opencl_c_generic_address_space+` - feature macro: + - - gentype **sincos** (gentype _x_, gentype *_cosval_) -| Compute sine and cosine of x. - The computed sine is the return value and computed cosine is returned in - _cosval_. - -| gentype *sinh* (gentype _x_) -| Compute hyperbolic sine. - -| gentype *sinpi* (gentype _x_) -| Compute *sin* ({pi} _x_). - -| gentype *sqrt* (gentype _x_) -| Compute square root. - -| gentype *tan* (gentype _x_) -| Compute tangent. - -| gentype *tanh* (gentype _x_) -| Compute hyperbolic tangent. - -| gentype *tanpi* (gentype _x_) -| Compute *tan* ({pi} _x_). - -| gentype *tgamma* (gentype _x_) -| Compute the gamma function. - -| gentype *trunc* (gentype _x_) -| Round to integral value using the round to zero rounding mode. -|==== - -The *FP_FAST_FMA_HALF* macro indicates whether the *fma()* family of -functions are fast compared with direct code for half precision -floating-point. -If defined, the *FP_FAST_FMA_HALF* macro shall indicate that the *fma()* -function generally executes about as fast as, or faster than, a multiply and -an add of *half* operands. - -The macro names given in the following list must use the values specified. -These constant expressions are suitable for use in #if preprocessing -directives. - -[source,opencl_c] ----- -#define HALF_DIG 3 -#define HALF_MANT_DIG 11 -#define HALF_MAX_10_EXP +4 -#define HALF_MAX_EXP +16 -#define HALF_MIN_10_EXP -4 -#define HALF_MIN_EXP -13 -#define HALF_RADIX 2 -#define HALF_MAX 0x1.ffcp15h -#define HALF_MIN 0x1.0p-14h -#define HALF_EPSILON 0x1.0p-10h ----- - -The following table describes the built-in macro names given above in the -OpenCL C programming language and the corresponding macro names available to -the application. 
- -[cols=",",options="header",] -|==== -| *Macro in OpenCL Language* | *Macro for application* -| `HALF_DIG` | {CL_HALF_DIG} -| `HALF_MANT_DIG` | {CL_HALF_MANT_DIG} -| `HALF_MAX_10_EXP` | {CL_HALF_MAX_10_EXP} -| `HALF_MAX_EXP` | {CL_HALF_MAX_EXP} -| `HALF_MIN_10_EXP` | {CL_HALF_MIN_10_EXP} -| `HALF_MIN_EXP` | {CL_HALF_MIN_EXP} -| `HALF_RADIX` | {CL_HALF_RADIX} -| `HALF_MAX` | {CL_HALF_MAX} -| `HALF_MIN` | {CL_HALF_MIN} -| `HALF_EPSILON` | {CL_HALF_EPSILON} -|==== - -The following constants are also available. -They are of type `half` and are accurate within the precision of the `half` -type. - -[cols=",",options="header",] -|==== -| *Constant* | *Description* -| `M_E_H` | Value of e -| `M_LOG2E_H` | Value of log~2~e -| `M_LOG10E_H` | Value of log~10~e -| `M_LN2_H` | Value of log~e~2 -| `M_LN10_H` | Value of log~e~10 -| `M_PI_H` | Value of {pi} -| `M_PI_2_H` | Value of {pi} / 2 -| `M_PI_4_H` | Value of {pi} / 4 -| `M_1_PI_H` | Value of 1 / {pi} -| `M_2_PI_H` | Value of 2 / {pi} -| `M_2_SQRTPI_H` | Value of 2 / {sqrt}{pi} -| `M_SQRT2_H` | Value of {sqrt}2 -| `M_SQRT1_2_H` | Value of 1 / {sqrt}2 -|==== - -[[cl_khr_fp16-common-functions]] -==== Common Functions - -The built-in common functions defined in _table 6.12_ (also listed below) -are extended to include appropriate versions of functions that take `half` -and `half{2|3|4|8|16}` as arguments and return values. -gentype now also includes `half`, `half2`, `half3`, `half4`, `half8` and -`half16`. -These are described below. - -._Half Precision Built-in Common Functions_ -[cols=",",options="header",] -|==== -| *Function* -| *Description* - -| gentype *clamp* ( + - gentype _x_, gentype _minval_, gentype _maxval_) - - gentype *clamp* ( + - gentype _x_, half _minval_, half _maxval_) -| Returns *fmin*(*fmax*(_x_, _minval_), _maxval_). - - Results are undefined if _minval_ > _maxval_. - -| gentype *degrees* (gentype _radians_) -| Converts _radians_ to degrees, + - i.e. (180 / {pi}) * _radians_.
- -| gentype *max* (gentype _x_, gentype _y_) + - gentype *max* (gentype _x_, half _y_) -| Returns _y_ if _x_ < _y_, otherwise it returns _x_. - If _x_ and _y_ are infinite or NaN, the return values are undefined. - -| gentype *min* (gentype _x_, gentype _y_) + - gentype *min* (gentype _x_, half _y_) -| Returns _y_ if _y_ < _x_, otherwise it returns _x_. - If _x_ and _y_ are infinite or NaN, the return values are undefined. - -| gentype *mix* (gentype _x_, gentype _y_, gentype _a_) + - gentype *mix* (gentype _x_, gentype _y_, half _a_) -| Returns the linear blend of _x_ and _y_ implemented as: - - _x_ + (_y_ - _x_) * _a_ - - _a_ must be a value in the range 0.0 ... 1.0. - If _a_ is not in the range 0.0 ... 1.0, the return values are undefined. - - Note: The half precision *mix* function can be implemented using contractions such as *mad* or *fma*. - -| gentype *radians* (gentype _degrees_) -| Converts _degrees_ to radians, i.e. ({pi} / 180) * _degrees_. - -| gentype *step* (gentype _edge_, gentype _x_) + - gentype *step* (half _edge_, gentype _x_) -| Returns 0.0 if _x_ < _edge_, otherwise it returns 1.0. - -| gentype *smoothstep* ( + - gentype _edge0_, gentype _edge1_, gentype _x_) - - gentype *smoothstep* ( + - half _edge0_, half _edge1_, gentype _x_) -| Returns 0.0 if _x_ \<= _edge0_ and 1.0 if _x_ >= _edge1_ and performs - smooth Hermite interpolation between 0 and 1 when _edge0_ < _x_ < _edge1_. - This is useful in cases where you would want a threshold function with a - smooth transition. - - This is equivalent to: - - gentype _t_; + - _t_ = clamp ((_x_ - _edge0_) / (_edge1_ - _edge0_), 0, 1); + - return _t_ * _t_ * (3 - 2 * _t_); + - - Results are undefined if _edge0_ >= _edge1_. - - Note: The half precision *smoothstep* function can be implemented using contractions such as *mad* or *fma*. - -| gentype *sign* (gentype _x_) -| Returns 1.0 if _x_ > 0, -0.0 if _x_ = -0.0, +0.0 if _x_ = +0.0, or -1.0 if - _x_ < 0. - Returns 0.0 if _x_ is a NaN.
- -|==== - -[[cl_khr_fp16-geometric-functions]] -==== Geometric Functions - -The built-in geometric functions defined in _table 6.13_ (also listed below) -are extended to include appropriate versions of functions that take `half` -and `half{2|3|4}` as arguments and return values. -gentype now also includes `half`, `half2`, `half3` and `half4`. -These are described below. - -Note: The half precision geometric functions can be implemented using -contractions such as *mad* or *fma*. - -._Half Precision Built-in Geometric Functions_ -[cols=",",options="header",] -|==== -| *Function* -| *Description* - -| half4 *cross* (half4 _p0_, half4 _p1_) + - half3 *cross* (half3 _p0_, half3 _p1_) -| Returns the cross product of _p0.xyz_ and _p1.xyz_. - The _w_ component of the result will be 0.0. - -| half *dot* (gentype _p0_, gentype _p1_) -| Compute the dot product of _p0_ and _p1_. - -| half *distance* (gentype _p0_, gentype _p1_) -| Returns the distance between _p0_ and _p1_. - This is calculated as *length*(_p0_ - _p1_). - -| half *length* (gentype _p_) -| Return the length of vector _p_, i.e., + - sqrt( __p.x__^2^ + __p.y__^2^ + ... ) - -| gentype *normalize* (gentype _p_) -| Returns a vector in the same direction as _p_ but with a length of 1. - -|==== - -[[cl_khr_fp16-relational-functions]] -==== Relational Functions - -The scalar and vector relational functions described in _table 6.14_ are -extended to include versions that take `half`, `half2`, `half3`, `half4`, -`half8` and `half16` as arguments. - -The relational and equality operators (<, \<=, >, >=, !=, ==) can be used -with `halfn` vector types and shall produce a vector `shortn` result as -described in _section 6.3_.
- -The functions *isequal*, *isnotequal*, *isgreater*, *isgreaterequal*, -*isless*, *islessequal*, *islessgreater*, *isfinite*, *isinf*, *isnan*, -*isnormal*, *isordered*, *isunordered* and *signbit* shall return a 0 if the -specified relation is _false_ and a 1 if the specified relation is true for -scalar argument types. -These functions shall return a 0 if the specified relation is _false_ and a --1 (i.e. all bits set) if the specified relation is _true_ for vector -argument types. - -The relational functions *isequal*, *isgreater*, *isgreaterequal*, *isless*, -*islessequal*, and *islessgreater* always return 0 if either argument is not -a number (NaN). -*isnotequal* returns 1 if one or both arguments are not a number (NaN) and -the argument type is a scalar and returns -1 if one or both arguments are -not a number (NaN) and the argument type is a vector. - -The functions described in _table 6.14_ are extended to include the `halfn` -vector types. - -._Half Precision Relational Functions_ -[cols=",",options="header",] -|==== -| *Function* -| *Description* - -| int *isequal* (half _x_, half _y_) + - short__n__ *isequal* (half__n x__, half__n y__) -| Returns the component-wise compare of _x_ == _y_. - -| int *isnotequal* (half _x_, half _y_) + - short__n__ *isnotequal* (half__n x__, half__n y__) -| Returns the component-wise compare of _x_ != _y_. - -| int *isgreater* (half _x_, half _y_) + - short__n__ *isgreater* (half__n x__, half__n y__) -| Returns the component-wise compare of _x_ > _y_. - -| int *isgreaterequal* (half _x_, half _y_) + - short__n__ *isgreaterequal* (half__n x__, half__n y__) -| Returns the component-wise compare of _x_ >= _y_. - -| int *isless* (half _x_, half _y_) + - short__n__ *isless* (half__n x__, half__n y__) -| Returns the component-wise compare of _x_ < _y_. - -| int *islessequal* (half _x_, half _y_) + - short__n__ *islessequal* (half__n x__, half__n y__) -| Returns the component-wise compare of _x_ \<= _y_. 
- -| int *islessgreater* (half _x_, half _y_) + - short__n__ *islessgreater* (half__n x__, half__n y__) -| Returns the component-wise compare of (_x_ < _y_) \|\| (_x_ > _y_) . - -| | - -| int *isfinite* (half) + - short__n__ *isfinite* (half__n__) -| Test for finite value. - -| int *isinf* (half) + - short__n__ *isinf* (half__n__) -| Test for infinity value (positive or negative) . - -| int *isnan* (half) + - short__n__ *isnan* (half__n__) -| Test for a NaN. - -| int *isnormal* (half) + - short__n__ *isnormal* (half__n__) -| Test for a normal value. - -| int *isordered* (half _x_, half _y_) + - short__n__ *isordered* (half__n x__, half__n y__) -| Test if arguments are ordered. - *isordered*() takes arguments _x_ and _y_, and returns the result - *isequal*(_x_, _x_) && *isequal*(_y_, _y_). - -| int *isunordered* (half _x_, half _y_) + - short__n__ *isunordered* (half__n x__, half__n y__) -| Test if arguments are unordered. - *isunordered*() takes arguments _x_ and _y_, returning non-zero if _x_ or - _y_ is a NaN, and zero otherwise. - -| int *signbit* (half) + - short__n__ *signbit* (half__n__) -| Test for sign bit. - The scalar version of the function returns a 1 if the sign bit in the half - is set else returns 0. - The vector version of the function returns the following for each - component in half__n__: -1 (i.e all bits set) if the sign bit in the half - is set else returns 0. - -| | - -| half__n__ *bitselect* (half__n a__, half__n b__, half__n c__) -| Each bit of the result is the corresponding bit of _a_ if the - corresponding bit of _c_ is 0. - Otherwise it is the corresponding bit of _b_. - -| half__n__ *select* (half__n a__, half__n b__, short__n__ _c_) + - half__n__ *select* (half__n a__, half__n b__, ushort__n__ _c_) -| For each component, + - _result[i]_ = if MSB of _c[i]_ is set ? _b[i]_ : _a[i]_. 
+ - -|==== - -[[cl_khr_fp16-vector-data-load-and-store-functions]] -==== Vector Data Load and Store Functions - -The vector data load (*vload__n__*) and store (*vstore__n__*) functions -described in _table 6.13_ (also listed below) are extended to include -versions that read or write half vector values. -The generic type `gentype` is extended to include `half`. -The generic type `gentypen` is extended to include `half2`, `half3`, -`half4`, `half8`, and `half16`. - -Note: *vload3* reads _x_, _y_, _z_ components from address -(_p_ + (_offset_ * 3)) into a 3-component vector and *vstore3* writes _x_, _y_, _z_ -components from a 3-component vector to address (_p_ + (_offset_ * 3)). - -._Half Precision Vector Data Load and Store Functions_ -[cols=",",options="header",] -|==== -| *Function* -| *Description* - -| gentype__n__ **vload__n__**(size_t _offset_, const {global} gentype *_p_) + - gentype__n__ **vload__n__**(size_t _offset_, const {local} gentype *_p_) + - gentype__n__ **vload__n__**(size_t _offset_, const {constant} gentype *_p_) + - gentype__n__ **vload__n__**(size_t _offset_, const {private} gentype *_p_) + - - For OpenCL C 2.0 or with the `+__opencl_c_generic_address_space+` - feature macro: + - - gentype__n__ **vload__n__**(size_t _offset_, const gentype *_p_) -| Return sizeof (gentype__n__) bytes of data read from address - (_p_ + (_offset * n_)). - If gentype is half, the read address computed as (_p_ + (_offset * n_)) - must be 16-bit aligned. - -| void **vstore__n__**(gentype__n__ _data_, size_t _offset_, {global} gentype *_p_) + - void **vstore__n__**(gentype__n__ _data_, size_t _offset_, {local} gentype *_p_) + - void **vstore__n__**(gentype__n__ _data_, size_t _offset_, {private} gentype *_p_) + - - For OpenCL C 2.0 or with the `+__opencl_c_generic_address_space+` - feature macro: + - - void **vstore__n__**(gentype__n__ _data_, size_t _offset_, gentype *_p_) -| Write sizeof (gentype__n__) bytes given by _data_ to address - (_p_ + (_offset * n_)). 
- If gentype is half, the write address computed as (_p_ + (_offset * n_)) - must be 16-bit aligned. - -|==== - -[[cl_khr_fp16-async-copies-from-global-to-local-memory-local-to-global-memory-and-prefetch]] -==== Async Copies from Global to Local Memory, Local to Global Memory, and Prefetch - -The OpenCL C programming language implements the following functions that -provide asynchronous copies between global and local memory and a prefetch -from global memory. - -The generic type `gentype` is extended to include `half`, `half2`, `half3`, -`half4`, `half8`, and `half16`. - -._Half Precision Built-in Async Copy and Prefetch Functions_ -[cols=",",options="header",] -|==== -| *Function* -| *Description* - -| event_t **async_work_group_copy** ( + - {local} gentype *_dst_, + - const {global} gentype *_src_, + - size_t _num_gentypes_, event_t _event_) - - event_t **async_work_group_copy** ( + - {global} gentype _*dst_, + - const {local} gentype *_src_, + - size_t _num_gentypes_, event_t _event_) -| Perform an async copy of _num_gentypes_ gentype elements from _src_ to - _dst_. - The async copy is performed by all work-items in a work-group and this - built-in function must therefore be encountered by all work-items in a - work-group executing the kernel with the same argument values; otherwise - the results are undefined. - - Returns an event object that can be used by *wait_group_events* to wait - for the async copy to finish. - The _event_ argument can also be used to associate the - *async_work_group_copy* with a previous async copy allowing an event to be - shared by multiple async copies; otherwise _event_ should be zero. - - If _event_ argument is not zero, the event object supplied in _event_ - argument will be returned. - - This function does not perform any implicit synchronization of source data - such as using a *barrier* before performing the copy. 
- -| | - -| event_t **async_work_group_strided_copy** ( + - {local} gentype _*dst_, + - const {global} gentype *_src_, + - size_t _num_gentypes_, + - size_t _src_stride_, event_t _event_) - - event_t **async_work_group_strided_copy** ( + - {global} gentype _*dst_, + - const {local} gentype *_src_, + - size_t _num_gentypes_, + - size_t _dst_stride_, event_t _event_) -| Perform an async gather of _num_gentypes_ gentype elements from _src_ to - _dst_. - The _src_stride_ is the stride in elements for each gentype element read - from _src_. - The async gather is performed by all work-items in a work-group and this - built-in function must therefore be encountered by all work-items in a - work-group executing the kernel with the same argument values; otherwise - the results are undefined. - - Returns an event object that can be used by *wait_group_events* to wait - for the async copy to finish. - The _event_ argument can also be used to associate the - *async_work_group_strided_copy* with a previous async copy allowing an - event to be shared by multiple async copies; otherwise _event_ should be - zero. - - If _event_ argument is not zero, the event object supplied in _event_ - argument will be returned. - - This function does not perform any implicit synchronization of source data - such as using a *barrier* before performing the copy. - - The behavior of *async_work_group_strided_copy* is undefined if - _src_stride_ or _dst_stride_ is 0, or if the _src_stride_ or _dst_stride_ - values cause the _src_ or _dst_ pointers to exceed the upper bounds of the - address space during the copy. - -| | - -| void *wait_group_events* ( + - int _num_events_, event_t *_event_list_) -| Wait for events that identify the *async_work_group_copy* operations to - complete. - The event objects specified in _event_list_ will be released after the - wait is performed. 
- - This function must be encountered by all work-items in a work-group - executing the kernel with the same _num_events_ and event objects - specified in _event_list_; otherwise the results are undefined. - -| void *prefetch* ( + - const {global} gentype *__p__, size_t _num_gentypes_) -| Prefetch _num_gentypes_ * sizeof(gentype) bytes into the global cache. - The prefetch instruction is applied to a work-item in a work-group and - does not affect the functional behavior of the kernel. - -|==== - -[[cl_khr_fp16-image-read-and-write-functions]] -==== Image Read and Write Functions - -The image read and write functions defined in _tables 6.23_, _6.24_ and -_6.25_ are extended to support image color values that are a `half` type. - -==== Built-in Image Read Functions - -._Half Precision Built-in Image Read Functions_ -[cols=",",options="header",] -|==== -| *Function* -| *Description* - -| half4 *read_imageh* ( + - read_only image2d_t _image_, + - sampler_t _sampler_, + - int2 _coord_) - - half4 *read_imageh* ( + - read_only image2d_t _image_, + - sampler_t _sampler_, + - float2 _coord_) -| Use the coordinate _(coord.x, coord.y)_ to do an element lookup in the 2D - image object specified by _image_. - - *read_imageh* returns half precision floating-point values in the range - [0.0 ... 1.0] for image objects created with _image_channel_data_type_ set - to one of the pre-defined packed formats, {CL_UNORM_INT8}, or - {CL_UNORM_INT16}. - - *read_imageh* returns half precision floating-point values in the range - [-1.0 ... 1.0] for image objects created with _image_channel_data_type_ - set to {CL_SNORM_INT8}, or {CL_SNORM_INT16}. - - *read_imageh* returns half precision floating-point values for image - objects created with _image_channel_data_type_ set to {CL_HALF_FLOAT}. 
- - The *read_imageh* calls that take integer coordinates must use a sampler - with filter mode set to `CLK_FILTER_NEAREST`, normalized coordinates set to - `CLK_NORMALIZED_COORDS_FALSE` and addressing mode set to - `CLK_ADDRESS_CLAMP_TO_EDGE`, `CLK_ADDRESS_CLAMP` or `CLK_ADDRESS_NONE`; - otherwise the values returned are undefined. - - Values returned by *read_imageh* for image objects with - _image_channel_data_type_ values not specified in the description above - are undefined. - -| | - -| half4 *read_imageh* ( + - read_only image3d_t _image_, + - sampler_t _sampler_, + - int4 _coord_ ) - - half4 *read_imageh* ( + - read_only image3d_t _image_, + - sampler_t _sampler_, + - float4 _coord_) -| Use the coordinate _(coord.x_, _coord.y_, _coord.z)_ to do an - element lookup in the 3D image object specified by _image_. _coord.w_ is - ignored. - - *read_imageh* returns half precision floating-point values in the range - [0.0 ... 1.0] for image objects created with _image_channel_data_type_ set - to one of the pre-defined packed formats or {CL_UNORM_INT8}, or - {CL_UNORM_INT16}. - - *read_imageh* returns half precision floating-point values in the range - [-1.0 ... 1.0] for image objects created with _image_channel_data_type_ - set to {CL_SNORM_INT8}, or {CL_SNORM_INT16}. - - *read_imageh* returns half precision floating-point values for image - objects created with _image_channel_data_type_ set to {CL_HALF_FLOAT}. - - The *read_imageh* calls that take integer coordinates must use a sampler - with filter mode set to `CLK_FILTER_NEAREST`, normalized coordinates set to - `CLK_NORMALIZED_COORDS_FALSE` and addressing mode set to - `CLK_ADDRESS_CLAMP_TO_EDGE`, `CLK_ADDRESS_CLAMP` or `CLK_ADDRESS_NONE`; - otherwise the values returned are undefined. - - Values returned by *read_imageh* for image objects with - _image_channel_data_type_ values not specified in the description are - undefined.
- -| | - -| half4 *read_imageh* ( + - read_only image2d_array_t _image_, + - sampler_t _sampler_, + - int4 _coord_) - - half4 *read_imageh* ( + - read_only image2d_array_t _image_, + - sampler_t _sampler_, + - float4 _coord_) -| Use _coord.xy_ to do an element lookup in the 2D image identified by - _coord.z_ in the 2D image array specified by _image_. - - *read_imageh* returns half precision floating-point values in the range - [0.0 ... 1.0] for image objects created with image_channel_data_type set - to one of the pre-defined packed formats or {CL_UNORM_INT8}, or - {CL_UNORM_INT16}. - - *read_imageh* returns half precision floating-point values in the range - [-1.0 ... 1.0] for image objects created with image_channel_data_type set - to {CL_SNORM_INT8}, or {CL_SNORM_INT16}. - - *read_imageh* returns half precision floating-point values for image - objects created with image_channel_data_type set to {CL_HALF_FLOAT}. - - The *read_imageh* calls that take integer coordinates must use a sampler - with filter mode set to `CLK_FILTER_NEAREST`, normalized coordinates set to - `CLK_NORMALIZED_COORDS_FALSE` and addressing mode set to - `CLK_ADDRESS_CLAMP_TO_EDGE`, `CLK_ADDRESS_CLAMP` or `CLK_ADDRESS_NONE`; - otherwise the values returned are undefined. - - Values returned by *read_imageh* for image objects with - image_channel_data_type values not specified in the description above are - undefined. - -| | - -| half4 *read_imageh* ( + - read_only image1d_t _image_, + - sampler_t _sampler_, + - int _coord_) - - half4 *read_imageh* ( + - read_only image1d_t _image_, + - sampler_t _sampler_, + - float _coord_) -| Use _coord_ to do an element lookup in the 1D image object specified by - _image_. - - *read_imageh* returns half precision floating-point values in the range - [0.0 ... 1.0] for image objects created with _image_channel_data_type_ set - to one of the pre-defined packed formats or {CL_UNORM_INT8}, or - {CL_UNORM_INT16}. 
- - *read_imageh* returns half precision floating-point values in the range - [-1.0 ... 1.0] for image objects created with _image_channel_data_type_ - set to {CL_SNORM_INT8}, or {CL_SNORM_INT16}. - - *read_imageh* returns half precision floating-point values for image - objects created with _image_channel_data_type_ set to {CL_HALF_FLOAT}. - - The *read_imageh* calls that take integer coordinates must use a sampler - with filter mode set to `CLK_FILTER_NEAREST`, normalized coordinates set to - `CLK_NORMALIZED_COORDS_FALSE` and addressing mode set to - `CLK_ADDRESS_CLAMP_TO_EDGE`, `CLK_ADDRESS_CLAMP` or `CLK_ADDRESS_NONE`; - otherwise the values returned are undefined. - - Values returned by *read_imageh* for image objects with - _image_channel_data_type_ values not specified in the description above - are undefined. - -| | - -| half4 *read_imageh* ( + - read_only image1d_array_t _image_, + - sampler_t _sampler_, + - int2 _coord_) - - half4 *read_imageh* ( + - read_only image1d_array_t _image_, + - sampler_t _sampler_, + - float2 _coord_) -| Use _coord.x_ to do an element lookup in the 1D image identified by - _coord.y_ in the 1D image array specified by _image_. - - *read_imageh* returns half precision floating-point values in the range - [0.0 ... 1.0] for image objects created with image_channel_data_type set - to one of the pre-defined packed formats or {CL_UNORM_INT8}, or - {CL_UNORM_INT16}. - - *read_imageh* returns half precision floating-point values in the range - [-1.0 ... 1.0] for image objects created with image_channel_data_type set - to {CL_SNORM_INT8}, or {CL_SNORM_INT16}. - - *read_imageh* returns half precision floating-point values for image - objects created with image_channel_data_type set to {CL_HALF_FLOAT}. 
- - The *read_imageh* calls that take integer coordinates must use a sampler - with filter mode set to `CLK_FILTER_NEAREST`, normalized coordinates set to - `CLK_NORMALIZED_COORDS_FALSE` and addressing mode set to - `CLK_ADDRESS_CLAMP_TO_EDGE`, `CLK_ADDRESS_CLAMP` or `CLK_ADDRESS_NONE`; - otherwise the values returned are undefined. - - Values returned by *read_imageh* for image objects with - image_channel_data_type values not specified in the description above are - undefined. - -|==== - -==== Built-in Image Sampler-less Read Functions - -_aQual_ in Table 6.24 refers to one of the access qualifiers. -For sampler-less read functions this may be _read_only_ or _read_write_. - -._Half Precision Built-in Image Sampler-less Read Functions_ -[cols=",",options="header",] -|==== -| *Function* -| *Description* - -| half4 *read_imageh* ( + - _aQual_ image2d_t _image_, + - int2 _coord_) -| Use the coordinate _(coord.x, coord.y)_ to do an element lookup in the 2D - image object specified by _image_. - - *read_imageh* returns half precision floating-point values in the range - [0.0 ... 1.0] for image objects created with _image_channel_data_type_ set - to one of the pre-defined packed formats or {CL_UNORM_INT8}, or - {CL_UNORM_INT16}. - - *read_imageh* returns half precision floating-point values in the range - [-1.0 ... 1.0] for image objects created with _image_channel_data_type_ - set to {CL_SNORM_INT8}, or {CL_SNORM_INT16}. - - *read_imageh* returns half precision floating-point values for image - objects created with _image_channel_data_type_ set to {CL_HALF_FLOAT}. - - Values returned by *read_imageh* for image objects with - _image_channel_data_type_ values not specified in the description above - are undefined. - -| half4 *read_imageh* ( + - _aQual_ image3d_t _image_, + - int4 _coord_ ) -| Use the coordinate _(coord.x_, _coord.y_, _coord.z)_ to do an element - lookup in the 3D image object specified by _image_. _coord.w_ is ignored. 
- - *read_imageh* returns half precision floating-point values in the range - [0.0 ... 1.0] for image objects created with _image_channel_data_type_ set - to one of the pre-defined packed formats or {CL_UNORM_INT8}, or - {CL_UNORM_INT16}. - - *read_imageh* returns half precision floating-point values in the range - [-1.0 ... 1.0] for image objects created with _image_channel_data_type_ - set to {CL_SNORM_INT8}, or {CL_SNORM_INT16}. - - *read_imageh* returns half precision floating-point values for image - objects created with _image_channel_data_type_ set to {CL_HALF_FLOAT}. - - Values returned by *read_imageh* for image objects with - _image_channel_data_type_ values not specified in the description are - undefined. - -| half4 *read_imageh* ( + - _aQual_ image2d_array_t _image_, + - int4 _coord_) -| Use _coord.xy_ to do an element lookup in the 2D image identified by - _coord.z_ in the 2D image array specified by _image_. - - *read_imageh* returns half precision floating-point values in the range - [0.0 ... 1.0] for image objects created with _image_channel_data_type_ set - to one of the pre-defined packed formats or {CL_UNORM_INT8}, or - {CL_UNORM_INT16}. - - *read_imageh* returns half precision floating-point values in the range - [-1.0 ... 1.0] for image objects created with _image_channel_data_type_ - set to {CL_SNORM_INT8}, or {CL_SNORM_INT16}. - - *read_imageh* returns half precision floating-point values for image - objects created with _image_channel_data_type_ set to {CL_HALF_FLOAT}. - - Values returned by *read_imageh* for image objects with - _image_channel_data_type_ values not specified in the description above - are undefined. - -| half4 *read_imageh* ( + - _aQual_ image1d_t _image_, + - int _coord_) - - half4 *read_imageh* ( + - _aQual_ image1d_buffer_t _image_, + - int _coord_) -| Use _coord_ to do an element lookup in the 1D image or 1D image buffer - object specified by _image_. 
- - *read_imageh* returns half precision floating-point values in the range - [0.0 ... 1.0] for image objects created with _image_channel_data_type_ set - to one of the pre-defined packed formats or {CL_UNORM_INT8}, or - {CL_UNORM_INT16}. - - *read_imageh* returns half precision floating-point values in the range - [-1.0 ... 1.0] for image objects created with _image_channel_data_type_ - set to {CL_SNORM_INT8}, or {CL_SNORM_INT16}. - - *read_imageh* returns half precision floating-point values for image - objects created with _image_channel_data_type_ set to {CL_HALF_FLOAT}. - - Values returned by *read_imageh* for image objects with - _image_channel_data_type_ values not specified in the description above - are undefined. - -| half4 *read_imageh* ( + - _aQual_ image1d_array_t _image_, + - int2 _coord_) -| Use _coord.x_ to do an element lookup in the 1D image identified by - _coord.y_ in the 1D image array specified by _image_. - - *read_imageh* returns half precision floating-point values in the range - [0.0 ... 1.0] for image objects created with _image_channel_data_type_ set - to one of the pre-defined packed formats or {CL_UNORM_INT8}, or - {CL_UNORM_INT16}. - - *read_imageh* returns half precision floating-point values in the range - [-1.0 ... 1.0] for image objects created with _image_channel_data_type_ - set to {CL_SNORM_INT8}, or {CL_SNORM_INT16}. - - *read_imageh* returns half precision floating-point values for image - objects created with _image_channel_data_type_ set to {CL_HALF_FLOAT}. - - Values returned by *read_imageh* for image objects with - _image_channel_data_type_ values not specified in the description above - are undefined. - -|==== - -==== Built-in Image Write Functions - -_aQual_ in Table 6.25 refers to one of the access qualifiers. -For write functions this may be _write_only_ or _read_write_.
- -._Half Precision Built-in Image Write Functions_ -[cols=",",options="header",] -|==== -| *Function* -| *Description* - -| void *write_imageh* ( + - _aQual_ image2d_t _image_, + - int2 _coord_, + - half4 _color_) -| Write _color_ value to location specified by _coord.xy_ in the 2D image - specified by _image_. - - Appropriate data format conversion to the specified image format is done - before writing the color value. _x_ & _y_ are considered to be - unnormalized coordinates and must be in the range 0 ... width - 1, and 0 - ... height - 1. - - *write_imageh* can only be used with image objects created with - _image_channel_data_type_ set to one of the pre-defined packed formats or - set to {CL_SNORM_INT8}, {CL_UNORM_INT8}, {CL_SNORM_INT16}, {CL_UNORM_INT16} or - {CL_HALF_FLOAT}. - - The behavior of *write_imageh* for image objects created with - _image_channel_data_type_ values not specified in the description above or - with (_x_, _y_) coordinate values that are not in the range (0 ... width - - 1, 0 ... height - 1) respectively, is undefined. - -| void *write_imageh* ( + - _aQual_ image2d_array_t _image_, + - int4 _coord_, + - half4 _color_) -| Write _color_ value to location specified by _coord.xy_ in the 2D image - identified by _coord.z_ in the 2D image array specified by _image_. - - Appropriate data format conversion to the specified image format is done - before writing the color value. _coord.x_, _coord.y_ and _coord.z_ are - considered to be unnormalized coordinates and must be in the range 0 ... - image width - 1, 0 ... image height - 1 and 0 ... image number of layers - - 1. - - *write_imageh* can only be used with image objects created with - _image_channel_data_type_ set to one of the pre-defined packed formats or - set to {CL_SNORM_INT8}, {CL_UNORM_INT8}, {CL_SNORM_INT16}, {CL_UNORM_INT16} or - {CL_HALF_FLOAT}. 
- - The behavior of *write_imageh* for image objects created with - _image_channel_data_type_ values not specified in the description above or - with (_x_, _y_, _z_) coordinate values that are not in the range (0 ... - image width - 1, 0 ... image height - 1, 0 ... image number of layers - - 1), respectively, is undefined. - -| void *write_imageh* ( + - _aQual_ image1d_t _image_, + - int _coord_, + - half4 _color_) - - void *write_imageh* ( + - _aQual_ image1d_buffer_t _image_, + - int _coord_, + - half4 _color_) -| Write _color_ value to location specified by _coord_ in the 1D image or 1D - image buffer object specified by _image_. - Appropriate data format conversion to the specified image format is done - before writing the color value. - _coord_ is considered to be an unnormalized coordinate and must be in the - range 0 ... image width - 1. - - *write_imageh* can only be used with image objects created with - _image_channel_data_type_ set to one of the pre-defined packed formats or - set to {CL_SNORM_INT8}, {CL_UNORM_INT8}, {CL_SNORM_INT16}, {CL_UNORM_INT16} or - {CL_HALF_FLOAT}. - Appropriate data format conversion will be done to convert channel data - from a floating-point value to actual data format in which the channels - are stored. - - The behavior of *write_imageh* for image objects created with - _image_channel_data_type_ values not specified in the description above or - with a coordinate value that is not in the range (0 ... image width - 1), - is undefined. - -| void *write_imageh* ( + - _aQual_ image1d_array_t _image_, + - int2 _coord_, + - half4 _color_) -| Write _color_ value to location specified by _coord.x_ in the 1D image - identified by _coord.y_ in the 1D image array specified by _image_. - Appropriate data format conversion to the specified image format is done - before writing the color value. _coord.x_ and _coord.y_ are considered to - be unnormalized coordinates and must be in the range 0 ... image width - 1 - and 0 ...
image number of layers - 1. - - *write_imageh* can only be used with image objects created with - _image_channel_data_type_ set to one of the pre-defined packed formats or - set to {CL_SNORM_INT8}, {CL_UNORM_INT8}, {CL_SNORM_INT16}, {CL_UNORM_INT16} or - {CL_HALF_FLOAT}. - Appropriate data format conversion will be done to convert channel data - from a floating-point value to actual data format in which the channels - are stored. - - The behavior of *write_imageh* for image objects created with - _image_channel_data_type_ values not specified in the description above or - with (_x_, _y_) coordinate values that are not in the range (0 ... image - width - 1, 0 ... image number of layers - 1), respectively, is undefined. - -| void *write_imageh* ( + - _aQual_ image3d_t _image_, + - int4 _coord_, + - half4 _color_) -| Write color value to location specified by coord.xyz in the 3D image - object specified by _image_. - - Appropriate data format conversion to the specified image format is done - before writing the color value. - coord.x, coord.y and coord.z are considered to be unnormalized coordinates - and must be in the range 0 ... image width - 1, 0 ... image height - 1 and - 0 ... image depth - 1. - - *write_imageh* can only be used with image objects created with - image_channel_data_type set to one of the pre-defined packed formats or - set to {CL_SNORM_INT8}, {CL_UNORM_INT8}, {CL_SNORM_INT16}, {CL_UNORM_INT16} or - {CL_HALF_FLOAT}. - - The behavior of *write_imageh* for image objects created with - image_channel_data_type values not specified in the description above or - with (x, y, z) coordinate values that are not in the range (0 ... image - width - 1, 0 ... image height - 1, 0 ... image depth - 1), respectively, - is undefined. - - Note: This built-in function is only available if the - cl_khr_3d_image_writes extension is also supported by the device. 
- -|==== - -[[cl_khr_fp16-ieee754-compliance]] -==== IEEE754 Compliance - -The following table entry describes the additions to _table 4.3,_ which -allows applications to query the configuration information using -{clGetDeviceInfo} for an OpenCL device that supports half precision -floating-point. - -[cols="1,1,2",options="header",] -|==== -| *Op-code* -| *Return Type* -| *Description* - -| {CL_DEVICE_HALF_FP_CONFIG} -| {cl_device_fp_config_TYPE} -| Describes half precision floating-point capability of the OpenCL device. - This is a bit-field that describes one or more of the following values: - - {CL_FP_DENORM} -- denorms are supported - - {CL_FP_INF_NAN} -- INF and NaNs are supported - - {CL_FP_ROUND_TO_NEAREST} -- round to nearest even rounding mode supported - - {CL_FP_ROUND_TO_ZERO} -- round to zero rounding mode supported - - {CL_FP_ROUND_TO_INF} -- round to positive and negative infinity rounding - modes supported - - {CL_FP_FMA} -- IEEE754-2008 fused multiply-add is supported - - {CL_FP_SOFT_FLOAT} -- Basic floating-point operations (such as addition, - subtraction, multiplication) are implemented in software. - - The required minimum half precision floating-point capability as - implemented by this extension is: - - {CL_FP_ROUND_TO_ZERO}, or {CL_FP_ROUND_TO_NEAREST} \| {CL_FP_INF_NAN}. - -|==== - -[[cl_khr_fp16-rounding-modes]] -==== Rounding Modes - -If {CL_FP_ROUND_TO_NEAREST} is supported, the default rounding mode for -half-precision floating-point operations will be round to nearest even; -otherwise the default rounding mode will be round to zero. - -Conversions to half floating-point format must be correctly rounded using -the indicated `convert` operator rounding mode or the default rounding mode -for half-precision floating-point operations if no rounding mode is -specified by the operator, or a C-style cast is used. 
- -Conversions from half to integer format shall correctly round using the -indicated `convert` operator rounding mode, or towards zero if no rounding -mode is specified by the operator or a C-style cast is used. -All conversions from half to floating-point formats are exact. - -[[cl_khr_fp16-relative-error-as-ulps]] -==== Relative Error as ULPs - -In this section we discuss the maximum relative error defined as _ulp_ -(units in the last place). - -Addition, subtraction, multiplication, fused multiply-add operations on half -types are required to be correctly rounded using the default rounding mode -for half-precision floating-point operations. - -The following table describes the minimum accuracy of half precision -floating-point arithmetic operations given as ULP values. -0 ULP is used for math functions that do not require rounding. -The reference value used to compute the ULP value of an arithmetic operation -is the infinitely precise result. - -._ULP Values for Half Precision Floating-Point Arithmetic Operations_ -[cols=",,",options="header",] -|==== -| *Function* -| *Min Accuracy - Full Profile* -| *Min Accuracy - Embedded Profile* - -| *_x_ + _y_* -| Correctly rounded -| Correctly rounded - -| *_x_ - _y_* -| Correctly rounded -| Correctly rounded - -| *_x_ * _y_* -| Correctly rounded -| Correctly rounded - -| *1.0 / _x_* -| Correctly rounded -| \<= 1 ulp - -| *_x_ / _y_* -| Correctly rounded -| \<= 1 ulp - -| | | - -| *acos* -| \<= 2 ulp -| \<= 3 ulp - -| *acosh* -| \<= 2 ulp -| \<= 3 ulp - -| *acospi* -| \<= 2 ulp -| \<= 3 ulp - -| *asin* -| \<= 2 ulp -| \<= 3 ulp - -| *asinh* -| \<= 2 ulp -| \<= 3 ulp - -| *asinpi* -| \<= 2 ulp -| \<= 3 ulp - -| *atan* -| \<= 2 ulp -| \<= 3 ulp - -| *atanh* -| \<= 2 ulp -| \<= 3 ulp - -| *atanpi* -| \<= 2 ulp -| \<= 3 ulp - -| *atan2* -| \<= 2 ulp -| \<= 3 ulp - -| *atan2pi* -| \<= 2 ulp -| \<= 3 ulp - -| *cbrt* -| \<= 2 ulp -| \<= 2 ulp - -| *ceil* -| Correctly rounded -| Correctly rounded - -| *clamp* -| 0 ulp -| 0 ulp - 
-| *copysign* -| 0 ulp -| 0 ulp - -| *cos* -| \<= 2 ulp -| \<= 2 ulp - -| *cosh* -| \<= 2 ulp -| \<= 3 ulp - -| *cospi* -| \<= 2 ulp -| \<= 2 ulp - -// 3 operations from the 2 multiplications and 1 subtraction per component -| *cross* -| absolute error tolerance of 'max * max * (3 * HALF_EPSILON)' per vector component, where _max_ is the maximum input operand magnitude -| Implementation-defined - -| *degrees* -| \<= 2 ulp -| \<= 2 ulp - -// 0.5 ULP error in sqrt -// 1.5 * n cumulative error for multiplications -// 0.5 * (n-1) cumulative error for additions -// -// = 0.5 + (1.5 * n) + (0.5 * (n - 1)) -// = 0.5 + 1.5n + (0.5n - 0.5) -// = 2n -| *distance* -| \<= 2n ulp, for gentype with vector width _n_ -| Implementation-defined - -// n + n-1 Number of operations from n multiples and (n-1) additions -// 2n - 1 -| *dot* -| absolute error tolerance of 'max * max * (2n - 1) * HALF_EPSILON', for vector width _n_ and maximum input operand magnitude _max_ across all vector components -| Implementation-defined - -| *erfc* -| \<= 4 ulp -| \<= 4 ulp - -| *erf* -| \<= 4 ulp -| \<= 4 ulp - -| *exp* -| \<= 2 ulp -| \<= 3 ulp - -| *exp2* -| \<= 2 ulp -| \<= 3 ulp - -| *exp10* -| \<= 2 ulp -| \<= 3 ulp - -| *expm1* -| \<= 2 ulp -| \<= 3 ulp - -| *fabs* -| 0 ulp -| 0 ulp - -| *fdim* -| Correctly rounded -| Correctly rounded - -| *floor* -| Correctly rounded -| Correctly rounded - -| *fma* -| Correctly rounded -| Correctly rounded - -| *fmax* -| 0 ulp -| 0 ulp - -| *fmin* -| 0 ulp -| 0 ulp - -| *fmod* -| 0 ulp -| 0 ulp - -| *fract* -| Correctly rounded -| Correctly rounded - -| *frexp* -| 0 ulp -| 0 ulp - -| *hypot* -| \<= 2 ulp -| \<= 3 ulp - -| *ilogb* -| 0 ulp -| 0 ulp - -| *ldexp* -| Correctly rounded -| Correctly rounded - -// 0.5 ULP error in sqrt -// 0.5 effect on e of taking sqrt(x + e) -// 0.5 * n cumulative error for multiplications -// 0.5 * (n-1) cumulative error for additions -// -// = (0.5 + 0.5 * ((0.5 * n) + (0.5 * (n - 1)))) -// = 0.5 + 0.5 * (n - 0.5) -// = 0.25 + 
0.5n -| *length* -| \<= 0.25 + 0.5n ulp, for gentype with vector width _n_ -| Implementation-defined - -| *log* -| \<= 2 ulp -| \<= 3 ulp - -| *log2* -| \<= 2 ulp -| \<= 3 ulp - -| *log10* -| \<= 2 ulp -| \<= 3 ulp - -| *log1p* -| \<= 2 ulp -| \<= 3 ulp - -| *logb* -| 0 ulp -| 0 ulp - -| *mad* -| Implementation-defined -| Implementation-defined - -| *max* -| 0 ulp -| 0 ulp - -| *maxmag* -| 0 ulp -| 0 ulp - -| *min* -| 0 ulp -| 0 ulp - -| *minmag* -| 0 ulp -| 0 ulp - -| *mix* -| Implementation-defined -| Implementation-defined - -| *modf* -| 0 ulp -| 0 ulp - -| *nan* -| 0 ulp -| 0 ulp - -| *nextafter* -| 0 ulp -| 0 ulp - -// 1.5 error in rsqrt + error in multiply -// 0.5 * n cumulative error for multiplications -// 0.5 * (n-1) cumulative error for additions -// -// = 1.5 + (0.5 * n) + (0.5 * (n - 1)) -// = 1.5 + 0.5n + (0.5n - 0.5) -// = 1.0 + n -| *normalize* -| \<= 1 + n ulp, for gentype with vector width _n_ -| Implementation-defined - -| *pow(x, y)* -| \<= 4 ulp -| \<= 5 ulp - -| *pown(x, y)* -| \<= 4 ulp -| \<= 5 ulp - -| *powr(x, y)* -| \<= 4 ulp -| \<= 5 ulp - -| *radians* -| \<= 2 ulp -| \<= 2 ulp - -| *remainder* -| 0 ulp -| 0 ulp - -| *remquo* -| 0 ulp for the remainder, at least the lower 7 bits of the integral quotient -| 0 ulp for the remainder, at least the lower 7 bits of the integral quotient - -| *rint* -| Correctly rounded -| Correctly rounded - -| *rootn* -| \<= 4 ulp -| \<= 5 ulp - -| *round* -| Correctly rounded -| Correctly rounded - -| *rsqrt* -| \<=1 ulp -| \<=1 ulp - -| *sign* -| 0 ulp -| 0 ulp - -| *sin* -| \<= 2 ulp -| \<= 2 ulp - -| *sincos* -| \<= 2 ulp for sine and cosine values -| \<= 2 ulp for sine and cosine values - -| *sinh* -| \<= 2 ulp -| \<= 3 ulp - -| *sinpi* -| \<= 2 ulp -| \<= 2 ulp - -| *smoothstep* -| Implementation-defined -| Implementation-defined - -| *sqrt* -| Correctly rounded -| \<= 1 ulp - -| *step* -| 0 ulp -| 0 ulp - -| *tan* -| \<= 2 ulp -| \<= 3 ulp - -| *tanh* -| \<= 2 ulp -| \<= 3 ulp - -| *tanpi* -| \<= 2 ulp 
-| \<= 3 ulp - -| *tgamma* -| \<= 4 ulp -| \<= 4 ulp - -| *trunc* -| Correctly rounded -| Correctly rounded - -|==== - -Note: _Implementations may perform floating-point operations on_ `half` -_scalar or vector data types by converting the_ `half` _values to single -precision floating-point values and performing the operation in single -precision floating-point. -In this case, the implementation will use the_ `half` _scalar or vector data -type as a storage only format_. - -[[cl_khr_fp16-additions-to-chapter-8-of-the-opencl-2.0-specification]] -=== Additions to Chapter 8 of the OpenCL 2.0 C Specification - -Add new sub-sections to _section 8.3.1. Conversion rules for normalized integer channel data types_: - -[[cl_khr_fp16-converting-normalized-integer-channel-data-types-to-floating-point-values]] -==== Converting normalized integer channel data types to half precision floating-point values - -For images created with image channel data type of {CL_UNORM_INT8} and -{CL_UNORM_INT16}, *read_imageh* will convert the channel values from an -8-bit or 16-bit unsigned integer to normalized half precision -floating-point values in the range [`0.0h`, `1.0h`]. - -For images created with image channel data type of {CL_SNORM_INT8} and -{CL_SNORM_INT16}, *read_imageh* will convert the channel values from an -8-bit or 16-bit signed integer to normalized half precision floating-point -values in the range [`-1.0h`, `1.0h`].
- -These conversions are performed as follows: - -{CL_UNORM_INT8} (8-bit unsigned integer) {rightarrow} `half` - -[none] -* normalized `half` value = `round_to_half(c / 255)` - -{CL_UNORM_INT_101010} (10-bit unsigned integer) {rightarrow} `half` - -[none] -* normalized `half` value = `round_to_half(c / 1023)` - -{CL_UNORM_INT16} (16-bit unsigned integer) {rightarrow} `half` - -[none] -* normalized `half` value = `round_to_half(c / 65535)` - -{CL_SNORM_INT8} (8-bit signed integer) {rightarrow} `half` - -[none] -* normalized `half` value = `max(-1.0h, round_to_half(c / 127))` - -{CL_SNORM_INT16} (16-bit signed integer) {rightarrow} `half` - -[none] -* normalized `half` value = `max(-1.0h, round_to_half(c / 32767))` - -The accuracy of the above conversions must be \<= 1.5 ulp except for the -following cases. - -For {CL_UNORM_INT8} - -[none] -* 0 must convert to `0.0h` and -* 255 must convert to `1.0h` - -For {CL_UNORM_INT_101010} - -[none] -* 0 must convert to `0.0h` and -* 1023 must convert to `1.0h` - -For {CL_UNORM_INT16} - -[none] -* 0 must convert to `0.0h` and -* 65535 must convert to `1.0h` - -For {CL_SNORM_INT8} - -[none] -* -128 and -127 must convert to `-1.0h`, -* 0 must convert to `0.0h` and -* 127 must convert to `1.0h` - -For {CL_SNORM_INT16} - -[none] -* -32768 and -32767 must convert to `-1.0h`, -* 0 must convert to `0.0h` and -* 32767 must convert to `1.0h` - - -[[cl_khr_fp16-converting-floating-point-values-to-normalized-integer-channel-data-types]] -==== Converting half precision floating-point values to normalized integer channel data types - -For images created with image channel data type of {CL_UNORM_INT8} and -{CL_UNORM_INT16}, *write_imageh* will convert the half precision floating-point color value -to an 8-bit or 16-bit unsigned integer. - -For images created with image channel data type of {CL_SNORM_INT8} and -{CL_SNORM_INT16}, *write_imageh* will convert the half precision floating-point color value -to an 8-bit or 16-bit signed integer.
- -The preferred conversion uses the round to nearest even (`_rte`) rounding -mode, but OpenCL implementations may choose to approximate the rounding mode -used in the conversions described below. -When approximate rounding is used instead of the preferred rounding, -the result of the conversion must satisfy the bound given below. - -`half` {rightarrow} {CL_UNORM_INT8} (8-bit unsigned integer) - -[none] -* Let f~exact~ = *max*(`0`, *min*(`f * 255`, `255`)) -* Let f~preferred~ = *convert_uchar_sat_rte*(`f * 255.0f`) -* Let f~approx~ = *convert_uchar_sat_*(`f * 255.0f`) -* *fabs*(f~exact~ - f~approx~) must be \<= 0.6 - -`half` {rightarrow} {CL_UNORM_INT_101010} (10-bit unsigned integer) - -[none] -* Let f~exact~ = *max*(`0`, *min*(`f * 1023`, `1023`)) -* Let f~preferred~ = *min*(*convert_ushort_sat_rte*(`f * 1023.0f`), `1023`) -* Let f~approx~ = *convert_ushort_sat_*(`f * 1023.0f`) -* *fabs*(f~exact~ - f~approx~) must be \<= 0.6 - -`half` {rightarrow} {CL_UNORM_INT16} (16-bit unsigned integer) - -[none] -* Let f~exact~ = *max*(`0`, *min*(`f * 65535`, `65535`)) -* Let f~preferred~ = *convert_ushort_sat_rte*(`f * 65535.0f`) -* Let f~approx~ = *convert_ushort_sat_*(`f * 65535.0f`) -* *fabs*(f~exact~ - f~approx~) must be \<= 0.6 - -`half` {rightarrow} {CL_SNORM_INT8} (8-bit signed integer) - -[none] -* Let f~exact~ = *max*(`-128`, *min*(`f * 127`, `127`)) -* Let f~preferred~ = *convert_char_sat_rte*(`f * 127.0f`) -* Let f~approx~ = *convert_char_sat_*(`f * 127.0f`) -* *fabs*(f~exact~ - f~approx~) must be \<= 0.6 - -`half` {rightarrow} {CL_SNORM_INT16} (16-bit signed integer) - -[none] -* Let f~exact~ = *max*(`-32768`, *min*(`f * 32767`, `32767`)) -* Let f~preferred~ = *convert_short_sat_rte*(`f * 32767.0f`) -* Let f~approx~ = *convert_short_sat_*(`f * 32767.0f`) -* *fabs*(f~exact~ - f~approx~) must be \<= 0.6 diff --git a/ext/cl_khr_fp64.asciidoc b/ext/cl_khr_fp64.asciidoc deleted file mode 100644 index 467830cd..00000000 --- a/ext/cl_khr_fp64.asciidoc +++ /dev/null @@ 
-1,1303 +0,0 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ - -[[cl_khr_fp64]] -== Double Precision Floating-Point - -This section describes the *cl_khr_fp64* extension. -This extension became an optional core feature in OpenCL 1.2. - -=== General Information - -==== Version History - -[cols="1,1,3",options="header",] -|==== -| *Date* | *Version* | *Description* -| 2020-04-21 | 1.0.0 | First assigned version. -|==== - -[[cl_khr_fp64-additions-to-chapter-6]] -=== Additions to Chapter 6 - -The lists of built-in scalar and vector data types defined in _tables 6.1_ -and _6.2_ are extended to include the following: - -[cols="1,3",options="header",] -|==== -|*Type* | *Description* -|*double* | A double precision float. -|*double2* | A 2-component double-precision floating-point vector. -|*double3* | A 3-component double-precision floating-point vector. -|*double4* | A 4-component double-precision floating-point vector. -|*double8* | An 8-component double-precision floating-point vector. -|*double16* | A 16-component double-precision floating-point vector. -|==== - -The built-in scalar and vector data types for `doublen` are also declared as appropriate -types in the OpenCL API (and header files) that can be used by an -application. -The following table describes the built-in scalar and vector data types for `doublen` as -defined in the OpenCL C programming language and the corresponding data type -available to the application: - -[cols=",",options="header",] -|==== -|*Type in OpenCL Language* | *API type for application* -|*double* | *cl_double* -|*double2* | *cl_double2* -|*double3* | *cl_double3* -|*double4* | *cl_double4* -|*double8* | *cl_double8* -|*double16* | *cl_double16* -|==== - -The double data type must conform to the IEEE-754 double precision storage format.
- -The following text is added to _Section 6.1.1.1 The half data type_: - -Conversions from double to half are correctly rounded. -Conversions from half to double are lossless. - -[[cl_khr_fp64-conversions]] -==== Conversions - -The implicit conversion rules specified in _section 6.2.1_ now include the -`double` scalar and `doublen` vector data types. - -The explicit casts described in _section 6.2.2_ are extended to take a -`double` scalar data type and a `doublen` vector data type. - -The explicit conversion functions described in _section 6.2.3_ are extended -to take a `double` scalar data type and a `doublen` vector data type. - -The `as_typen()` function for re-interpreting types as described in _section -6.2.4.2_ is extended to allow conversion-free casts between `longn`, -`ulongn` and `doublen` scalar and vector data types. - -[[cl_khr_fp64-math-functions]] -==== Math Functions - -The built-in math functions defined in _table 6.8_ (also listed below) are -extended to include appropriate versions of functions that take `double` and -`double{2|3|4|8|16}` as arguments and return values. -`gentype` now also includes `double`, `double2`, `double3`, `double4`, `double8` and -`double16`. - -For any specific use of a function, the actual type has to be the same for -all arguments and the return type. - -._Double Precision Built-in Math Functions_ -[cols=",",options="header",] -|==== -| *Function* -| *Description* - -| gentype *acos* (gentype _x_) -| Arc cosine function. - -| gentype *acosh* (gentype _x_) -| Inverse hyperbolic cosine. - -| gentype *acospi* (gentype _x_) -| Compute *acos* (_x_) / {pi}. - -| gentype *asin* (gentype _x_) -| Arc sine function. - -| gentype *asinh* (gentype _x_) -| Inverse hyperbolic sine. - -| gentype *asinpi* (gentype _x_) -| Compute *asin* (_x_) / {pi}. - -| gentype *atan* (gentype _y_over_x_) -| Arc tangent function. - -| gentype *atan2* (gentype _y_, gentype _x_) -| Arc tangent of _y_ / _x_. 
- -| gentype *atanh* (gentype _x_) -| Hyperbolic arc tangent. - -| gentype *atanpi* (gentype _x_) -| Compute *atan* (_x_) / {pi}. - -| gentype *atan2pi* (gentype _y_, gentype _x_) -| Compute *atan2* (_y_, _x_) / {pi}. - -| gentype *cbrt* (gentype _x_) -| Compute cube-root. - -| gentype *ceil* (gentype _x_) -| Round to integral value using the round to positive infinity rounding - mode. - -| gentype *copysign* (gentype _x_, gentype _y_) -| Returns _x_ with its sign changed to match the sign of _y_. - -| gentype *cos* (gentype _x_) -| Compute cosine. - -| gentype *cosh* (gentype _x_) -| Compute hyperbolic cosine. - -| gentype *cospi* (gentype _x_) -| Compute *cos* ({pi} _x_). - -| gentype *erfc* (gentype _x_) -| Complementary error function. - -| gentype *erf* (gentype _x_) -| Error function encountered in integrating the normal distribution. - -| gentype *exp* (gentype _x_) -| Compute the base- e exponential of _x_. - -| gentype *exp2* (gentype _x_) -| Exponential base 2 function. - -| gentype *exp10* (gentype _x_) -| Exponential base 10 function. - -| gentype *expm1* (gentype _x_) -| Compute _e^x^_- 1.0. - -| gentype *fabs* (gentype _x_) -| Compute absolute value of a floating-point number. - -| gentype *fdim* (gentype _x_, gentype _y_) -| _x_ - _y_ if _x_ > _y_, +0 if x is less than or equal to y. - -| gentype *floor* (gentype _x_) -| Round to integral value using the round to negative infinity rounding - mode. - -| gentype *fma* (gentype _a_, gentype _b_, gentype _c_) -| Returns the correctly rounded floating-point representation of the sum of - _c_ with the infinitely precise product of _a_ and _b_. - Rounding of intermediate products shall not occur. - Edge case behavior is per the IEEE 754-2008 standard. - -| gentype *fmax* (gentype _x_, gentype _y_) + - gentype *fmax* (gentype _x_, double _y_) -| Returns _y_ if _x_ < _y_, otherwise it returns _x_. - If one argument is a NaN, *fmax()* returns the other argument. 
- If both arguments are NaNs, *fmax()* returns a NaN. - -| gentype *fmin* (gentype _x_, gentype _y_) + - gentype *fmin* (gentype _x_, double _y_) -| Returns _y_ if _y_ < _x_, otherwise it returns _x_. - If one argument is a NaN, *fmin()* returns the other argument. - If both arguments are NaNs, *fmin()* returns a NaN. - -| gentype *fmod* (gentype _x_, gentype _y_) -| Modulus. - Returns _x_ - _y_ * *trunc* (_x_/_y_) . - -| gentype **fract** (gentype _x_, {global} gentype *_iptr_) + - gentype **fract** (gentype _x_, {local} gentype *_iptr_) + - gentype **fract** (gentype _x_, {private} gentype *_iptr_) -| Returns *fmin*( _x_ - *floor* (_x_), 0x1.fffffffffffffp-1 ). - - *floor*(_x_) is returned in _iptr_. - -| double__n__ **frexp** (double__n x__, {global} int__n__ *exp) + - double__n__ **frexp** (double__n x__, {local} int__n__ *exp) + - double__n__ **frexp** (double__n x__, {private} int__n__ *exp) + - double **frexp** (double _x_, {global} int *exp) + - double **frexp** (double _x_, {local} int *exp) + - double **frexp** (double _x_, {private} int *exp) -| Extract mantissa and exponent from _x_. - For each component the mantissa returned is a `double` with magnitude - in the interval [1/2, 1) or 0. - Each component of _x_ equals mantissa returned * 2__^exp^__. - -| gentype *hypot* (gentype _x_, gentype _y_) -| Compute the value of the square root of __x__^2^+ __y__^2^ without undue - overflow or underflow. - -| int__n__ *ilogb* (double__n__ _x_) + - int *ilogb* (double _x_) -| Return the exponent as an integer value. - -| double__n__ *ldexp* (double__n__ _x_, int__n__ _k_) + - double__n__ *ldexp* (double__n__ _x_, int _k_) + - double *ldexp* (double _x_, int _k_) -| Multiply _x_ by 2 to the power _k_.
- -| gentype **lgamma** (gentype _x_) + - double__n__ **lgamma_r** (double__n__ _x_, {global} int__n__ *_signp_) + - double__n__ **lgamma_r** (double__n__ _x_, {local} int__n__ *_signp_) + - double__n__ **lgamma_r** (double__n__ _x_, {private} int__n__ *_signp_) + - double **lgamma_r** (double _x_, {global} int *_signp_) + - double **lgamma_r** (double _x_, {local} int *_signp_) + - double **lgamma_r** (double _x_, {private} int *_signp_) -| Log gamma function. - Returns the natural logarithm of the absolute value of the gamma function. - The sign of the gamma function is returned in the _signp_ argument of - *lgamma_r*. - -| gentype *log* (gentype _x_) -| Compute natural logarithm. - -| gentype *log2* (gentype _x_) -| Compute a base 2 logarithm. - -| gentype *log10* (gentype _x_) -| Compute a base 10 logarithm. - -| gentype *log1p* (gentype _x_) -| Compute log~e~(1.0 + _x_) . - -| gentype *logb* (gentype _x_) -| Compute the exponent of _x_, which is the integral part of - log__~r~__\|_x_\|. - -| gentype *mad* (gentype _a_, gentype _b_, gentype _c_) -| *mad* computes _a_ * _b_ + _c_. - The function may compute _a_ * _b_ + _c_ with reduced accuracy - in the embedded profile. See the OpenCL SPIR-V Environment Specification - for details. On some hardware the mad instruction may provide better - performance than expanded computation of _a_ * _b_ + _c_. - -| gentype *maxmag* (gentype _x_, gentype _y_) -| Returns _x_ if \|_x_\| > \|_y_\|, _y_ if \|_y_\| > \|_x_\|, otherwise - *fmax*(_x_, _y_). - -| gentype *minmag* (gentype _x_, gentype _y_) -| Returns _x_ if \|_x_\| < \|_y_\|, _y_ if \|_y_\| < \|_x_\|, otherwise - *fmin*(_x_, _y_). - -| gentype **modf** (gentype _x_, {global} gentype *_iptr_) + - gentype **modf** (gentype _x_, {local} gentype *_iptr_) + - gentype **modf** (gentype _x_, {private} gentype *_iptr_) -| Decompose a floating-point number. 
- The *modf* function breaks the argument _x_ into integral and fractional - parts, each of which has the same sign as the argument. - It stores the integral part in the object pointed to by _iptr_. - -| double__n__ *nan* (ulong__n nancode__) + - double *nan* (ulong _nancode_) -| Returns a quiet NaN. - The _nancode_ may be placed in the significand of the resulting NaN. - -| gentype *nextafter* (gentype _x_, gentype _y_) -| Computes the next representable double-precision floating-point value - following _x_ in the direction of _y_. - Thus, if _y_ is less than _x_, *nextafter*() returns the largest - representable floating-point number less than _x_. - -| gentype *pow* (gentype _x_, gentype _y_) -| Compute _x_ to the power _y_. - -| double__n__ *pown* (double__n__ _x_, int__n__ _y_) + - double *pown* (double _x_, int _y_) -| Compute _x_ to the power _y_, where _y_ is an integer. - -| gentype *powr* (gentype _x_, gentype _y_) -| Compute _x_ to the power _y_, where _x_ is >= 0. - -| gentype *remainder* (gentype _x_, gentype _y_) -| Compute the value _r_ such that _r_ = _x_ - _n_*_y_, where _n_ is the - integer nearest the exact value of _x_/_y_. - If there are two integers closest to _x_/_y_, _n_ shall be the even one. - If _r_ is zero, it is given the same sign as _x_. - -| double__n__ **remquo** (double__n__ _x_, double__n__ _y_, {global} int__n__ *_quo_) + - double__n__ **remquo** (double__n__ _x_, double__n__ _y_, {local} int__n__ *_quo_) + - double__n__ **remquo** (double__n__ _x_, double__n__ _y_, {private} int__n__ *_quo_) + - double **remquo** (double _x_, double _y_, {global} int *_quo_) + - double **remquo** (double _x_, double _y_, {local} int *_quo_) + - double **remquo** (double _x_, double _y_, {private} int *_quo_) -| The *remquo* function computes the value r such that _r_ = _x_ - _k_*_y_, - where _k_ is the integer nearest the exact value of _x_/_y_. - If there are two integers closest to _x_/_y_, _k_ shall be the even one. 
- If _r_ is zero, it is given the same sign as _x_. - This is the same value that is returned by the *remainder* function. - *remquo* also calculates the lower seven bits of the integral quotient - _x_/_y_, and gives that value the same sign as _x_/_y_. - It stores this signed value in the object pointed to by _quo_. - -| gentype *rint* (gentype _x_) -| Round to integral value (using round to nearest even rounding mode) in - floating-point format. - Refer to section 7.1 for description of rounding modes. - -| double__n__ *rootn* (double__n__ _x_, int__n__ _y_) + - double *rootn* (double _x_, int _y_) -| Compute _x_ to the power 1/_y_. - -| gentype *round* (gentype _x_) -| Return the integral value nearest to _x_ rounding halfway cases away from - zero, regardless of the current rounding direction. - -| gentype *rsqrt* (gentype _x_) -| Compute inverse square root. - -| gentype *sin* (gentype _x_) -| Compute sine. - -| gentype **sincos** (gentype _x_, {global} gentype *_cosval_) + - gentype **sincos** (gentype _x_, {local} gentype *_cosval_) + - gentype **sincos** (gentype _x_, {private} gentype *_cosval_) -| Compute sine and cosine of x. - The computed sine is the return value and computed cosine is returned in - _cosval_. - -| gentype *sinh* (gentype _x_) -| Compute hyperbolic sine. - -| gentype *sinpi* (gentype _x_) -| Compute *sin* ({pi} _x_). - -| gentype *sqrt* (gentype _x_) -| Compute square root. - -| gentype *tan* (gentype _x_) -| Compute tangent. - -| gentype *tanh* (gentype _x_) -| Compute hyperbolic tangent. - -| gentype *tanpi* (gentype _x_) -| Compute *tan* ({pi} _x_). - -| gentype *tgamma* (gentype _x_) -| Compute the gamma function. - -| gentype *trunc* (gentype _x_) -| Round to integral value using the round to zero rounding mode. -|==== - -In addition, the following symbolic constant will also be available: - -*HUGE_VAL* - A positive double expression that evaluates to infinity. -Used as an error value returned by the built-in math functions. 
- -The *FP_FAST_FMA* macro indicates whether the *fma()* family of -functions are fast compared with direct code for double precision -floating-point. -If defined, the *FP_FAST_FMA* macro shall indicate that the *fma()* -function generally executes about as fast as, or faster than, a multiply and -an add of *double* operands. - -The macro names given in the following list must use the values specified. -These constant expressions are suitable for use in #if preprocessing -directives. - -[source,opencl_c] ----- -#define DBL_DIG 15 -#define DBL_MANT_DIG 53 -#define DBL_MAX_10_EXP +308 -#define DBL_MAX_EXP +1024 -#define DBL_MIN_10_EXP -307 -#define DBL_MIN_EXP -1021 -#define DBL_RADIX 2 -#define DBL_MAX 0x1.fffffffffffffp1023 -#define DBL_MIN 0x1.0p-1022 -#define DBL_EPSILON 0x1.0p-52 ----- - -The following table describes the built-in macro names given above in the -OpenCL C programming language and the corresponding macro names available to -the application. - -[cols=",",options="header",] -|==== -| *Macro in OpenCL Language* | *Macro for application* -| `DBL_DIG` | {CL_DBL_DIG} -| `DBL_MANT_DIG` | {CL_DBL_MANT_DIG} -| `DBL_MAX_10_EXP` | {CL_DBL_MAX_10_EXP} -| `DBL_MAX_EXP` | {CL_DBL_MAX_EXP} -| `DBL_MIN_10_EXP` | {CL_DBL_MIN_10_EXP} -| `DBL_MIN_EXP` | {CL_DBL_MIN_EXP} -| `DBL_RADIX` | {CL_DBL_RADIX} -| `DBL_MAX` | {CL_DBL_MAX} -| `DBL_MIN` | {CL_DBL_MIN} -| `DBL_EPSILON` | {CL_DBL_EPSILON} -|==== - -// TODO: DBL_RADIX / CL_DBL_RADIX? - -The following constants are also available. -They are of type `double` and are accurate within the precision of the `double` -type.
- -[cols=",",options="header",] -|==== -| *Constant* | *Description* -| `M_E` | Value of e -| `M_LOG2E` | Value of log~2~e -| `M_LOG10E` | Value of log~10~e -| `M_LN2` | Value of log~e~2 -| `M_LN10` | Value of log~e~10 -| `M_PI` | Value of {pi} -| `M_PI_2` | Value of {pi} / 2 -| `M_PI_4` | Value of {pi} / 4 -| `M_1_PI` | Value of 1 / {pi} -| `M_2_PI` | Value of 2 / {pi} -| `M_2_SQRTPI` | Value of 2 / {sqrt}{pi} -| `M_SQRT2` | Value of {sqrt}2 -| `M_SQRT1_2` | Value of 1 / {sqrt}2 -|==== - -[[cl_khr_fp64-common-functions]] -==== Common Functions - -The built-in common functions defined in _table 6.12_ (also listed below) -are extended to include appropriate versions of functions that take `double` -and `double{2|3|4|8|16}` as arguments and return values. -gentype now also includes `double`, `double2`, `double3`, `double4`, `double8` and -`double16`. -These are described below. - -.Double Precision Built-in Common Functions -[cols=",",options="header",] -|==== -| *Function* -| *Description* - -| gentype *clamp* ( + - gentype _x_, gentype _minval_, gentype _maxval_) - - gentype *clamp* ( + - gentype _x_, double _minval_, double _maxval_) -| Returns *fmin*(*fmax*(_x_, _minval_), _maxval_). - - Results are undefined if _minval_ > _maxval_. - -| gentype *degrees* (gentype _radians_) -| Converts _radians_ to degrees, + - i.e. (180 / {pi}) * _radians_. - -| gentype *max* (gentype _x_, gentype _y_) + - gentype *max* (gentype _x_, double _y_) -| Returns _y_ if _x_ < _y_, otherwise it returns _x_. - If _x_ and _y_ are infinite or NaN, the return values are undefined. - -| gentype *min* (gentype _x_, gentype _y_) + - gentype *min* (gentype _x_, double _y_) -| Returns _y_ if _y_ < _x_, otherwise it returns _x_. - If _x_ and _y_ are infinite or NaN, the return values are undefined. 
- -| gentype *mix* (gentype _x_, gentype _y_, gentype _a_) + - gentype *mix* (gentype _x_, gentype _y_, double _a_) -| Returns the linear blend of _x_ and _y_ implemented as: - - _x_ + (_y_ - _x_) * _a_ - - _a_ must be a value in the range 0.0 ... 1.0. - If _a_ is not in the range 0.0 ... 1.0, the return values are undefined. - - Note: The double precision *mix* function can be implemented using contractions such as *mad* or *fma*. - -| gentype *radians* (gentype _degrees_) -| Converts _degrees_ to radians, i.e. ({pi} / 180) * _degrees_. - -| gentype *step* (gentype _edge_, gentype _x_) + - gentype *step* (double _edge_, gentype _x_) -| Returns 0.0 if _x_ < _edge_, otherwise it returns 1.0. - -| gentype *smoothstep* ( + - gentype _edge0_, gentype _edge1_, gentype _x_) + - - gentype *smoothstep* ( + - double _edge0_, double _edge1_, gentype _x_) -| Returns 0.0 if _x_ \<= _edge0_ and 1.0 if _x_ >= _edge1_ and performs - smooth Hermite interpolation between 0 and 1 when _edge0_ < _x_ < _edge1_. - This is useful in cases where you would want a threshold function with a - smooth transition. - - This is equivalent to: - - gentype _t_; + - _t_ = clamp ((_x_ - _edge0_) / (_edge1_ - _edge0_), 0, 1); + - return _t_ * _t_ * (3 - 2 * _t_); + - - Results are undefined if _edge0_ >= _edge1_. - - Note: The double precision *smoothstep* function can be implemented using contractions such as *mad* or *fma*. - -| gentype *sign* (gentype _x_) -| Returns 1.0 if _x_ > 0, -0.0 if _x_ = -0.0, +0.0 if _x_ = +0.0, or -1.0 if - _x_ < 0. - Returns 0.0 if _x_ is a NaN. - -|==== - -[[cl_khr_fp64-geometric-functions]] -==== Geometric Functions - -The built-in geometric functions defined in _table 6.13_ (also listed below) -are extended to include appropriate versions of functions that take `double` -and `double{2|3|4}` as arguments and return values. -gentype now also includes `double`, `double2`, `double3` and `double4`. -These are described below.
- -Note: The double precision geometric functions can be implemented using -contractions such as *mad* or *fma*. - -._Double Precision Built-in Geometric Functions_ -[cols=",",options="header",] -|==== -| *Function* -| *Description* - -| double4 *cross* (double4 _p0_, double4 _p1_) + - double3 *cross* (double3 _p0_, double3 _p1_) -| Returns the cross product of _p0.xyz_ and _p1.xyz_. - The _w_ component of the result will be 0.0. - -| double *dot* (gentype _p0_, gentype _p1_) -| Compute the dot product of _p0_ and _p1_. - -| double *distance* (gentype _p0_, gentype _p1_) -| Returns the distance between _p0_ and _p1_. - This is calculated as *length*(_p0_ - _p1_). - -| double *length* (gentype _p_) -| Return the length of vector _p_, i.e., + - sqrt( __p.x__^2^ + __p.y__^2^ + ... ) - -| gentype *normalize* (gentype _p_) -| Returns a vector in the same direction as _p_ but with a length of 1. - -|==== - -[[cl_khr_fp64-relational-functions]] -==== Relational Functions - -The scalar and vector relational functions described in _table 6.14_ are -extended to include versions that take `double`, `double2`, `double3`, `double4`, -`double8` and `double16` as arguments. - -The relational and equality operators (<, \<=, >, >=, !=, ==) can be used -with `doublen` vector types and shall produce a vector `longn` result as -described in _section 6.3_. - -The functions *isequal*, *isnotequal*, *isgreater*, *isgreaterequal*, -*isless*, *islessequal*, *islessgreater*, *isfinite*, *isinf*, *isnan*, -*isnormal*, *isordered*, *isunordered* and *signbit* shall return a 0 if the -specified relation is _false_ and a 1 if the specified relation is _true_ for -scalar argument types. -These functions shall return a 0 if the specified relation is _false_ and a --1 (i.e. all bits set) if the specified relation is _true_ for vector -argument types.
- -The relational functions *isequal*, *isgreater*, *isgreaterequal*, *isless*, -*islessequal*, and *islessgreater* always return 0 if either argument is not -a number (NaN). -*isnotequal* returns 1 if one or both arguments are not a number (NaN) and -the argument type is a scalar and returns -1 if one or both arguments are -not a number (NaN) and the argument type is a vector. - -The functions described in _table 6.14_ are extended to include the `doublen` -vector types. - -._Double Precision Relational Functions_ -[cols=",",options="header",] -|==== -| *Function* -| *Description* - -| int *isequal* (double _x_, double _y_) + - long__n__ *isequal* (double__n x__, double__n y__) -| Returns the component-wise compare of _x_ == _y_. - -| int *isnotequal* (double _x_, double _y_) + - long__n__ *isnotequal* (double__n x__, double__n y__) -| Returns the component-wise compare of _x_ != _y_. - -| int *isgreater* (double _x_, double _y_) + - long__n__ *isgreater* (double__n x__, double__n y__) -| Returns the component-wise compare of _x_ > _y_. - -| int *isgreaterequal* (double _x_, double _y_) + - long__n__ *isgreaterequal* (double__n x__, double__n y__) -| Returns the component-wise compare of _x_ >= _y_. - -| int *isless* (double _x_, double _y_) + - long__n__ *isless* (double__n x__, double__n y__) -| Returns the component-wise compare of _x_ < _y_. - -| int *islessequal* (double _x_, double _y_) + - long__n__ *islessequal* (double__n x__, double__n y__) -| Returns the component-wise compare of _x_ \<= _y_. - -| int *islessgreater* (double _x_, double _y_) + - long__n__ *islessgreater* (double__n x__, double__n y__) -| Returns the component-wise compare of (_x_ < _y_) \|\| (_x_ > _y_) . - -| | - -| int *isfinite* (double) + - long__n__ *isfinite* (double__n__) -| Test for finite value. - -| int *isinf* (double) + - long__n__ *isinf* (double__n__) -| Test for infinity value (positive or negative) . 
- -| int *isnan* (double) + - long__n__ *isnan* (double__n__) -| Test for a NaN. - -| int *isnormal* (double) + - long__n__ *isnormal* (double__n__) -| Test for a normal value. - -| int *isordered* (double _x_, double _y_) + - long__n__ *isordered* (double__n x__, double__n y__) -| Test if arguments are ordered. - *isordered*() takes arguments _x_ and _y_, and returns the result - *isequal*(_x_, _x_) && *isequal*(_y_, _y_). - -| int *isunordered* (double _x_, double _y_) + - long__n__ *isunordered* (double__n x__, double__n y__) -| Test if arguments are unordered. - *isunordered*() takes arguments _x_ and _y_, returning non-zero if _x_ or - _y_ is a NaN, and zero otherwise. - -| int *signbit* (double) + - long__n__ *signbit* (double__n__) -| Test for sign bit. - The scalar version of the function returns a 1 if the sign bit in the double - is set else returns 0. - The vector version of the function returns the following for each - component in double__n__: -1 (i.e all bits set) if the sign bit in the double - is set else returns 0. - -| | - -| double__n__ *bitselect* (double__n a__, double__n b__, double__n c__) -| Each bit of the result is the corresponding bit of _a_ if the - corresponding bit of _c_ is 0. - Otherwise it is the corresponding bit of _b_. - -| double__n__ *select* (double__n a__, double__n b__, long__n c__) + - double__n__ *select* (double__n a__, double__n b__, ulong__n c__) -| For each component, + - _result[i]_ = if MSB of _c[i]_ is set ? _b[i]_ : _a[i]_. + - -|==== - -[[cl_khr_fp64-vector-data-load-and-store-functions]] -==== Vector Data Load and Store Functions - -The vector data load (*vload__n__*) and store (*vstore__n__*) functions -described in _table 6.13_ (also listed below) are extended to include -versions that read from or write to double scalar or vector values. -The generic type `gentype` is extended to include `double`. -The generic type `gentypen` is extended to include `double2`, `double3`, -`double4`, `double8` and `double16`. 
-The *vstore_half*, **vstore_half__n__** and **vstorea_half__n__** -functions are extended to allow a double precision scalar or vector -value to be written to memory as half values. - -Note: *vload3* reads (_x_,_y_,_z_) components from address -`(_p_ + (_offset_ * 3))` into a 3-component vector. -*vstore3*, and *vstore_half3* write (_x_,_y_,_z_) components from a -3-component vector to address `(_p_ + (_offset_ * 3))`. -In addition, *vloada_half3* reads (_x_,_y_,_z_) components from address -`(_p_ + (_offset_ * 4))` into a 3-component vector and *vstorea_half3* -writes (_x_,_y_,_z_) components from a 3-component vector to address -`(_p_ + (_offset_ * 4))`. -Whether *vloada_half3* and *vstorea_half3* read/write padding data -between the third vector element and the next alignment boundary is -implementation-defined. -*vloada_* and *vstorea_* variants are provided to access data that is -aligned to the size of the vector, and are intended to enable performance -on hardware that can take advantage of the increased alignment. - -._Double Precision Vector Data Load and Store Functions_ -[cols=",",options="header",] -|==== -| *Function* -| *Description* - -| gentype__n__ **vload__n__**(size_t _offset_, const {global} gentype *_p_) - - gentype__n__ **vload__n__**(size_t _offset_, const {local} gentype *_p_) - - gentype__n__ **vload__n__**(size_t _offset_, const {constant} gentype *_p_) - - gentype__n__ **vload__n__**(size_t _offset_, const {private} gentype *_p_) -| Return sizeof (gentype__n__) bytes of data read from address - (_p_ + (_offset * n_)). - If gentype is double, the read address computed as (_p_ + (_offset * n_)) - must be 64-bit aligned.
- -| void **vstore__n__**(gentype__n__ _data_, size_t _offset_, {global} gentype *_p_) - - void **vstore__n__**(gentype__n__ _data_, size_t _offset_, {local} gentype *_p_) - - void **vstore__n__**(gentype__n__ _data_, size_t _offset_, {private} gentype *_p_) -| Write sizeof (gentype__n__) bytes given by _data_ to address - (_p_ + (_offset * n_)). - If gentype is double, the write address computed as (_p_ + (_offset * n_)) - must be 64-bit aligned. - -| void **vstore_half**(double _data_, size_t _offset_, {global} half *_p_) + - void **vstore_half{rte}**(double _data_, size_t _offset_, {global} half *_p_) + - void **vstore_half{rtz}**(double _data_, size_t _offset_, {global} half *_p_) + - void **vstore_half{rtp}**(double _data_, size_t _offset_, {global} half *_p_) + - void **vstore_half{rtn}**(double _data_, size_t _offset_, {global} half *_p_) + - - void **vstore_half**(double _data_, size_t _offset_, {local} half *_p_) + - void **vstore_half{rte}**(double _data_, size_t _offset_, {local} half *_p_) + - void **vstore_half{rtz}**(double _data_, size_t _offset_, {local} half *_p_) + - void **vstore_half{rtp}**(double _data_, size_t _offset_, {local} half *_p_) + - void **vstore_half{rtn}**(double _data_, size_t _offset_, {local} half *_p_) + - - void **vstore_half**(double _data_, size_t _offset_, {private} half *_p_) + - void **vstore_half{rte}**(double _data_, size_t _offset_, {private} half *_p_) + - void **vstore_half{rtz}**(double _data_, size_t _offset_, {private} half *_p_) + - void **vstore_half{rtp}**(double _data_, size_t _offset_, {private} half *_p_) + - void **vstore_half{rtn}**(double _data_, size_t _offset_, {private} half *_p_) -| The double value given by _data_ is first converted to a half value - using the appropriate rounding mode. - The half value is then written to the address computed as - (_p_ + _offset_). - The address computed as (_p_ + _offset_) must be 16-bit aligned. - - *vstore_half* uses the current rounding mode. 
- The default current rounding mode is round to nearest even. - -| void **vstore_half__n__**(double__n__ _data_, size_t _offset_, {global} half *_p_) + - void **vstore_half__n__{rte}**(double__n__ _data_, size_t _offset_, {global} half *_p_) + - void **vstore_half__n__{rtz}**(double__n__ _data_, size_t _offset_, {global} half *_p_) + - void **vstore_half__n__{rtp}**(double__n__ _data_, size_t _offset_, {global} half *_p_) + - void **vstore_half__n__{rtn}**(double__n__ _data_, size_t _offset_, {global} half *_p_) + - - void **vstore_half__n__**(double__n__ _data_, size_t _offset_, {local} half *_p_) + - void **vstore_half__n__{rte}**(double__n__ _data_, size_t _offset_, {local} half *_p_) + - void **vstore_half__n__{rtz}**(double__n__ _data_, size_t _offset_, {local} half *_p_) + - void **vstore_half__n__{rtp}**(double__n__ _data_, size_t _offset_, {local} half *_p_) + - void **vstore_half__n__{rtn}**(double__n__ _data_, size_t _offset_, {local} half *_p_) + - - void **vstore_half__n__**(double__n__ _data_, size_t _offset_, {private} half *_p_) + - void **vstore_half__n__{rte}**(double__n__ _data_, size_t _offset_, {private} half *_p_) + - void **vstore_half__n__{rtz}**(double__n__ _data_, size_t _offset_, {private} half *_p_) + - void **vstore_half__n__{rtp}**(double__n__ _data_, size_t _offset_, {private} half *_p_) + - void **vstore_half__n__{rtn}**(double__n__ _data_, size_t _offset_, {private} half *_p_) -| The double__n__ value given by _data_ is converted to a half__n__ value - using the appropriate rounding mode. - The half__n __value is then written to the address computed as - (_p_ + (_offset * n_)). - The address computed as (_p_ + (_offset * n_)) must be 16-bit - aligned. - - **vstore_half__n __**uses the current rounding mode. - The default current rounding mode is round to nearest even. 
- -| void **vstorea_half__n__**(double__n__ _data_, size_t _offset_, {global} half *_p_) + - void **vstorea_half__n__{rte}**(double__n__ _data_, size_t _offset_, {global} half *_p_) + - void **vstorea_half__n__{rtz}**(double__n__ _data_, size_t _offset_, {global} half *_p_) + - void **vstorea_half__n__{rtp}**(double__n__ _data_, size_t _offset_, {global} half *_p_) + - void **vstorea_half__n__{rtn}**(double__n__ _data_, size_t _offset_, {global} half *_p_) + - - void **vstorea_half__n__**(double__n__ _data_, size_t _offset_, {local} half *_p_) + - void **vstorea_half__n__{rte}**(double__n__ _data_, size_t _offset_, {local} half *_p_) + - void **vstorea_half__n__{rtz}**(double__n__ _data_, size_t _offset_, {local} half *_p_) + - void **vstorea_half__n__{rtp}**(double__n__ _data_, size_t _offset_, {local} half *_p_) + - void **vstorea_half__n__{rtn}**(double__n__ _data_, size_t _offset_, {local} half *_p_) + - - void **vstorea_half__n__**(double__n__ _data_, size_t _offset_, {private} half *_p_) + - void **vstorea_half__n__{rte}**(double__n__ _data_, size_t _offset_, {private} half *_p_) + - void **vstorea_half__n__{rtz}**(double__n__ _data_, size_t _offset_, {private} half *_p_) + - void **vstorea_half__n__{rtp}**(double__n__ _data_, size_t _offset_, {private} half *_p_) + - void **vstorea_half__n__{rtn}**(double__n__ _data_, size_t _offset_, {private} half *_p_) -| The double__n__ value is converted to a half__n__ value - using the appropriate rounding mode. - - For n = 1, 2, 4, 8 or 16, the half__n__ value is written to the - address computed as - (_p_ + (_offset * n_)). - The address computed as (_p_ + (_offset * n_)) must be aligned to - sizeof (half__n__) bytes. - - For n = 3, the half__3__ value is written to the address computed as - (_p_ + (_offset * 4_)). - The address computed as (_p_ + (_offset * 4_)) must be aligned to - sizeof (half) * 4 bytes. - - **vstorea_half__n__** uses the current rounding mode. 
- The default current rounding mode is round to nearest even. -|==== - -[[cl_khr_fp64-async-copies-from-global-to-local-memory-local-to-global-memory-and-prefetch]] -==== Async Copies from Global to Local Memory, Local to Global Memory, and Prefetch - -The OpenCL C programming language implements the following functions that -provide asynchronous copies between global and local memory and a prefetch -from global memory. - -The generic type gentype is extended to include `double`, `double2`, `double3`, -`double4`, `double8` and `double16`. - -._Double Precision Built-in Async Copy and Prefetch Functions_ -[cols=",",options="header",] -|==== -| *Function* -| *Description* - -| event_t **async_work_group_copy** ( + - {local} gentype *_dst_, + - const {global} gentype *_src_, + - size_t _num_gentypes_, event_t _event_) - - event_t **async_work_group_copy** ( + - {global} gentype _*dst_, + - const {local} gentype *_src_, + - size_t _num_gentypes_, event_t _event_) -| Perform an async copy of _num_gentypes_ gentype elements from _src_ to - _dst_. - The async copy is performed by all work-items in a work-group and this - built-in function must therefore be encountered by all work-items in a - work-group executing the kernel with the same argument values; otherwise - the results are undefined. - - Returns an event object that can be used by *wait_group_events* to wait - for the async copy to finish. - The _event_ argument can also be used to associate the - *async_work_group_copy* with a previous async copy allowing an event to be - shared by multiple async copies; otherwise _event_ should be zero. - - If _event_ argument is not zero, the event object supplied in _event_ - argument will be returned. - - This function does not perform any implicit synchronization of source data - such as using a *barrier* before performing the copy. 
- -| | - -| event_t **async_work_group_strided_copy** ( + - {local} gentype _*dst_, + - const {global} gentype *_src_, + - size_t _num_gentypes_, + - size_t _src_stride_, event_t _event_) - - event_t **async_work_group_strided_copy** ( + - {global} gentype _*dst_, + - const {local} gentype *_src_, + - size_t _num_gentypes_, + - size_t _dst_stride_, event_t _event_) -| Perform an async gather of _num_gentypes_ gentype elements from _src_ to - _dst_. - The _src_stride_ is the stride in elements for each gentype element read - from _src_. - The async gather is performed by all work-items in a work-group and this - built-in function must therefore be encountered by all work-items in a - work-group executing the kernel with the same argument values; otherwise - the results are undefined. - - Returns an event object that can be used by *wait_group_events* to wait - for the async copy to finish. - The _event_ argument can also be used to associate the - *async_work_group_strided_copy* with a previous async copy allowing an - event to be shared by multiple async copies; otherwise _event_ should be - zero. - - If _event_ argument is not zero, the event object supplied in _event_ - argument will be returned. - - This function does not perform any implicit synchronization of source data - such as using a *barrier* before performing the copy. - - The behavior of *async_work_group_strided_copy* is undefined if - _src_stride_ or _dst_stride_ is 0, or if the _src_stride_ or _dst_stride_ - values cause the _src_ or _dst_ pointers to exceed the upper bounds of the - address space during the copy. - -| | - -| void *wait_group_events* ( + - int _num_events_, event_t *_event_list_) -| Wait for events that identify the *async_work_group_copy* operations to - complete. - The event objects specified in _event_list_ will be released after the - wait is performed. 
- - This function must be encountered by all work-items in a work-group - executing the kernel with the same _num_events_ and event objects - specified in _event_list_; otherwise the results are undefined. - -| void *prefetch* ( + - const {global} gentype *__p__, size_t _num_gentypes_) -| Prefetch _num_gentypes_ * sizeof(gentype) bytes into the global cache. - The prefetch instruction is applied to a work-item in a work-group and - does not affect the functional behavior of the kernel. - -|==== - -[[cl_khr_fp64-ieee754-compliance]] -==== IEEE754 Compliance - -The following table entry describes the additions to _table 4.3,_ which -allows applications to query the configuration information using -{clGetDeviceInfo} for an OpenCL device that supports double precision -floating-point. - -[cols="1,1,2",options="header",] -|==== -| *Op-code* -| *Return Type* -| *Description* - -| {CL_DEVICE_DOUBLE_FP_CONFIG} -| {cl_device_fp_config_TYPE} -| Describes double precision floating-point capability of the OpenCL device. - This is a bit-field that describes one or more of the following values: - - {CL_FP_DENORM} -- denorms are supported - - {CL_FP_INF_NAN} -- INF and NaNs are supported - - {CL_FP_ROUND_TO_NEAREST} -- round to nearest even rounding mode supported - - {CL_FP_ROUND_TO_ZERO} -- round to zero rounding mode supported - - {CL_FP_ROUND_TO_INF} -- round to positive and negative infinity rounding - modes supported - - {CL_FP_FMA} -- IEEE754-2008 fused multiply-add is supported - - {CL_FP_SOFT_FLOAT} -- Basic floating-point operations (such as addition, - subtraction, multiplication) are implemented in software. - - The required minimum double precision floating-point capability as - implemented by this extension is: - - {CL_FP_FMA} \| + - {CL_FP_ROUND_TO_NEAREST} \| + - {CL_FP_ROUND_TO_ZERO} \| + - {CL_FP_ROUND_TO_INF} \| + - {CL_FP_INF_NAN} \| + - {CL_FP_DENORM}. 
- -|==== - -IEEE754 fused multiply-add, denorms, INF and NaNs are required to be -supported for double precision floating-point numbers and operations -on double precision floating-point numbers. - -[[cl_khr_fp64-relative-error-as-ulps]] -==== Relative Error as ULPs - -In this section we discuss the maximum relative error defined as _ulp_ -(units in the last place). - -Addition, subtraction, multiplication, fused multiply-add and conversion -between integer and a floating-point format are IEEE 754 compliant and -are therefore correctly rounded using round-to-nearest even rounding mode. - -The following table describes the minimum accuracy of double precision -floating-point arithmetic operations given as ULP values. -0 ULP is used for math functions that do not require rounding. -The reference value used to compute the ULP value of an arithmetic operation -is the infinitely precise result. - -._ULP Values for Double Precision Floating-Point Arithmetic Operations_ -[cols=",",options="header",] -|==== -| *Function* -| *Min Accuracy* - -| *_x_ + _y_* -| Correctly rounded - -| *_x_ - _y_* -| Correctly rounded - -| *_x_ * _y_* -| Correctly rounded - -| *1.0 / _x_* -| Correctly rounded - -| *_x_ / _y_* -| Correctly rounded - -| | - -| *acos* -| \<= 4 ulp - -| *acosh* -| \<= 4 ulp - -| *acospi* -| \<= 5 ulp - -| *asin* -| \<= 4 ulp - -| *asinh* -| \<= 4 ulp - -| *asinpi* -| \<= 5 ulp - -| *atan* -| \<= 5 ulp - -| *atanh* -| \<= 5 ulp - -| *atanpi* -| \<= 5 ulp - -| *atan2* -| \<= 6 ulp - -| *atan2pi* -| \<= 6 ulp - -| *cbrt* -| \<= 2 ulp - -| *ceil* -| Correctly rounded - -| *clamp* -| 0 ulp - -| *copysign* -| 0 ulp - -| *cos* -| \<= 4 ulp - -| *cosh* -| \<= 4 ulp - -| *cospi* -| \<= 4 ulp - -// 3 operations from the 2 multiplications and 1 subtraction per component -| *cross* -| absolute error tolerance of 'max * max * (3 * FLT_EPSILON)' per vector component, where _max_ is the maximum input operand magnitude - -| *degrees* -| \<= 2 ulp - -// 3 ULP error in sqrt -// 0.5 
effect on e of taking sqrt(x + e) -// 1.5 * n cumulative error for multiplications -// 0.5 * (n-1) cumulative error for additions -// -// 2 accounts for error in reference code -// -// = 2 * (3 + 0.5 * ((1.5 * n) + (0.5 * (n - 1)))) -// = 2 * (3 + 0.5 * (1.5n + (0.5n - 0.5))) -// = 2 * (3 + 0.5 * (2n - 0.5)) -// = 2 * (3 + n - 0.25) -// = 2 * (2.75 + n) -// = 5.5 + 2n -| *distance* -| \<= 5.5 + 2n ulp, for gentype with vector width _n_ - -// n + n-1 Number of operations from n multiples and (n-1) additions -// 2n - 1 -| *dot* -| absolute error tolerance of 'max * max * (2n - 1) * FLT_EPSILON', for vector width _n_ and maximum input operand magnitude _max_ across all vector components - -| *erfc* -| \<= 16 ulp - -| *erf* -| \<= 16 ulp - -| *exp* -| \<= 3 ulp - -| *exp2* -| \<= 3 ulp - -| *exp10* -| \<= 3 ulp - -| *expm1* -| \<= 3 ulp - -| *fabs* -| 0 ulp - -| *fdim* -| Correctly rounded - -| *floor* -| Correctly rounded - -| *fma* -| Correctly rounded - -| *fmax* -| 0 ulp - -| *fmin* -| 0 ulp - -| *fmod* -| 0 ulp - -| *fract* -| Correctly rounded - -| *frexp* -| 0 ulp - -| *hypot* -| \<= 4 ulp - -| *ilogb* -| 0 ulp - -| *ldexp* -| Correctly rounded - -// 3 ULP error in sqrt -// 0.5 effect on e of taking sqrt(x + e) -// 0.5 * n cumulative error for multiplications -// 0.5 * (n-1) cumulative error for additions -// -// 2 accounts for error in reference code -// -// = 2 * (3 + 0.5 * ((0.5 * n) + (0.5 * (n - 1)))) -// = 2 * (3 + 0.5 * (n - 0.5)) -// = 2 * (2.75 + 0.5n) -// = 5.5 + n -| *length* -| \<= 5.5 + n ulp, for gentype with vector width _n_ - -| *log* -| \<= 3 ulp - -| *log2* -| \<= 3 ulp - -| *log10* -| \<= 3 ulp - -| *log1p* -| \<= 2 ulp - -| *logb* -| 0 ulp - -| *mad* -| Implementation-defined - -| *max* -| 0 ulp - -| *maxmag* -| 0 ulp - -| *min* -| 0 ulp - -| *minmag* -| 0 ulp - -| *mix* -| Implementation-defined - -| *modf* -| 0 ulp - -| *nan* -| 0 ulp - -| *nextafter* -| 0 ulp - -// 2.5 error in rsqrt + error in multiply -// 0.5 effect on e of taking sqrt(x 
+ e) -// 0.5 * n cumulative error for multiplications -// 0.5 * (n-1) cumulative error for additions -// -// 2 accounts for error in reference code -// -// = 2 * (2.5 + 0.5 * ((0.5 * n) + (0.5 * (n - 1)))) -// = 2 * (2.5 + 0.5 * (0.5n + (0.5n - 0.5))) -// = 2 * (2.5 + 0.5 * (n - 0.5)) -// = 2 * (2.5 + 0.5n - 0.25) -// = 2 * (2.25 + 0.5n) -// = 4.5 + n -| *normalize* -| \<= 4.5 + n ulp, for gentype with vector width _n_ - -| *pow(x, y)* -| \<= 16 ulp - -| *pown(x, y)* -| \<= 16 ulp - -| *powr(x, y)* -| \<= 16 ulp - -| *radians* -| \<= 2 ulp - -| *remainder* -| 0 ulp - -| *remquo* -| 0 ulp for the remainder, at least the lower 7 bits of the integral quotient - -| *rint* -| Correctly rounded - -| *rootn* -| \<= 16 ulp - -| *round* -| Correctly rounded - -| *rsqrt* -| \<= 2 ulp - -| *sign* -| 0 ulp - -| *sin* -| \<= 4 ulp - -| *sincos* -| \<= 4 ulp for sine and cosine values - -| *sinh* -| \<= 4 ulp - -| *sinpi* -| \<= 4 ulp - -| *smoothstep* -| Implementation-defined - -| *sqrt* -| Correctly rounded - -| *step* -| 0 ulp - -| *tan* -| \<= 5 ulp - -| *tanh* -| \<= 5 ulp - -| *tanpi* -| \<= 6 ulp - -| *tgamma* -| \<= 16 ulp - -| *trunc* -| Correctly rounded - -|==== diff --git a/ext/cl_khr_gl_depth_images.asciidoc b/ext/cl_khr_gl_depth_images.asciidoc deleted file mode 100644 index c958da90..00000000 --- a/ext/cl_khr_gl_depth_images.asciidoc +++ /dev/null @@ -1,120 +0,0 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ - -[[cl_khr_gl_depth_images]] -== Sharing OpenGL and OpenGL ES Depth and Depth-Stencil Images - -This section describes the *cl_khr_gl_depth_images* extension. -The *cl_khr_gl_depth_images* extends OpenCL / OpenGL sharing (the -cl_khr_gl_sharing_extension) defined in -<> to allow an OpenCL image to be created from an OpenGL depth or -depth-stencil texture. 
- -=== General Information - -==== Version History - -[cols="1,1,3",options="header",] -|==== -| *Date* | *Version* | *Description* -| 2020-04-21 | 1.0.0 | First assigned version. -|==== - -[[cl_khr_gl_depth_images-additions-to-chapter-5]] -=== Additions to Chapter 5 of the OpenCL 2.2 Specification - -The *cl_khr_gl_depth_images* extension extends OpenCL / OpenGL sharing by -allowing an OpenCL depth image to be created from an OpenGL depth or -depth-stencil texture. -Depth images with an image channel order of CL_DEPTH_STENCIL can only be -created using the *clCreateFromGLTexture* API. - -This extension adds the following new image format for depth-stencil images -to _table 5.6 and 5.7_ of the OpenCL 2.2 specification. - -[cols="",options="header",] -|==== -| *Enum values that can be specified in channel_order* - -| *CL_DEPTH_STENCIL*. - This format can only be used if channel data type = CL_UNORM_INT24 or - CL_FLOAT. - -|==== - -[cols=",",options="header",] -|==== -| *Image Channel Data Type* -| *Description* - -| *CL_UNORM_INT24* -| Each channel component is a normalized unsigned 24-bit integer value - -| *CL_FLOAT* -| Each channel component is a single precision floating-point value -|==== - -This extension adds the following new image format to the minimum list of -supported image formats described in _tables 5.8.a_ and _5.8.b_. - -[[cl_khr_gl_depth_images-required-image-formats]] -._Required Image Formats for_ *cl_khr_gl_depth_images* -[cols=",,,",] -|==== -| *num_channels* -| *channel_order* -| *channel_data_type* -| *read / write* - -| 1 -| CL_DEPTH_STENCIL -| CL_UNORM_INT24 + - CL_FLOAT -| read only - -|==== - -For the image format given by channel order of CL_DEPTH_STENCIL and channel -data type of CL_UNORM_INT24, the depth is stored as an unsigned normalized -24-bit value. - -For the image format given by channel order of CL_DEPTH_STENCIL and channel -data type of CL_FLOAT, each pixel is two 32-bit values. 
-The depth is stored as a single precision floating-point value followed by -the stencil which is stored as an 8-bit integer value. - -The stencil value cannot be read or written using the *read_imagef* and -*write_imagef* built-in functions in an OpenCL kernel. - -Depth image objects with an image channel order equal to CL_DEPTH_STENCIL -cannot be used as arguments to clEnqueueReadImage, clEnqueueWriteImage, -clEnqueueCopyImage, clEnqueueCopyImageToBuffer, clEnqueueCopyBufferToImage, -clEnqueueMapImage and clEnqueueFillImage and will return a -CL_INVALID_OPERATION error. - -[[cl_khr_gl_depth_images-additions-to-extension-specification]] -=== Additions to the OpenCL Extension Specification - -The following new image formats are added to the table of -<> in the OpenCL extension -specification. -If an OpenGL texture object with an internal format in this table is -successfully created by OpenGL, then there is guaranteed to be a mapping to -one of the corresponding OpenCL image format(s) in that table. - -[cols=",",options="header",] -|==== -| *GL internal format* -| *CL image format* - - *(channel order, channel data type)* - -| GL_DEPTH_COMPONENT32F | CL_DEPTH, CL_FLOAT -| GL_DEPTH_COMPONENT16 | CL_DEPTH, CL_UNORM_INT16 -| GL_DEPTH24_STENCIL8 | CL_DEPTH_STENCIL, CL_UNORM_INT24 -| GL_DEPTH32F_STENCIL8 | CL_DEPTH_STENCIL, CL_FLOAT -|==== - diff --git a/ext/cl_khr_gl_event.asciidoc b/ext/cl_khr_gl_event.asciidoc deleted file mode 100644 index d5c3b686..00000000 --- a/ext/cl_khr_gl_event.asciidoc +++ /dev/null @@ -1,274 +0,0 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ - -[[cl_khr_gl_event]] -== Creating OpenCL Event Objects from OpenGL Sync Objects - -[[cl_khr_gl_event-overview]] -=== Overview - -This section describes the *cl_khr_gl_event* extension.
-This extension allows creating OpenCL event objects linked to OpenGL fence -sync objects, potentially improving efficiency of sharing images and buffers -between the two APIs. -The companion *GL_ARB_cl_event* extension provides the complementary -functionality of creating an OpenGL sync object from an OpenCL event object. - -In addition, this extension modifies the behavior of -*clEnqueueAcquireGLObjects* and *clEnqueueReleaseGLObjects* to implicitly -guarantee synchronization with an OpenGL context bound in the same thread as -the OpenCL context. - -=== General Information - -==== Version History - -[cols="1,1,3",options="header",] -|==== -| *Date* | *Version* | *Description* -| 2020-04-21 | 1.0.0 | First assigned version. -|==== - -[[cl_khr_gl_event-new-procedures-and-functions]] -=== New Procedures and Functions - -[source,opencl] ----- -cl_event clCreateEventFromGLsyncKHR(cl_context context, - GLsync sync, - cl_int *errcode_ret); ----- - -[[cl_khr_gl_event-new-tokens]] -=== New Tokens - -Returned by *clGetEventInfo* when _param_name_ is CL_EVENT_COMMAND_TYPE: - ----- -CL_COMMAND_GL_FENCE_SYNC_OBJECT_KHR ----- - -[[cl_khr_gl_event-additions-to-chapter-5]] -=== Additions to Chapter 5 of the OpenCL 2.2 Specification - -Add following to the fourth paragraph of _section 5.11_ (prior to the -description of *clWaitForEvents*): - -"`Event objects can also be used to reflect the status of an OpenGL sync -object. -The sync object in turn refers to a fence command executing in an OpenGL -command stream. -This provides another method of coordinating sharing of buffers and images -between OpenGL and OpenCL.`" - -Add CL_COMMAND_GL_FENCE_SYNC_OBJECT_KHR to the valid _param_value_ values -returned by *clGetEventInfo* for _param_name_ CL_EVENT_COMMAND_TYPE (in the -third row and third column of _table 5.22_). 
- -Add new _subsection 5.11.1_: - -"`*5.11.1 Linking Event Objects to OpenGL Synchronization Objects* - -An event object may be created by linking to an OpenGL *sync object*. -Completion of such an event object is equivalent to waiting for completion -of the fence command associated with the linked GL sync object. - -The function -indexterm:[clCreateEventFromGLsyncKHR] -[source,opencl] ----- -cl_event clCreateEventFromGLsyncKHR(cl_context context, - GLsync sync, - cl_int *errcode_ret) ----- - -creates a linked event object. - -_context_ is a valid OpenCL context created from an OpenGL context or share -group, using the *cl_khr_gl_sharing* extension. - -_sync_ is the name of a sync object in the GL share group associated with -_context_. - -*clCreateEventFromGLsyncKHR* returns a valid OpenCL event object and -_errcode_ret_ is set to CL_SUCCESS if the event object is created -successfully. -Otherwise, it returns a `NULL` value with one of the following error values -returned in _errcode_ret_: - - * CL_INVALID_CONTEXT if _context_ is not a valid context, or was not - created from a GL context. - * CL_INVALID_GL_OBJECT if _sync_ is not the name of a sync object in the - GL share group associated with _context_. - -The parameters of an event object linked to a GL sync object will return the -following values when queried with *clGetEventInfo*: - - * The CL_EVENT_COMMAND_QUEUE of a linked event is `NULL`, because the - event is not associated with any OpenCL command-queue. - * The CL_EVENT_COMMAND_TYPE of a linked event is - CL_COMMAND_GL_FENCE_SYNC_OBJECT_KHR, indicating that the event is - associated with a GL sync object, rather than an OpenCL command. - * The CL_EVENT_COMMAND_EXECUTION_STATUS of a linked event is either - CL_SUBMITTED, indicating that the fence command associated with the sync - object has not yet completed, or CL_COMPLETE, indicating that the fence - command has completed. 
- -*clCreateEventFromGLsyncKHR* performs an implicit *clRetainEvent* on the -returned event object. -Creating a linked event object also places a reference on the linked GL sync -object. -When the event object is deleted, the reference will be removed from the GL -sync object. - -Events returned from *clCreateEventFromGLsyncKHR* can be used in the -_event_wait_list_ argument to *clEnqueueAcquireGLObjects* and CL APIs that -take a cl_event as an argument but do not enqueue commands. -Passing such events to any other CL API that enqueues commands will generate -a CL_INVALID_EVENT error.`" - -[[cl_khr_gl_event-additions-to-extension-specification]] -=== Additions to the OpenCL Extension Specification - -Add following the paragraph describing parameter _event_ to -*clEnqueueAcquireGLObjects*: - -"`If an OpenGL context is bound to the current thread, then any OpenGL -commands which - - . affect or access the contents of a memory object listed in the - _mem_objects_ list, and - . were issued on that OpenGL context prior to the call to - *clEnqueueAcquireGLObjects* - -will complete before execution of any OpenCL commands following the -*clEnqueueAcquireGLObjects* which affect or access any of those memory -objects. -If a non-`NULL` _event_ object is returned, it will report completion only -after completion of such OpenGL commands.`" - -Add following the paragraph describing parameter _event_ to -*clEnqueueReleaseGLObjects*: - -"`If an OpenGL context is bound to the current thread, then any OpenGL -commands which - - . affect or access the contents of the memory objects listed in the - _mem_objects_ list, and - . are issued on that context after the call to *clEnqueueReleaseGLObjects* - -will not execute until after execution of any OpenCL commands preceding the - -*clEnqueueReleaseGLObjects* which affect or access any of those memory -objects.
-If a non-`NULL` _event_ object is returned, it will report completion before -execution of such OpenGL commands.`" - -Replace the second paragraph of -<> with: - -"`Prior to calling *clEnqueueAcquireGLObjects*, the application must ensure -that any pending OpenGL operations which access the objects specified in -_mem_objects_ have completed. - -If the *cl_khr_gl_event* extension is supported, then the OpenCL -implementation will ensure that any such pending OpenGL operations are -complete for an OpenGL context bound to the same thread as the OpenCL -context. -This is referred to as _implicit synchronization_. - -If the *cl_khr_gl_event* extension is supported and the OpenGL context in -question supports fence sync objects, completion of OpenGL commands may also -be determined by placing a GL fence command after those commands using -*glFenceSync*, creating an event from the resulting GL sync object using -*clCreateEventFromGLsyncKHR*, and determining completion of that event -object via *clEnqueueAcquireGLObjects*. -This method may be considerably more efficient than calling *glFinish*, and -is referred to as _explicit synchronization_. -Explicit synchronization is most useful when an OpenGL context bound to -another thread is accessing the memory objects. - -If the *cl_khr_gl_event* extension is not supported, completion of OpenGL -commands may be determined by issuing and waiting for completion of a -*glFinish* command on all OpenGL contexts with pending references to these -objects. -Some implementations may offer other efficient synchronization methods. -If such methods exist they will be described in platform-specific -documentation. - -Note that no synchronization method other than *glFinish* is portable -between all OpenGL implementations and all OpenCL implementations. 
-While this is the only way to ensure completion that is portable to all -platforms, *glFinish* is an expensive operation and its use should be -avoided if the *cl_khr_gl_event* extension is supported on a platform.`" - -[[cl_khr_gl_event-issues]] -=== Issues - - . How are references between CL events and GL syncs handled? -+ --- -PROPOSED: The linked CL event places a single reference on the GL sync -object. -That reference is removed when the CL event is deleted. -A more expensive alternative would be to reflect changes in the CL event -reference count through to the GL sync. --- - - . How are linkages to synchronization primitives in other APIs handled? -+ --- -UNRESOLVED. -We will at least want to have a way to link events to EGL sync objects. -There is probably no analogous DX concept. -There would be an entry point for each type of synchronization primitive to -be linked to, such as clCreateEventFromEGLSyncKHR. - -An alternative is a generic clCreateEventFromExternalEvent taking an -attribute list. -The attribute list would include information defining the type of the -external primitive and additional information (GL sync object handle, EGL -display and sync object handle, etc.) specific to that type. -This allows a single entry point to be reused. - -These will probably be separate extensions following the API proposed here. --- - - . Should the CL_EVENT_COMMAND_TYPE correspond to the type of command - (fence) or the type of the linked sync object? -+ --- -PROPOSED: To the type of the linked sync object. --- - - . Should we support both explicit and implicit synchronization? -+ --- -PROPOSED: Yes. -Implicit synchronization is suitable when GL and CL are executing in the -same application thread. -Explicit synchronization is suitable when they are executing in different -threads but the expense of glFinish is too high. --- - - . Should this be a platform or device extension? -+ --- -PROPOSED: Platform extension. 
-This may result in considerable under-the-hood work to implement the -sync->event semantics using only the public GL API, however, when multiple -drivers and devices with different GL support levels coexist in the same -runtime. --- - - . Where can events generated from GL syncs be usable? -+ --- -PROPOSED: Only with clEnqueueAcquireGLObjects, and attempting to use such an -event elsewhere will generate an error. -There is no apparent use case for using such events elsewhere, and possibly -some cost to supporting it, balanced by the cost of checking the source of -events in all other commands accepting them as parameters. --- diff --git a/ext/cl_khr_gl_msaa_sharing.asciidoc b/ext/cl_khr_gl_msaa_sharing.asciidoc deleted file mode 100644 index 91fad53b..00000000 --- a/ext/cl_khr_gl_msaa_sharing.asciidoc +++ /dev/null @@ -1,405 +0,0 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ - -[[cl_khr_gl_msaa_sharing]] -== Creating OpenCL Memory Objects from OpenGL MSAA Textures - -This extension extends the OpenCL / OpenGL sharing (the -cl_khr_gl_sharing_extension) defined in -<> to allow an OpenCL image to be created from an OpenGL -multi-sampled (a.k.a. -MSAA) texture (color or depth). - -This extension name is *cl_khr_gl_msaa_sharing*. -This extension requires *cl_khr_gl_depth_images*. - -=== General Information - -==== Version History - -[cols="1,1,3",options="header",] -|==== -| *Date* | *Version* | *Description* -| 2020-04-21 | 1.0.0 | First assigned version. -|==== - -[[cl_khr_gl_msaa_sharing-additions-to-extension-specification]] -=== Additions to the OpenCL Extension Specification - -Allow _texture_target_ argument to *clCreateFromGLTexture* to be -GL_TEXTURE_2D_MULTISAMPLE or GL_TEXTURE_2D_MULTISAMPLE_ARRAY. 
- -If _texture_target_ is GL_TEXTURE_2D_MULTISAMPLE, *clCreateFromGLTexture* -creates an OpenCL 2D multi-sample image object from an OpenGL 2D -multi-sample texture. - -If _texture_target_ is GL_TEXTURE_2D_MULTISAMPLE_ARRAY, -*clCreateFromGLTexture* creates an OpenCL 2D multi-sample array image object -from an OpenGL 2D multi-sample texture. - -Multi-sample OpenCL image objects can only be read from a kernel. -Multi-sample OpenCL image objects cannot be used as arguments to -clEnqueueReadImage , clEnqueueWriteImage, clEnqueueCopyImage, -clEnqueueCopyImageToBuffer, clEnqueueCopyBufferToImage, clEnqueueMapImage -and clEnqueueFillImage and will return a CL_INVALID_OPERATION error. - -*Add the following entry to the table describing -<>:* - -[cols=",,",options="header",] -|==== -| *cl_gl_texture_info* -| *Return Type* -| *Info. returned in _param_value_* - -| *CL_GL_NUM_SAMPLES* -| GLsizei -| The _samples_ argument passed to *glTexImage2DMultisample* or - *glTexImage3DMultisample*. - - If _image_ is not a MSAA texture, 1 is returned. -|==== - -[[cl_khr_gl_msaa_sharing-additions-to-chapter-5]] -=== Additions to Chapter 5 of the OpenCL 2.2 Specification - -The formats described in tables 5.8.a and 5.8.b of the OpenCL 2.2 -specification and the additional formats described in -<> also support OpenCL images created from a OpenGL -multi-sampled color or depth texture. - -*Update text that describes arg value argument to clSetKernelArg with the -following:* - -"`If the argument is a multi-sample 2D image, the _arg_value_ entry must be -a pointer to a multi-sample image object. -If the argument is a multi-sample 2D depth image, the _arg_value_ entry must -be a pointer to a multisample depth image object. -If the argument is a multi-sample 2D image array, the _arg_value_ entry must -be a pointer to a multi-sample image array object. 
-If the argument is a multi-sample 2D depth image array, the _arg_value_ -entry must be a pointer to a multi-sample depth image array object.`" - -*Updated error code text for clSetKernelArg is:* - -*Add the following text:* - -"`CL_INVALID_MEM_OBJECT for an argument declared to be a multi-sample image, -multi-sample image array, multi-sample depth image or a multi-sample depth -image array and the argument value specified in _arg_value_ does not follow -the rules described above for a depth memory object or memory array object -argument.`" - -[[cl_khr_gl_msaa_sharing-additions-to-chapter-6]] -=== Additions to Chapter 6 of the OpenCL 2.2 Specification - -*Add the following new data types to _table 6.3_ in _section 6.1.3_ of the -OpenCL 2.2 specification:* - -[cols=",",options="header",] -|==== -| *Type* -| *Description* - -| *image2d_msaa_t* -| A 2D multi-sample color image. - Refer to _section 6.13.14_ for a detailed description of the built-in - functions that use this type. - -| *image2d_array_msaa_t* -| A 2D multi-sample color image array. - Refer to _section 6.13.14_ for a detailed description of the built-in - functions that use this type. - -| *image2d_msaa_depth_t* -| A 2D multi-sample depth image. - Refer to _section 6.13.14_ for a detailed description of the built-in - functions that use this type. - -| *image2d_array_msaa_depth_t* -| A 2D multi-sample depth image array. - Refer to _section 6.13.14_ for a detailed description of the built-in - functions that use this type. - -|==== - -*Add the following built-in functions to section 6.13.14.3 -- Built-in Image -Sampler-less Read Functions:* - -[source,opencl_c] ----- -float4 read_imagef( - image2d_msaa_t image, - int2 coord, - int sample) ----- - -Use the coordinate _(coord.x, coord.y)_ and _sample_ to do an element lookup -in the 2D image object specified by _image_. - -*read_imagef* returns floating-point values in the range [0.0 ... 
1.0] for -image objects created with _image_channel_data_type_ set to one of the -pre-defined packed formats or CL_UNORM_INT8, or CL_UNORM_INT16. - -*read_imagef* returns floating-point values in the range [-1.0 ... 1.0] for -image objects created with _image_channel_data_type_ set to CL_SNORM_INT8, -or CL_SNORM_INT16. - -*read_imagef* returns floating-point values for image objects created with -_image_channel_data_type_ set to CL_HALF_FLOAT or CL_FLOAT. - -Values returned by *read_imagef* for image objects with -_image_channel_data_type_ values not specified in the description above are -undefined. - -[source,opencl_c] ----- -int4 read_imagei(image2d_msaa_t image, - int2 coord, - int sample) - -uint4 read_imageui(image2d_msaa_t image, - int2 coord, - int sample) ----- - -Use the coordinate _(coord.x, coord.y)_ and _sample_ to do an element lookup -in the 2D image object specified by _image_. - -*read_imagei* and *read_imageui* return unnormalized signed integer and -unsigned integer values respectively. -Each channel will be stored in a 32-bit integer. - -*read_imagei* can only be used with image objects created with -_image_channel_data_type_ set to one of the following values: - - * CL_SIGNED_INT8, - * CL_SIGNED_INT16, and - * CL_SIGNED_INT32. - -If the _image_channel_data_type_ is not one of the above values, the values -returned by *read_imagei* are undefined. - -*read_imageui* can only be used with image objects created with -_image_channel_data_type_ set to one of the following values: - - * CL_UNSIGNED_INT8, - * CL_UNSIGNED_INT16, and - * CL_UNSIGNED_INT32. - -If the _image_channel_data_type_ is not one of the above values, the values -returned by *read_imageui* are undefined. - -[source,opencl_c] ----- -float4 read_imagef(image2d_array_msaa_t image, - int4 coord, - int sample) ----- - -Use _coord.xy_ and _sample_ to do an element lookup in the 2D image -identified by _coord.z_ in the 2D image array specified by _image_. 
- -*read_imagef* returns floating-point values in the range [0.0 ... 1.0] for -image objects created with _image_channel_data_type_ set to one of the -pre-defined packed formats or CL_UNORM_INT8, or CL_UNORM_INT16. - -*read_imagef* returns floating-point values in the range [-1.0 ... 1.0] for -image objects created with _image_channel_data_type_ set to CL_SNORM_INT8, -or CL_SNORM_INT16. - -*read_imagef* returns floating-point values for image objects created with -_image_channel_data_type_ set to CL_HALF_FLOAT or CL_FLOAT. - -Values returned by *read_imagef* for image objects with -_image_channel_data_type_ values not specified in the description above are -undefined. - - -[source,opencl_c] ----- -int4 read_imagei(image2d_array_msaa_t image, - int4 coord, - int sample) - -uint4 read_imageui(image2d_array_msaa_t image, - int4 coord, - int sample) ----- - -Use _coord.xy_ and _sample_ to do an element lookup in the 2D image -identified by _coord.z_ in the 2D image array specified by _image_. - -*read_imagei* and *read_imageui* return unnormalized signed integer and -unsigned integer values respectively. -Each channel will be stored in a 32-bit integer. - -*read_imagei* can only be used with image objects created with -_image_channel_data_type_ set to one of the following values: - - * CL_SIGNED_INT8, - * CL_SIGNED_INT16, and - * CL_SIGNED_INT32. - -If the _image_channel_data_type_ is not one of the above values, the values -returned by *read_imagei* are undefined. - -*read_imageui* can only be used with image objects created with -_image_channel_data_type_ set to one of the following values: - - * CL_UNSIGNED_INT8, - * CL_UNSIGNED_INT16, and - * CL_UNSIGNED_INT32. - -If the _image_channel_data_type_ is not one of the above values, the values -returned by *read_imageui* are undefined. 
- -[source,opencl_c] ----- -float read_imagef(image2d_msaa_depth_t image, - int2 coord, - int sample) ----- - -Use the coordinate _(coord.x, coord.y)_ and _sample_ to do an element lookup -in the 2D depth image object specified by _image_. - -*read_imagef* returns a floating-point value in the range [0.0 ... 1.0] for -depth image objects created with _image_channel_data_type_ set to -CL_UNORM_INT16 or CL_UNORM_INT24. - -*read_imagef* returns a floating-point value for depth image objects created -with _image_channel_data_type_ set to CL_FLOAT. - -Values returned by *read_imagef* for image objects with -_image_channel_data_type_ values not specified in the description above are -undefined. - -[source,opencl_c] ----- -float read_imagef(image2d_array_msaa_depth_t image, - int4 coord, - int sample) ----- - -Use _coord.xy_ and _sample_ to do an element lookup in the 2D image -identified by _coord.z_ in the 2D depth image array specified by _image_. - -*read_imagef* returns a floating-point value in the range [0.0 ... 1.0] for -depth image objects created with _image_channel_data_type_ set to -CL_UNORM_INT16 or CL_UNORM_INT24. - -*read_imagef* returns a floating-point value for depth image objects created -with _image_channel_data_type_ set to CL_FLOAT. - -Values returned by *read_imagef* for image objects with -_image_channel_data_type_ values not specified in the description above are -undefined. - -Note: When a multisample image is accessed in a kernel, the access takes one -vector of integers describing which pixel to fetch and an integer -corresponding to the sample numbers describing which sample within the pixel -to fetch. -sample identifies the sample position in the multi-sample image. - -*For best performance, we recommend that _sample_ be a literal value so it -is known at compile time and the OpenCL compiler can perform appropriate -optimizations for multi-sample reads on the device*. - -No standard sampling instructions are allowed on the multisample image.
-Accessing a coordinate outside the image and/or a sample that is outside the -number of samples associated with each pixel in the image is undefined - -*Add the following built-in functions to section 6.13.14.5 -- Built-in Image -Query Functions:* - -[source,opencl_c] ----- -int get_image_width(image2d_msaa_t image) - -int get_image_width(image2d_array_msaa_t image) - -int get_image_width(image2d_msaa_depth_t image) - -int get_image_width(image2d_array_msaa_depth_t image) ----- - -Return the image width in pixels. - -[source,opencl_c] ----- -int get_image_height(image2d_msaa_t image) - -int get_image_height(image2d_array_msaa_t image) - -int get_image_height(image2d_msaa_depth_t image) - -int get_image_height(image2d_array_msaa_depth_t image) ----- - -Return the image height in pixels. - -[source,opencl_c] ----- -int get_image_channel_data_type(image2d_msaa_t image) - -int get_image_channel_data_type(image2d_array_msaa_t image) - -int get_image_channel_data_type(image2d_msaa_depth_t image) - -int get_image_channel_data_type(image2d_array_msaa_depth_t image) ----- - -Return the channel data type. - -[source,opencl_c] ----- -int get_image_channel_order(image2d_msaa_t image) - -int get_image_channel_order(image2d_array_msaa_t image) - -int get_image_channel_order(image2d_msaa_depth_t image) - -int get_image_channel_order(image2d_array_msaa_depth_t image) ----- - -Return the image channel order. - -[source,opencl_c] ----- -int2 get_image_dim(image2d_msaa_t image) - -int2 get_image_dim(image2d_array_msaa_t image) - -int2 get_image_dim(image2d_msaa_depth_t image) - -int2 get_image_dim(image2d_array_msaa_depth_t image) ----- - -Return the 2D image width and height as an int2 type. -The width is returned in the _x_ component, and the height in the _y_ -component. - -[source,opencl_c] ----- -size_t get_image_array_size(image2d_array_msaa_depth_t image) ----- - -Return the number of images in the 2D image array. 
- -[source,opencl_c] ----- -int get_image_num_samples(image2d_msaa_t image) - -int get_image_num_samples(image2d_array_msaa_t image) - -int get_image_num_samples(image2d_msaa_depth_t image) - -int get_image_num_samples(image2d_array_msaa_depth_t image) ----- - -Return the number of samples in the 2D MSAA image diff --git a/ext/cl_khr_gl_sharing__context.asciidoc b/ext/cl_khr_gl_sharing__context.asciidoc deleted file mode 100644 index ac0cc138..00000000 --- a/ext/cl_khr_gl_sharing__context.asciidoc +++ /dev/null @@ -1,459 +0,0 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ - -[[cl_khr_gl_sharing]] -== Creating an OpenCL Context from an OpenGL Context or Share Group - -[[cl_khr_gl_sharing-overview]] -=== Overview - -This section describes functionality in the *cl_khr_gl_sharing* extension -to associate an OpenCL context with an OpenGL context or share group object. -Once an OpenCL context is associated with an OpenGL context or share group -object, the functionality described in the section -<> -may be used to share OpenGL buffer, texture, and renderbuffer objects with the OpenCL context. - -An OpenGL implementation supporting buffer objects and sharing of texture -and buffer object images with OpenCL is required by this extension. - -=== General Information - -==== Version History - -[cols="1,1,3",options="header",] -|==== -| *Date* | *Version* | *Description* -| 2020-04-21 | 1.0.0 | First assigned version. 
-|==== - -[[cl_khr_gl_sharing-new-procedures-and-functions]] -=== New Procedures and Functions - -[source,opencl] ----- -cl_int clGetGLContextInfoKHR(const cl_context_properties *properties, - cl_gl_context_info param_name, - size_t param_value_size, - void *param_value, - size_t *param_value_size_ret); ----- - -[[cl_khr_gl_sharing-new-tokens]] -=== New Tokens - -Returned by *clCreateContext*, *clCreateContextFromType*, and -*clGetGLContextInfoKHR* when an invalid OpenGL context or share group object -handle is specified in _properties_: - ----- -CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR ----- - -Accepted as the _param_name_ argument of *clGetGLContextInfoKHR*: - ----- -CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR -CL_DEVICES_FOR_GL_CONTEXT_KHR ----- - -Accepted as an attribute name in the _properties_ argument of -*clCreateContext* and *clCreateContextFromType*: - ----- -CL_GL_CONTEXT_KHR -CL_EGL_DISPLAY_KHR -CL_GLX_DISPLAY_KHR -CL_WGL_HDC_KHR -CL_CGL_SHAREGROUP_KHR ----- - -[[cl_khr_gl_sharing-additions-to-chapter-4]] -=== Additions to Chapter 4 of the OpenCL 2.2 Specification - -In _section 4.4_, replace the description of _properties_ under -*clCreateContext* with: - -"`_properties_ points to an attribute list, which is an array of ordered - pairs terminated with zero. -If an attribute is not specified in _properties_, then its default value -(listed in _table 4.5_) is used (it is said to be specified implicitly). -If _properties_ is `NULL` or empty (points to a list whose first value is -zero), all attributes take on their default values. - -Attributes control sharing of OpenCL memory objects with OpenGL buffer, -texture, and renderbuffer objects. -Depending on the platform-specific API used to bind OpenGL contexts to the -window system, the following attributes may be set to identify an OpenGL -context: - - * When the CGL binding API is supported, the attribute - CL_CGL_SHAREGROUP_KHR should be set to a CGLShareGroup handle to a CGL - share group object.
- * When the EGL binding API is supported, the attribute CL_GL_CONTEXT_KHR - should be set to an EGLContext handle to an OpenGL ES or OpenGL context, - and the attribute CL_EGL_DISPLAY_KHR should be set to the EGLDisplay - handle of the display used to create the OpenGL ES or OpenGL context. - * When the GLX binding API is supported, the attribute CL_GL_CONTEXT_KHR - should be set to a GLXContext handle to an OpenGL context, and the - attribute CL_GLX_DISPLAY_KHR should be set to the Display handle of the - X Window System display used to create the OpenGL context. - * When the WGL binding API is supported, the attribute CL_GL_CONTEXT_KHR - should be set to an HGLRC handle to an OpenGL context, and the attribute - CL_WGL_HDC_KHR should be set to the HDC handle of the display used to - create the OpenGL context. - -Memory objects created in the context so specified may be shared with the -specified OpenGL or OpenGL ES context (as well as with any other OpenGL -contexts on the share list of that context, according to the description of -sharing in the GLX 1.4 and EGL 1.4 specifications, and the WGL documentation -for OpenGL implementations on Microsoft Windows), or with the explicitly -identified OpenGL share group for CGL. -If no OpenGL or OpenGL ES context or share group is specified in the -attribute list, then memory objects may not be shared, and calling any of -the commands described in <> will result in a -CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR error.`" - -OpenCL / OpenGL sharing does not support the CL_CONTEXT_INTEROP_USER_SYNC -property defined in _table 4.5_. -Specifying this property when creating a context with OpenCL / OpenGL -sharing will return an appropriate error. 
- -Add to _table 4.5_: - -._OpenGL Sharing Context Creation Attributes_ -[cols=",,",options="header",] -|==== -| *Attribute Name* -| *Allowed Values* - - *(Default value is in bold)* -| *Description* - -| CL_GL_CONTEXT_KHR -| *0*, OpenGL context handle -| OpenGL context to associate the OpenCL context with - -| CL_CGL_SHAREGROUP_KHR -| *0*, CGL share group handle -| CGL share group to associate the OpenCL context with - -| CL_EGL_DISPLAY_KHR -| *EGL_NO_DISPLAY*, EGLDisplay handle -| EGLDisplay an OpenGL context was created with respect to - -| CL_GLX_DISPLAY_KHR -| *None*, X handle -| X Display an OpenGL context was created with respect to - -| CL_WGL_HDC_KHR -| *0*, HDC handle -| HDC an OpenGL context was created with respect to -|==== - -Replace the first error in the list for *clCreateContext* with: - -"`_errcode_ret_ returns CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR if a context -was specified by any of the following means: - - * A context was specified for an EGL-based OpenGL ES or OpenGL - implementation by setting the attributes CL_GL_CONTEXT_KHR and - CL_EGL_DISPLAY_KHR. - * A context was specified for a GLX-based OpenGL implementation by setting - the attributes CL_GL_CONTEXT_KHR and CL_GLX_DISPLAY_KHR. - * A context was specified for a WGL-based OpenGL implementation by setting - the attributes CL_GL_CONTEXT_KHR and CL_WGL_HDC_KHR - -and any of the following conditions hold: - - * The specified display and context attributes do not identify a valid - OpenGL or OpenGL ES context. - * The specified context does not support buffer and renderbuffer objects. - * The specified context is not compatible with the OpenCL context being - created (for example, it exists in a physically distinct address space, - such as another hardware device; or it does not support sharing data - with OpenCL due to implementation restrictions).
- -_errcode_ret_ returns CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR if a share -group was specified for a CGL-based OpenGL implementation by setting the -attribute CL_CGL_SHAREGROUP_KHR, and the specified share group does not -identify a valid CGL share group object. - -_errcode_ret_ returns CL_INVALID_OPERATION if a context was specified as -described above and any of the following conditions hold: - - * A context or share group object was specified for one of CGL, EGL, GLX, - or WGL and the OpenGL implementation does not support that window-system - binding API. - * More than one of the attributes CL_CGL_SHAREGROUP_KHR, - CL_EGL_DISPLAY_KHR, CL_GLX_DISPLAY_KHR, and CL_WGL_HDC_KHR is set to a - non-default value. - * Both of the attributes CL_CGL_SHAREGROUP_KHR and CL_GL_CONTEXT_KHR are - set to non-default values. - * Any of the devices specified in the _devices_ argument cannot support - OpenCL objects which share the data store of an OpenGL object. - -_errcode_ret_ returns CL_INVALID_PROPERTY if an attribute name other than -those specified in _table 4.5_ or if CL_CONTEXT_INTEROP_USER_SYNC is -specified in _properties_.`" - -Replace the description of _properties_ under *clCreateContextFromType* -with: - -"`_properties_ points to an attribute list whose format and valid contents -are identical to the *properties* argument of *clCreateContext*.`" - -Replace the first error in the list for *clCreateContextFromType* with the -same two new errors described above for *clCreateContext*. - -[[cl_khr_gl_sharing-additions-to-chapter-5]] -=== Additions to Chapter 5 of the OpenCL 2.2 Specification - -Add a new section to describe the new API for querying OpenCL devices that -support sharing with OpenGL: - -"`OpenCL device(s) corresponding to an OpenGL context may be queried. 
-Such a device may not always exist (for example, if an OpenGL context is -specified on a GPU not supporting OpenCL command-queues, but which does -support shared CL/GL objects), and if it does exist, may change over time. -When such a device does exist, acquiring and releasing shared CL/GL objects -may be faster on a command-queue corresponding to this device than on -command-queues corresponding to other devices available to an OpenCL -context. - -To query the currently corresponding device, use the function -indexterm:[clGetGLContextInfoKHR] -[source,opencl] ----- -cl_int clGetGLContextInfoKHR(const cl_context_properties *properties, - cl_gl_context_info param_name, - size_t param_value_size, - void *param_value, - size_t *param_value_size_ret) ----- - -_properties_ points to an attribute list whose format and valid contents are -identical to the _properties_ argument of *clCreateContext*. -_properties_ must identify a single valid GL context or GL share group -object. - -_param_name_ is a constant that specifies the device types to query, and -must be one of the values shown in the table below. - -_param_value_ is a pointer to memory where the result of the query is -returned as described in the table below. -If _param_value_ is `NULL`, it is ignored. - -_param_value_size_ specifies the size in bytes of memory pointed to by -_param_value_. -This size must be greater than or equal to the size of the return type -described in the table below. - -_param_value_size_ret_ returns the actual size in bytes of data being -queried by _param_value_. -If _param_value_size_ret_ is `NULL`, it is ignored. - -[[cl_khr_gl_sharing-clGetGLContextInfoKHR-table]] -._Supported Device Types for_ *clGetGLContextInfoKHR* -[cols="2,1,2",options="header",] -|==== -| *param_name* -| *Return Type* -| *Information returned in param_value* - -| CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR -| *cl_device_id* -| Return the OpenCL device currently associated with the specified OpenGL - context. 
- -| CL_DEVICES_FOR_GL_CONTEXT_KHR -| *cl_device_id[]* -| Return all OpenCL devices which may be associated with the specified - OpenGL context. -|==== - -*clGetGLContextInfoKHR* returns CL_SUCCESS if the function is executed -successfully. -If no device(s) exist corresponding to _param_name_, the call will not fail, -but the value of _param_value_size_ret_ will be zero. - -*clGetGLContextInfoKHR* returns CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR if a -context was specified by any of the following means: - - * A context was specified for an EGL-based OpenGL ES or OpenGL - implementation by setting the attributes CL_GL_CONTEXT_KHR and - CL_EGL_DISPLAY_KHR. - * A context was specified for a GLX-based OpenGL implementation by setting - the attributes CL_GL_CONTEXT_KHR and CL_GLX_DISPLAY_KHR. - * A context was specified for a WGL-based OpenGL implementation by setting - the attributes CL_GL_CONTEXT_KHR and CL_WGL_HDC_KHR. - -and any of the following conditions hold: - - * The specified display and context attributes do not identify a valid - OpenGL or OpenGL ES context. - * The specified context does not support buffer and renderbuffer objects. - * The specified context is not compatible with the OpenCL context being - created (for example, it exists in a physically distinct address space, - such as another hardware device; or it does not support sharing data - with OpenCL due to implementation restrictions). - -*clGetGLContextInfoKHR* returns CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR if a -share group was specified for a CGL-based OpenGL implementation by setting -the attribute CL_CGL_SHAREGROUP_KHR, and the specified share group does not -identify a valid CGL share group object. 
- -*clGetGLContextInfoKHR* returns CL_INVALID_OPERATION if a context was -specified as described above and any of the following conditions hold: - - * A context or share group object was specified for one of CGL, EGL, GLX, - or WGL and the OpenGL implementation does not support that window-system - binding API. - * More than one of the attributes CL_CGL_SHAREGROUP_KHR, - CL_EGL_DISPLAY_KHR, CL_GLX_DISPLAY_KHR, and CL_WGL_HDC_KHR is set to a - non-default value. - * Both of the attributes CL_CGL_SHAREGROUP_KHR and CL_GL_CONTEXT_KHR are - set to non-default values. - * Any of the devices specified in the argument cannot support - OpenCL objects which share the data store of an OpenGL object. - -*clGetGLContextInfoKHR* returns CL_INVALID_VALUE if an attribute name other -than those specified in _table 4.5_ is specified in _properties_. - -Additionally, *clGetGLContextInfoKHR* returns CL_INVALID_VALUE if -_param_name_ is not one of the values listed in the table -<>, or if the size in bytes -specified by _param_value_size_ is less than the size of the return type -shown in the table and _param_value_ is not a `NULL` value; -CL_OUT_OF_RESOURCES if there is a failure to allocate resources required by -the OpenCL implementation on the device; or CL_OUT_OF_HOST_MEMORY if there -is a failure to allocate resources required by the OpenCL implementation on -the host.`" - -[[cl_khr_gl_sharing-issues]] -=== Issues - - . How should the OpenGL context be identified when creating an associated - OpenCL context? -+ --- -RESOLVED: by using a (display,context handle) attribute pair to identify an -arbitrary OpenGL or OpenGL ES context with respect to one of the -window-system binding layers EGL, GLX, or WGL, or a share group handle to -identify a CGL share group. -If a context is specified, it need not be current to the thread calling -clCreateContext*. 
- -A previously suggested approach would use a single boolean attribute -CL_USE_GL_CONTEXT_KHR to allow creating a context associated with the -currently bound OpenGL context. -This may still be implemented as a separate extension, and might allow more -efficient acquire/release behavior in the special case where they are being -executed in the same thread as the bound GL context used to create the CL -context. --- - - . What should the format of an attribute list be? -+ --- -After considerable discussion, we think we can live with a list of -<attribute name,value> pairs terminated by zero. -The list is passed as 'cl_context_properties *_properties'_, where -cl_context_properties is typedefed to be 'intptr_t' in cl.h. - -This effectively allows encoding all scalar integer, pointer, and handle -values in the host API into the argument list and is analogous to the -structure and type of EGL attribute lists. -`NULL` attribute lists are also allowed. -Again as for EGL, any attributes not explicitly passed in the list will take -on a defined default value that does something reasonable. - -Experience with EGL, GLX, and WGL has shown attribute lists to be a -sufficiently flexible and general mechanism to serve the needs of management -calls such as context creation. -It is not completely general (encoding floating-point and non-scalar -attribute values is not straightforward), and other approaches were -suggested such as opaque attribute lists with getter/setter methods, or -arrays of variadic structures. --- - - . What's the behavior of an associated OpenGL or OpenCL context when using - resources defined by the other associated context, and that context is - destroyed? -+ --- -RESOLVED: OpenCL objects place a reference on the data store underlying the -corresponding GL object when they're created. -The GL name corresponding to that data store may be deleted, but the data -store itself remains so long as any CL object has a reference to it.
-However, destroying all GL contexts in the share group corresponding to a CL -context results in implementation-dependent behavior when using a -corresponding CL object, up to and including program termination. --- - - . How about sharing with D3D? -+ --- -Sharing between D3D and OpenCL should use the same attribute list mechanism, -though obviously with different parameters, and be exposed as a similar -parallel OpenCL extension. -There may be an interaction between that extension and this one since it's -not yet clear if it will be possible to create a CL context simultaneously -sharing GL and D3D objects. --- - - . Under what conditions will context creation fail due to sharing? -+ --- -RESOLVED: Several cross-platform failure conditions are described (GL -context or CGL share group doesn't exist, GL context doesn't support types -of GL objects, GL context implementation doesn't allow sharing), but -additional failures may result due to implementation-dependent reasons and -should be added to this extension as such failures are discovered. -Sharing between OpenCL and OpenGL requires integration at the driver -internals level. --- - - . What command-queues can *clEnqueueAcquire/ReleaseGLObjects* be placed - on? -+ --- -RESOLVED: All command-queues. -This restriction is enforced at context creation time. -If any device passed to context creation cannot support shared CL/GL -objects, context creation will fail with a CL_INVALID_OPERATION error. --- - - . How can applications determine which command-queue to place an - Acquire/Release on? -+ --- -RESOLVED: The *clGetGLContextInfoKHR* returns either the CL device currently -corresponding to a specified GL context (typically the display it's running -on), or a list of all the CL devices the specified context might run on -(potentially useful in multiheaded / "`virtual screen`" environments). 
-This command is not simply placed in <> because it relies on the same -property-list method of specifying a GL context introduced by this -extension. - -If no devices are returned, it means that the GL context exists on an older -GPU not capable of running OpenCL, but still capable of sharing objects -between GL running on that GPU and CL running elsewhere. --- - - . What is the meaning of the CL_DEVICES_FOR_GL_CONTEXT_KHR query? -+ --- -RESOLVED: The list of all CL devices that may ever be associated with a -specific GL context. -On platforms such as MacOS X, the "`virtual screen`" concept allows multiple -GPUs to back a single virtual display. -Similar functionality might be implemented on other windowing systems, such -as a transparent heterogenous multiheaded X server. -Therefore the exact meaning of this query is interpreted relative to the -binding layer API in use. --- diff --git a/ext/cl_khr_gl_sharing__memobjs.asciidoc b/ext/cl_khr_gl_sharing__memobjs.asciidoc deleted file mode 100644 index 2de4b292..00000000 --- a/ext/cl_khr_gl_sharing__memobjs.asciidoc +++ /dev/null @@ -1,778 +0,0 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ - -[[cl_khr_gl_sharing__memobjs]] -== Creating OpenCL Memory Objects from OpenGL Objects - -This section describes functionality in the *cl_khr_gl_sharing* extension -to use OpenGL buffer, texture, and renderbuffer objects as OpenCL memory objects. -OpenCL memory objects may be created from OpenGL objects if and only if the -OpenCL context is associated with an OpenGL context or share group object. -The section <> -describes how to create an OpenCL context associated with an OpenGL context or share group object. - -An OpenCL image object may be created from an OpenGL texture or renderbuffer object. -An OpenCL buffer object may be created from an OpenGL buffer object. 
- -Any supported OpenGL object defined within the associated OpenGL context -or share group object may be shared, with the exception of the default -OpenGL objects (i.e. objects named zero), which may not be shared. - -=== General Information - -==== Version History - -[cols="1,1,3",options="header",] -|==== -| *Date* | *Version* | *Description* -| 2020-04-21 | 1.0.0 | First assigned version. -|==== - -[[cl_khr_gl_sharing__memobjs-lifetime-of-shared-objects]] -=== Lifetime of Shared Objects - -An OpenCL memory object created from an OpenGL object (hereinafter referred -to as a "`shared CL/GL object`") remains valid as long as the corresponding -GL object has not been deleted. -If the GL object is deleted through the GL API (e.g. *glDeleteBuffers*, -*glDeleteTextures,* or *glDeleteRenderbuffers*), subsequent use of the CL -buffer or image object will result in undefined behavior, including but not -limited to possible CL errors and data corruption, but may not result in -program termination. - -The CL context and corresponding command-queues are dependent on the -existence of the GL share group object, or the share group associated with -the GL context from which the CL context is created. -If the GL share group object or all GL contexts in the share group are -destroyed, any use of the CL context or command-queue(s) will result in -undefined behavior, which may include program termination. -Applications should destroy the CL command-queue(s) and CL context before -destroying the corresponding GL share group or contexts - -[[cl_khr_gl_sharing__memobjs-cl-buffer-objects-from-gl-buffer-objects]] -=== OpenCL Buffer Objects from OpenGL Buffer Objects - -The function -indexterm:[clCreateFromGLBuffer] -[source,opencl] ----- -cl_mem clCreateFromGLBuffer(cl_context context, - cl_mem_flags flags, - GLuint bufobj, - cl_int *errcode_ret) ----- - -creates an OpenCL buffer object from an OpenGL buffer object. - -_context_ is a valid OpenCL context created from an OpenGL context. 
- -_flags_ is a bit-field that is used to specify usage information. -Refer to _table 5.3_ for a description of _flags_. -Only CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY and CL_MEM_READ_WRITE values -specified in _table 5.3_ can be used. - -_bufobj_ is the name of a GL buffer object. -The data store of the GL buffer object must have been previously -created by calling *glBufferData*, although its contents need not be -initialized. -The size of the data store will be used to determine the size of the CL -buffer object. - -_errcode_ret_ will return an appropriate error code as described below. -If _errcode_ret_ is `NULL`, no error code is returned. - -*clCreateFromGLBuffer* returns a valid non-zero OpenCL buffer object and -_errcode_ret_ is set to CL_SUCCESS if the buffer object is created -successfully. -Otherwise, it returns a `NULL` value with one of the following error values -returned in _errcode_ret_: - - * CL_INVALID_CONTEXT if _context_ is not a valid context or was not - created from a GL context. - * CL_INVALID_VALUE if values specified in _flags_ are not valid. - * CL_INVALID_GL_OBJECT if _bufobj_ is not a GL buffer object or is a GL - buffer object but does not have an existing data store or the size of - the buffer is 0. - * CL_OUT_OF_RESOURCES if there is a failure to allocate resources required - by the OpenCL implementation on the device. - * CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources - required by the OpenCL implementation on the host. - -The size of the GL buffer object data store at the time -*clCreateFromGLBuffer* is called will be used as the size of buffer object -returned by *clCreateFromGLBuffer*. -If the state of a GL buffer object is modified through the GL API (e.g. -*glBufferData*) while there exists a corresponding CL buffer object, -subsequent use of the CL buffer object will result in undefined behavior.
- -The *clRetainMemObject* and *clReleaseMemObject* functions can be used to -retain and release the buffer object. - -The CL buffer object created using clCreateFromGLBuffer can also be used to -create a CL 1D image buffer object. - -[[cl_khr_gl_sharing__memobjs-cl-image-objects-from-gl-textures]] -=== OpenCL Image Objects from OpenGL Textures - -The function -indexterm:[clCreateFromGLTexture] -[source,opencl] ----- -cl_mem clCreateFromGLTexture(cl_context context, - cl_mem_flags flags, - GLenum texture_target, - GLint miplevel, - GLuint texture, - cl_int *errcode_ret) ----- - -creates the following: - - * an OpenCL 2D image object from an OpenGL 2D texture object or a single - face of an OpenGL cubemap texture object, - * an OpenCL 2D image array object from an OpenGL 2D texture array object, - * an OpenCL 1D image object from an OpenGL 1D texture object, - * an OpenCL 1D image buffer object from an OpenGL texture buffer object, - * an OpenCL 1D image array object from an OpenGL 1D texture array object, - * an OpenCL 3D image object from an OpenGL 3D texture object. - -_context_ is a valid OpenCL context created from an OpenGL context. - -_flags_ is a bit-field that is used to specify usage information. -Refer to _table 5.3_ for a description of _flags_. -Only CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY and CL_MEM_READ_WRITE values -specified in _table 5.3_ may be used. - -_texture_target_ must be one of GL_TEXTURE_1D, GL_TEXTURE_1D_ARRAY, -GL_TEXTURE_BUFFER, GL_TEXTURE_2D, GL_TEXTURE_2D_ARRAY, GL_TEXTURE_3D, -GL_TEXTURE_CUBE_MAP_POSITIVE_X, GL_TEXTURE_CUBE_MAP_POSITIVE_Y, -GL_TEXTURE_CUBE_MAP_POSITIVE_Z, GL_TEXTURE_CUBE_MAP_NEGATIVE_X, -GL_TEXTURE_CUBE_MAP_NEGATIVE_Y, GL_TEXTURE_CUBE_MAP_NEGATIVE_Z, or -GL_TEXTURE_RECTANGLE (Note: GL_TEXTURE_RECTANGLE requires OpenGL 3.1. -Alternatively, GL_TEXTURE_RECTANGLE_ARB may be specified if the OpenGL -extension *GL_ARB_texture_rectangle* is supported.). -_texture_target_ is used only to define the image type of _texture_. 
-No reference to a bound GL texture object is made or implied by this -parameter. - -_miplevel_ is the mipmap level to be used. -If _texture_target_ is GL_TEXTURE_BUFFER, _miplevel_ must be 0. -Note: Implementations may return CL_INVALID_OPERATION for miplevel -values > 0. - -_texture_ is the name of a GL 1D, 2D, 3D, 1D array, 2D array, cubemap, -rectangle or buffer texture object. -The texture object must be a complete texture as per OpenGL rules on texture -completeness. -The _texture_ format and dimensions defined by OpenGL for the specified -_miplevel_ of the texture will be used to create the OpenCL image memory -object. -Only GL texture objects with an internal format that maps to appropriate -image channel order and data type specified in _tables 5.5_ and _5.6_ may be -used to create the OpenCL image memory object. - -_errcode_ret_ will return an appropriate error code as described below. -If _errcode_ret_ is `NULL`, no error code is returned. - -*clCreateFromGLTexture* returns a valid non-zero OpenCL image object and -_errcode_ret_ is set to CL_SUCCESS if the image object is created -successfully. -Otherwise, it returns a `NULL` value with one of the following error values -returned in _errcode_ret_: - - * CL_INVALID_CONTEXT if _context_ is not a valid context or was not - created from a GL context. - * CL_INVALID_VALUE if values specified in _flags_ are not valid or if - value specified in _texture_target_ is not one of the values specified - in the description of _texture_target_. - * CL_INVALID_MIP_LEVEL if _miplevel_ is less than the value of - _level~base~_ (for OpenGL implementations) or zero (for OpenGL ES - implementations); or greater than the value of _q_ (for both OpenGL and - OpenGL ES). - _level~base~_ and _q_ are defined for the texture in _section 3.8.10_ - (Texture Completeness) of the OpenGL 2.1 specification and _section - 3.7.10_ of the OpenGL ES 2.0. 
- * CL_INVALID_MIP_LEVEL if _miplevel_ is greater than zero and the OpenGL - implementation does not support creating from non-zero mipmap levels. - * CL_INVALID_GL_OBJECT if _texture_ is not a GL texture object whose type - matches _texture_target_, if the specified _miplevel_ of _texture_ is - not defined, or if the width or height of the specified _miplevel_ is - zero or if the GL texture object is incomplete. - * CL_INVALID_IMAGE_FORMAT_DESCRIPTOR if the OpenGL texture internal format - does not map to a supported OpenCL image format. - * CL_INVALID_OPERATION if _texture_ is a GL texture object created with a - border width value greater than zero. - * CL_OUT_OF_RESOURCES if there is a failure to allocate resources required - by the OpenCL implementation on the device. - * CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources - required by the OpenCL implementation on the host. - -If the state of a GL texture object is modified through the GL API (e.g. -*glTexImage2D*, *glTexImage3D* or the values of the texture parameters -GL_TEXTURE_BASE_LEVEL or GL_TEXTURE_MAX_LEVEL are modified) while there -exists a corresponding CL image object, subsequent use of the CL image -object will result in undefined behavior. - -The *clRetainMemObject* and *clReleaseMemObject* functions can be used to -retain and release the image objects. - -[[cl_khr_gl_sharing__memobjs-list-of-opengl-and-corresponding-opencl-image-formats]] -==== List of OpenGL and corresponding OpenCL Image Formats - -The table below describes the list of OpenGL texture internal formats and -the corresponding OpenCL image formats. -If an OpenGL texture object with an internal format from the table below is -successfully created by OpenGL, then there is guaranteed to be a mapping to -one of the corresponding OpenCL image format(s) in that table.
-Texture objects created with other OpenGL internal formats may (but are not -guaranteed to) have a mapping to an OpenCL image format; if such mappings -exist, they are guaranteed to preserve all color components, data types, and -at least the number of bits/component actually allocated by OpenGL for that -format. - -[[cl_khr_gl_sharing__memobjs-mapping-of-image-formats]] -._OpenGL internal formats and corresponding OpenCL internal formats_ -[cols=",",options="header",] -|==== -| *GL internal format* -| *CL image format* - - *(channel order, channel data type)* - -| GL_RGBA8 -| CL_RGBA, CL_UNORM_INT8 or - -CL_BGRA, CL_UNORM_INT8 - -| GL_SRGB8_ALPHA8 -| CL_sRGBA, CL_UNORM_INT8 - -| GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV -| CL_RGBA, CL_UNORM_INT8 - -| GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV -| CL_BGRA, CL_UNORM_INT8 - -| -| - -| GL_RGBA8I, GL_RGBA8I_EXT -| CL_RGBA, CL_SIGNED_INT8 - -| GL_RGBA16I, GL_RGBA16I_EXT -| CL_RGBA, CL_SIGNED_INT16 - -| GL_RGBA32I, GL_RGBA32I_EXT -| CL_RGBA, CL_SIGNED_INT32 - -| -| - -| GL_RGBA8UI, GL_RGBA8UI_EXT -| CL_RGBA, CL_UNSIGNED_INT8 - -| GL_RGBA16UI, GL_RGBA16UI_EXT -| CL_RGBA, CL_UNSIGNED_INT16 - -| GL_RGBA32UI, GL_RGBA32UI_EXT -| CL_RGBA, CL_UNSIGNED_INT32 - -| -| - -| GL_RGBA8_SNORM -| CL_RGBA, CL_SNORM_INT8 - -| GL_RGBA16 -| CL_RGBA, CL_UNORM_INT16 - -| GL_RGBA16_SNORM -| CL_RGBA, CL_SNORM_INT16 - -| GL_RGBA16F, GL_RGBA16F_ARB -| CL_RGBA, CL_HALF_FLOAT - -| GL_RGBA32F, GL_RGBA32F_ARB -| CL_RGBA, CL_FLOAT - -| -| - -| GL_R8 -| CL_R, CL_UNORM_INT8 - -| GL_R8_SNORM -| CL_R, CL_SNORM_INT8 - -| GL_R16 -| CL_R, CL_UNORM_INT16 - -| GL_R16_SNORM -| CL_R, CL_SNORM_INT16 - -| GL_R16F -| CL_R, CL_HALF_FLOAT - -| GL_R32F -| CL_R, CL_FLOAT - -| -| - -| GL_R8I -| CL_R, CL_SIGNED_INT8 - -| GL_R16I -| CL_R, CL_SIGNED_INT16 - -| GL_R32I -| CL_R, CL_SIGNED_INT32 - -| GL_R8UI -| CL_R, CL_UNSIGNED_INT8 - -| GL_R16UI -| CL_R, CL_UNSIGNED_INT16 - -| GL_R32UI -| CL_R, CL_UNSIGNED_INT32 - -| -| - -| GL_RG8 -| CL_RG, CL_UNORM_INT8 - -| GL_RG8_SNORM -| CL_RG, 
CL_SNORM_INT8 - -| GL_RG16 -| CL_RG, CL_UNORM_INT16 - -| GL_RG16_SNORM -| CL_RG, CL_SNORM_INT16 - -| GL_RG16F -| CL_RG, CL_HALF_FLOAT - -| GL_RG32F -| CL_RG, CL_FLOAT - -| -| - -| GL_RG8I -| CL_RG, CL_SIGNED_INT8 - -| GL_RG16I -| CL_RG, CL_SIGNED_INT16 - -| GL_RG32I -| CL_RG, CL_SIGNED_INT32 - -| GL_RG8UI -| CL_RG, CL_UNSIGNED_INT8 - -| GL_RG16UI -| CL_RG, CL_UNSIGNED_INT16 - -| GL_RG32UI -| CL_RG, CL_UNSIGNED_INT32 -|==== - -[[cl_khr_gl_sharing__memobjs-cl-image-objects-from-gl-renderbuffers]] -=== OpenCL Image Objects from OpenGL Renderbuffers - -The function -indexterm:[clCreateFromGLRenderbuffer] -[source,opencl] ----- -cl_mem clCreateFromGLRenderbuffer(cl_context context, - cl_mem_flags flags, - GLuint renderbuffer, - cl_int *errcode_ret) ----- - -creates an OpenCL 2D image object from an OpenGL renderbuffer object. - -_context_ is a valid OpenCL context created from an OpenGL context. - -_flags_ is a bit-field that is used to specify usage information. -Refer to _table 5.3_ for a description of _flags_. -Only CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY and CL_MEM_READ_WRITE values -specified in _table 5.3_ can be used. - -_renderbuffer_ is the name of a GL renderbuffer object. -The renderbuffer storage must be specified before the image object can be -created. -The _renderbuffer_ format and dimensions defined by OpenGL will be used to -create the 2D image object. -Only GL renderbuffers with internal formats that maps to appropriate image -channel order and data type specified in _tables 5.5_ and _5.6_ can be used -to create the 2D image object. - -_errcode_ret_ will return an appropriate error code as described below. -If _errcode_ret_ is `NULL`, no error code is returned. - -*clCreateFromGLRenderbuffer* returns a valid non-zero OpenCL image object -and _errcode_ret_ is set to CL_SUCCESS if the image object is created -successfully. 
-Otherwise, it returns a `NULL` value with one of the following error values -returned in _errcode_ret_: - - * CL_INVALID_CONTEXT if _context_ is not a valid context or was not - created from a GL context. - * CL_INVALID_VALUE if values specified in _flags_ are not valid. - * CL_INVALID_GL_OBJECT if _renderbuffer_ is not a GL renderbuffer object - or if the width or height of _renderbuffer_ is zero. - * CL_INVALID_IMAGE_FORMAT_DESCRIPTOR if the OpenGL renderbuffer internal - format does not map to a supported OpenCL image format. - * CL_INVALID_OPERATION if _renderbuffer_ is a multi-sample GL renderbuffer - object. - * CL_OUT_OF_RESOURCES if there is a failure to allocate resources required - by the OpenCL implementation on the device. - * CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources - required by the OpenCL implementation on the host. - -If the state of a GL renderbuffer object is modified through the GL API -(i.e. changes to the dimensions or format used to represent pixels of the GL -renderbuffer using appropriate GL API calls such as *glRenderbufferStorage*) -while there exists a corresponding CL image object, subsequent use of the CL -image object will result in undefined behavior. - -The *clRetainMemObject* and *clReleaseMemObject* functions can be used to -retain and release the image objects. - -The table <> describes the -list of OpenGL renderbuffer internal formats and the corresponding OpenCL -image formats. -If an OpenGL renderbuffer object with an internal format from the table is -successfully created by OpenGL, then there is guaranteed to be a mapping to -one of the corresponding OpenCL image format(s) in that table. 
-Renderbuffer objects created with other OpenGL internal formats may (but are -not guaranteed to) have a mapping to an OpenCL image format; if such -mappings exist, they are guaranteed to preserve all color components, data -types, and at least the number of bits/component actually allocated by -OpenGL for that format. - -[[cl_khr_gl_sharing__memobjs-querying-gl-object-information-from-a-cl-memory-object]] -=== Querying OpenGL object information from an OpenCL memory object - -The OpenGL object used to create the OpenCL memory object and information -about the object type i.e. whether it is a texture, renderbuffer or buffer -object can be queried using the following function. -indexterm:[clGetGLObjectInfo] -[source,opencl] ----- -cl_int clGetGLObjectInfo(cl_mem memobj, - cl_gl_object_type *gl_object_type, - GLuint *gl_object_name) ----- - -_gl_object_type_ returns the type of GL object attached to _memobj_ and can -be CL_GL_OBJECT_BUFFER, CL_GL_OBJECT_TEXTURE2D, CL_GL_OBJECT_TEXTURE3D, -CL_GL_OBJECT_TEXTURE2D_ARRAY, CL_GL_OBJECT_TEXTURE1D, -CL_GL_OBJECT_TEXTURE1D_ARRAY, CL_GL_OBJECT_TEXTURE_BUFFER, or -CL_GL_OBJECT_RENDERBUFFER. -If _gl_object_type_ is `NULL`, it is ignored - -_gl_object_name_ returns the GL object name used to create _memobj_. -If _gl_object_name_ is `NULL`, it is ignored. - -*clGetGLObjectInfo* returns CL_SUCCESS if the call was executed -successfully. -Otherwise, it returns one of the following errors: - - * CL_INVALID_MEM_OBJECT if _memobj_ is not a valid OpenCL memory object. - * CL_INVALID_GL_OBJECT if there is no GL object associated with _memobj_. - * CL_OUT_OF_RESOURCES if there is a failure to allocate resources required - by the OpenCL implementation on the device. - * CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources - required by the OpenCL implementation on the host. 
- -The function -indexterm:[clGetGLTextureInfo] -[source,opencl] ----- -cl_int clGetGLTextureInfo(cl_mem memobj, - cl_gl_texture_info param_name, - size_t param_value_size, - void *param_value, - size_t *param_value_size_ret) ----- - -returns additional information about the GL texture object associated with -_memobj_. - -_param_name_ specifies what additional information about the GL texture -object associated with _memobj_ to query. -The list of supported _param_name_ types and the information returned in -_param_value_ by *clGetGLTextureInfo* is described in the table below. - -_param_value_ is a pointer to memory where the result being queried is -returned. -If _param_value_ is `NULL`, it is ignored. - -_param_value_size_ is used to specify the size in bytes of memory pointed to -by _param_value_. -This size must be >= size of return type as described in the table below. - -_param_value_size_ret_ returns the actual size in bytes of data copied to -_param_value_. -If _param_value_size_ret_ is `NULL`, it is ignored. - -[[cl_khr_gl_sharing__memobjs-clGetGLTextureInfo-queries]] -._OpenGL texture info that may be queried with_ *clGetGLTextureInfo* -[cols=",,",options="header",] -|==== -| *cl_gl_texture_info* -| *Return Type* -| *Info. returned in _param_value_* - -| *CL_GL_TEXTURE_TARGET* -| GLenum -| The _texture_target_ argument specified in *clCreateFromGLTexture*. - -| *CL_GL_MIPMAP_LEVEL* -| GLint -| The _miplevel_ argument specified in *clCreateFromGLTexture*. -|==== - -*clGetGLTextureInfo* returns CL_SUCCESS if the function is executed -successfully. -Otherwise, it returns one of the following errors: - - * CL_INVALID_MEM_OBJECT if _memobj_ is not a valid OpenCL memory object. - * CL_INVALID_GL_OBJECT if there is no GL texture object associated with - _memobj_. 
- * CL_INVALID_VALUE if _param_name_ is not valid, or if size in bytes - specified by _param_value_size_ is less than the size of the return type - as described in the table above and _param_value_ is not `NULL`, or if - _param_value_ and _param_value_size_ret_ are `NULL`. - * CL_OUT_OF_RESOURCES if there is a failure to allocate resources required - by the OpenCL implementation on the device. - * CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources - required by the OpenCL implementation on the host. - -[[cl_khr_gl_sharing__memobjs-sharing-memory-objects-that-map-to-gl-objects-between-gl-and-cl-contexts]] -=== Sharing memory objects that map to GL objects between GL and CL contexts - -The function -indexterm:[clEnqueueAcquireGLObjects] -[source,opencl] ----- -cl_int clEnqueueAcquireGLObjects(cl_command_queue command_queue, - cl_uint num_objects, - const cl_mem *mem_objects, - cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, - cl_event *event) ----- - -is used to acquire OpenCL memory objects that have been created from OpenGL -objects. -These objects need to be acquired before they can be used by any OpenCL -commands queued to a command-queue or the behaviour is undefined. -The OpenGL objects are acquired by the OpenCL context associated with -_command_queue_ and can therefore be used by all command-queues associated -with the OpenCL context. - -_command_queue_ is a valid command-queue. -All devices used to create the OpenCL context associated with -_command_queue_ must support acquiring shared CL/GL objects. -This constraint is enforced at context creation time. - -_num_objects_ is the number of memory objects to be acquired in -_mem_objects_. - -_mem_objects_ is a pointer to a list of CL memory objects that correspond to -GL objects. - -_event_wait_list_ and _num_events_in_wait_list_ specify events that need to -complete before this particular command can be executed. 
-If _event_wait_list_ is `NULL`, then this particular command does not wait -on any event to complete. -If _event_wait_list_ is `NULL`, _num_events_in_wait_list_ must be 0. -If _event_wait_list_ is not `NULL`, the list of events pointed to by -_event_wait_list_ must be valid and _num_events_in_wait_list_ must be -greater than 0. -The events specified in _event_wait_list_ act as synchronization points. - -_event_ returns an event object that identifies this command -and can be used to query or wait for this command to complete. -If _event_ is `NULL` or the enqueue is unsuccessful, no event will be -created and therefore it will not be possible to query the status of this -command or to wait for this command to complete. -If _event_wait_list_ and _event_ are not `NULL`, _event_ must not refer -to an element of the _event_wait_list_ array. - -*clEnqueueAcquireGLObjects* returns CL_SUCCESS if the function is executed -successfully. -If _num_objects_ is 0 and _mem_objects_ is `NULL` the function does nothing -and returns CL_SUCCESS. -Otherwise, it returns one of the following errors: - - * CL_INVALID_VALUE if _num_objects_ is zero and _mem_objects_ is not a - `NULL` value or if _num_objects_ > 0 and _mem_objects_ is `NULL`. - * CL_INVALID_MEM_OBJECT if memory objects in _mem_objects_ are not valid - OpenCL memory objects. - * CL_INVALID_COMMAND_QUEUE if _command_queue_ is not a valid - command-queue. - * CL_INVALID_CONTEXT if context associated with _command_queue_ was not - created from an OpenGL context - * CL_INVALID_GL_OBJECT if memory objects in _mem_objects_ have not been - created from a GL object(s). - * CL_INVALID_EVENT_WAIT_LIST if _event_wait_list_ is `NULL` and - _num_events_in_wait_list_ > 0, or _event_wait_list_ is not `NULL` and - _num_events_in_wait_list_ is 0, or if event objects in _event_wait_list_ - are not valid events. - * CL_OUT_OF_RESOURCES if there is a failure to allocate resources required - by the OpenCL implementation on the device.
- * CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources - required by the OpenCL implementation on the host. - -The function -indexterm:[clEnqueueReleaseGLObjects] -[source,opencl] ----- -cl_int clEnqueueReleaseGLObjects(cl_command_queue command_queue, - cl_uint num_objects, - const cl_mem *mem_objects, - cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, - cl_event *event) ----- - -is used to release OpenCL memory objects that have been created from OpenGL -objects. -These objects need to be released before they can be used by OpenGL. -The OpenGL objects are released by the OpenCL context associated with -_command_queue_. - -_num_objects_ is the number of memory objects to be released in -_mem_objects_. - -_mem_objects_ is a pointer to a list of CL memory objects that correspond to -GL objects. - -_event_wait_list_ and _num_events_in_wait_list_ specify events that need to -complete before this command can be executed. -If _event_wait_list_ is `NULL`, then this particular command does not wait -on any event to complete. -If _event_wait_list_ is `NULL`, _num_events_in_wait_list_ must be 0. -If _event_wait_list_ is not `NULL`, the list of events pointed to by -_event_wait_list_ must be valid and _num_events_in_wait_list_ must be -greater than 0. -The events specified in _event_wait_list_ act as synchronization points. - -_event_ returns an event object that identifies this command -and can be used to query or wait for this command to complete. -If _event_ is `NULL` or the enqueue is unsuccessful, no event will be -created and therefore it will not be possible to query the status of this -command or to wait for this command to complete. -If _event_wait_list_ and _event_ are not `NULL`, _event_ must not refer -to an element of the _event_wait_list_ array. - -*clEnqueueReleaseGLObjects* returns CL_SUCCESS if the function is executed -successfully. 
-If _num_objects_ is 0 and _mem_objects_ is `NULL` the function does nothing -and returns CL_SUCCESS. -Otherwise, it returns one of the following errors: - - * CL_INVALID_VALUE if _num_objects_ is zero and _mem_objects_ is not a - `NULL` value or if _num_objects_ > 0 and _mem_objects_ is `NULL`. - * CL_INVALID_MEM_OBJECT if memory objects in _mem_objects_ are not valid - OpenCL memory objects. - * CL_INVALID_COMMAND_QUEUE if _command_queue_ is not a valid - command-queue. - * CL_INVALID_CONTEXT if context associated with _command_queue_ was not - created from an OpenGL context - * CL_INVALID_GL_OBJECT if memory objects in _mem_objects_ have not been - created from a GL object(s). - * CL_INVALID_EVENT_WAIT_LIST if _event_wait_list_ is `NULL` and - _num_events_in_wait_list_ > 0, or _event_wait_list_ is not `NULL` and - _num_events_in_wait_list_ is 0, or if event objects in _event_wait_list_ - are not valid events. - * CL_OUT_OF_RESOURCES if there is a failure to allocate resources required - by the OpenCL implementation on the device. - * CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources - required by the OpenCL implementation on the host. - -[[cl_khr_gl_sharing__memobjs-synchronizing-opencl-and-opengl-access-to-shared-objects]] -==== Synchronizing OpenCL and OpenGL Access to Shared Objects - -In order to ensure data integrity, the application is responsible for -synchronizing access to shared CL/GL objects by their respective APIs. -Failure to provide such synchronization may result in race conditions and -other undefined behavior including non-portability between implementations. - -Prior to calling *clEnqueueAcquireGLObjects*, the application must ensure -that any pending GL operations which access the objects specified in -_mem_objects_ have completed. -This may be accomplished portably by issuing and waiting for completion of a -*glFinish* command on all GL contexts with pending references to these -objects. 
-Implementations may offer more efficient synchronization methods; for -example on some platforms calling *glFlush* may be sufficient, or -synchronization may be implicit within a thread, or there may be -vendor-specific extensions that enable placing a fence in the GL command -stream and waiting for completion of that fence in the CL command-queue. -Note that no synchronization methods other than *glFinish* are portable -between OpenGL implementations at this time. - -Similarly, after calling *clEnqueueReleaseGLObjects*, the application is -responsible for ensuring that any pending OpenCL operations which access the -objects specified in _mem_objects_ have completed prior to executing -subsequent GL commands which reference these objects. -This may be accomplished portably by calling *clWaitForEvents* with the -event object returned by *clEnqueueReleaseGLObjects,* or by calling -*clFinish*. -As above, some implementations may offer more efficient methods. - -The application is responsible for maintaining the proper order of -operations if the CL and GL contexts are in separate threads. - -If a GL context is bound to a thread other than the one in which -*clEnqueueReleaseGLObjects* is called, changes to any of the objects in -_mem_objects_ may not be visible to that context without additional steps -being taken by the application. -For an OpenGL 3.1 (or later) context, the requirements are described in -Appendix D ("`Shared Objects and Multiple Contexts`") of the OpenGL 3.1 -Specification. -For prior versions of OpenGL, the requirements are implementation-dependent. - -Attempting to access the data store of an OpenGL object after it has been -acquired by OpenCL and before it has been released will result in undefined -behavior. -Similarly, attempting to access a shared CL/GL object from OpenCL before it -has been acquired by the OpenCL command-queue, or after it has been -released, will result in undefined behavior. 
- -[[cl_khr_gl_sharing__memobjs-event-command-types]] -==== Event Command Types for Sharing memory objects that map to GL objects - -The following table describes the event command types for the OpenCL commands -to acquire and release OpenCL memory objects that have been created from -OpenGL objects: - -.List of supported event command types -[width="100%",cols="2,3",options="header"] -|==== -| *Events Created By* -| *Event Command Type* - -| {clEnqueueAcquireGLObjects} -| {CL_COMMAND_ACQUIRE_GL_OBJECTS_anchor} - -include::{generated}/api/version-notes/CL_COMMAND_ACQUIRE_GL_OBJECTS.asciidoc[] - -| {clEnqueueReleaseGLObjects} -| {CL_COMMAND_RELEASE_GL_OBJECTS_anchor} - -include::{generated}/api/version-notes/CL_COMMAND_RELEASE_GL_OBJECTS.asciidoc[] - -|==== diff --git a/ext/cl_khr_il_program.asciidoc b/ext/cl_khr_il_program.asciidoc deleted file mode 100644 index 05201a14..00000000 --- a/ext/cl_khr_il_program.asciidoc +++ /dev/null @@ -1,169 +0,0 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ - -[[cl_khr_il_program]] -== Intermediate Language Programs - -This section describes the *cl_khr_il_program* extension. - -This extension adds the ability to create programs with intermediate language (IL), -usually SPIR-V. Further information about the format and contents of SPIR-V may be -found in the SPIR-V Specification. Information about how SPIR-V modules behave in -the OpenCL environment may be found in the OpenCL SPIR-V Environment Specification. - -The functionality described by this extension is a core feature in OpenCL 2.1. - -=== General Information - -==== Version History - -[cols="1,1,3",options="header",] -|==== -| *Date* | *Version* | *Description* -| 2020-04-21 | 1.0.0 | First assigned version.
-|==== - -[[cl_khr_il_program-new-procedures-and-functions]] -=== New Procedures and Functions - -[source,opencl] ----- -cl_program clCreateProgramWithILKHR(cl_context context, - const void *il, - size_t length, - cl_int *errcode_ret); ----- - -[[cl_khr_il_program-new-tokens]] -=== New Tokens - -Accepted as a new _param_name_ argument to {clGetDeviceInfo}: - ----- -CL_DEVICE_IL_VERSION_KHR ----- - -Accepted as a new _param_name_ argument to {clGetProgramInfo}: - ----- -CL_PROGRAM_IL_KHR ----- - -[[cl_khr_il_program-additions-to-chapter-3]] -=== Additions to Chapter 3 of the OpenCL 2.0 Specification - -In section 3.1, replace the fourth paragraph with: - -"Programmers provide programs in the form of intermediate language binaries (usually SPIR-V), OpenCL C source strings, or implementation-defined binary objects. The OpenCL platform provides a compiler to translate programs represented as intermediate language binaries or OpenCL C source strings into device program executables. The compiler may be _online_ or _offline_. An _online compiler_ is available during host program execution using standard APIs. An _offline compiler_ is invoked outside of host program control, using platform-specific methods. The OpenCL runtime allows developers to get a previously compiled device program executable and to load and execute a previously compiled device program executable." - -[[cl_khr_il_program-additions-to-chapter-4]] -=== Additions to Chapter 4 of the OpenCL 2.0 Specification - -Add to Table 4.3 - OpenCL Device Queries: - -[caption="Table 4.3 "] -.List of supported param_names by {clGetDeviceInfo} -[width="100%",cols="2,1,3",options="header"] -|==== -| Device Info | Return Type | Description - -|{CL_DEVICE_IL_VERSION_KHR} -|char[] -|The intermediate languages that can be supported by {clCreateProgramWithILKHR} for this device.
+ -{blank} -Returns a space separated list of IL version strings of the form: + -{blank} -+<IL_Prefix>_<Major_Version>.<Minor_Version>+ + -{blank} -A device that supports the *cl_khr_il_program* extension must support the “SPIR-V” IL prefix. - -|==== - -[[cl_khr_il_program-additions-to-chapter-5]] -=== Additions to Chapter 5 of the OpenCL 2.0 Specification - -Add to Section 5.8.1: Creating Program Objects: - -"The function - -include::{generated}/api/protos/clCreateProgramWithILKHR.txt[] - -creates a new program object for _context_ using the _length_ bytes of intermediate language pointed to by _il_. - -_context_ must be a valid OpenCL context. - -_il_ is a pointer to a _length_-byte block of memory containing intermediate language. - -_length_ is the length of the block of memory pointed to by _il_. - -_errcode_ret_ will return an appropriate error code. If _errcode_ret_ is NULL, no error code is returned. - -{clCreateProgramWithILKHR} returns a valid non-zero program object and _errcode_ret_ is set to {CL_SUCCESS} if the program object is created successfully. Otherwise, it returns a NULL value with one of the following error values returned in _errcode_ret_: - -* {CL_INVALID_CONTEXT} if _context_ is not a valid context -* {CL_INVALID_VALUE} if _il_ is NULL or if _length_ is zero. -* {CL_INVALID_VALUE} if the _length_-byte block of memory pointed to by _il_ does not contain well-formed intermediate language. -* {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required by the OpenCL implementation on the device. -* {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources required by the OpenCL implementation on the host." - -Add to Section 5.8.2: Building Program Executables: - -Add the following to the description of the _options_ parameter to {clBuildProgram}: - -"Certain options are ignored when _program_ is created with IL."
- -Additionally, replace the error: - -* {CL_INVALID_OPERATION} if _program_ was not created with {clCreateProgramWithSource} or {clCreateProgramWithBinary}. - -with: - -* {CL_INVALID_OPERATION} if _program_ was not created with {clCreateProgramWithSource}, {clCreateProgramWithILKHR} or {clCreateProgramWithBinary}. - -Add to Section 5.8.3: Separate Compilation and Linking of Programs: - -Add the following to the description of the _options_ parameter to {clCompileProgram}: - -"Certain options are ignored when _program_ is created with IL." - -Additionally, replace the error: - -* {CL_INVALID_OPERATION} if _program_ has no source i.e. it has not been created with {clCreateProgramWithSource}. - -with: - -* {CL_INVALID_OPERATION} if _program_ was not created with {clCreateProgramWithSource} or {clCreateProgramWithILKHR}. - -Add to Section 5.8.4.1: Preprocessor Options, + -Add to Section 5.8.4.2: Math Intrinsic Options (for -cl-single-precision-constant-only), + -Add to Section 5.8.4.3: Optimization Options, + -Add to Section 5.8.4.4: Options to Request or Suppress Warnings, and + -Add to Section 5.8.4.5: Options Controlling the OpenCL C Version: - -"These options are ignored for programs created with IL." - -Change one entry and add one new entry to Table 5.17 {clGetProgramInfo} parameter queries: - -[caption="Table 5.17 "] -.List of supported param_names by {clGetProgramInfo} -[width="100%",cols="2,1,3",options="header"] -|==== -| Program Info | Return Type | Description - -|{CL_PROGRAM_SOURCE} -|{char_TYPE}[] -|Return the program source code specified by {clCreateProgramWithSource}. The source string returned is a concatenation of all source strings -specified to {clCreateProgramWithSource} with a null terminator. The concatenation strips any nulls in the original source strings. 
+ -{blank} -If program is created using {clCreateProgramWithBinary}, {clCreateProgramWithBuiltInKernels}, or {clCreateProgramWithILKHR} a null string or the appropriate program source code is returned depending on whether or not the program source code is stored in the binary. + -{blank} -The actual number of characters that represents the program source code including the null terminator is returned in _param_value_size_ret_. - -|{CL_PROGRAM_IL_KHR} -|{unsigned_char_TYPE}[] -|Returns the program IL for programs created with {clCreateProgramWithILKHR}. + -{blank} -If program is created with {clCreateProgramWithSource}, {clCreateProgramWithBinary}, or {clCreateProgramWithBuiltInKernels}, the memory pointed to by _param_value_ will be unchanged and _param_value_size_ret_ will be set to zero. - -|==== diff --git a/ext/cl_khr_image2d_from_buffer.asciidoc b/ext/cl_khr_image2d_from_buffer.asciidoc deleted file mode 100644 index ce4e7f21..00000000 --- a/ext/cl_khr_image2d_from_buffer.asciidoc +++ /dev/null @@ -1,66 +0,0 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ - -[[cl_khr_image2d_from_buffer]] -== Creating a 2D Image From A Buffer - -This section describes the *cl_khr_image2d_from_buffer* extension. - -This extension allows a 2D image to be created from an existing OpenCL buffer memory object. - -This extension became a core feature in OpenCL 2.0. - -=== General Information - -==== Version History - -[cols="1,1,3",options="header",] -|==== -| *Date* | *Version* | *Description* -| 2020-04-21 | 1.0.0 | First assigned version. -|==== - -=== Additions to Chapter 4 of the OpenCL 1.2 Specification - -The following table entry describes the additions to _table 4.3,_ which allows applications to query the configuration information using {clGetDeviceInfo} for an OpenCL device that supports creating a 2D image from a buffer. 
- -[cols="2,1,2",options="header",] -|======================================================================= -|Device Info -|Return Type -|Description - -|{CL_DEVICE_IMAGE_PITCH_ALIGNMENT_KHR} -|{cl_uint_TYPE} -|The row pitch alignment size in pixels for images created from a buffer. The value returned must be a power of 2. + -{blank} -If the device does not support images, this value should be 0. - -|{CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT_KHR} -|{cl_uint_TYPE} -|This query should be used when an image is created from a buffer which was created using {CL_MEM_USE_HOST_PTR}. The value returned must be a power of 2. + -{blank} -This query specifies the minimum alignment in pixels of the _host_ptr_ specified to {clCreateBuffer}. + -{blank} -If the device does not support images, this value should be 0. - -|======================================================================= - -=== Additions to Chapter 5 of the OpenCL 1.2 Specification - -Add to Section 5.3.1: Creating Image Objects: - -A 2D image can be created from a buffer by specifying a _buffer_ object in the _image_desc_ passed to {clCreateImage} for an _image_type_ equal to {CL_MEM_OBJECT_IMAGE2D}. When the 2D image from buffer is created, the client must specify the width, height and image format (i.e. channel order and channel data type). If these are not specified, {clCreateImage} returns a NULL value with _errcode_ret_ set to {CL_INVALID_IMAGE_FORMAT_DESCRIPTOR}. The pitch can be optionally specified. If the pitch is not specified, the pitch is computed as width {times} bytes per pixel based on the image format. - -The pitch specified (or computed if pitch specified is 0) must be a multiple of the maximum of the {CL_DEVICE_IMAGE_PITCH_ALIGNMENT_KHR} value for all devices in the context associated with the _buffer_ that support images. Otherwise, {clCreateImage} returns a NULL value with _errcode_ret_ set to {CL_INVALID_IMAGE_FORMAT_DESCRIPTOR}. 
- -If the _buffer_ was created with {CL_MEM_USE_HOST_PTR}, the _host_ptr_ specified to {clCreateBuffer} must be aligned to the maximum of the {CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT_KHR} value for all devices in the context associated with the _buffer_ that support images. Otherwise, {clCreateImage} returns a NULL value with _errcode_ret_ set to {CL_INVALID_IMAGE_FORMAT_DESCRIPTOR}. - -The minimum list of supported image formats described in _table 5.8_ of the OpenCL 1.2 specification must be supported for 2D images created from a buffer. - -The OpenCL runtime APIs that operate on images (i.e. {clEnqueueReadImage}, {clEnqueueWriteImage}, {clEnqueueFillImage}, {clEnqueueCopyImage}, {clEnqueueCopyImageToBuffer}, {clEnqueueCopyBufferToImage} and {clEnqueueMapImage}) are supported for a 2D image created from a buffer. - -When the contents of a buffer object data store are modified, those changes are reflected in the contents of the 2D image object and vice-versa at corresponding synchronization points. The _image_height_ {times} _image_row_pitch_ specified in _image_desc_ must be less than or equal to the size of the buffer object data store. - -NOTE: Concurrent reading from, writing to, and copying between both a buffer object and the 2D image object associated with the buffer object is undefined. Only reading from both a buffer object and 2D image object associated with the buffer object is defined. A 2D image and a 2D image created from a buffer use the same image type in OpenCL C (`image2d_t`). The image built-in functions described in _section 6.12.14.2_, _6.12.14.3_, _6.12.14.4_ and _6.12.14.5_ for `image2d_t` behave the same way for a 2D image and a 2D image from a buffer. diff --git a/ext/cl_khr_initialize_memory.asciidoc b/ext/cl_khr_initialize_memory.asciidoc deleted file mode 100644 index 29a078bf..00000000 --- a/ext/cl_khr_initialize_memory.asciidoc +++ /dev/null @@ -1,71 +0,0 @@ -// Copyright 2017-2024 The Khronos Group.
This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ - -[[cl_khr_initialize_memory]] -== Local and Private Memory Initialization - -Memory is allocated in various forms in OpenCL both explicitly (global -memory) or implicitly (local, private memory). -This allocation so far does not provide a straightforward mechanism to -initialize the memory on allocation. -In other words what is lacking is the equivalent of calloc for the currently -supported malloc like capability. -This functionality is useful for a variety of reasons including ease of -debugging, application controlled limiting of visibility to previous -contents of memory and in some cases, optimization. - -This extension adds support for initializing local and private memory before -a kernel begins execution. -This extension name is *cl_khr_initialize_memory*. - -=== General Information - -==== Version History - -[cols="1,1,3",options="header",] -|==== -| *Date* | *Version* | *Description* -| 2020-04-21 | 1.0.0 | First assigned version. -|==== - -[[cl_khr_initialize_memory-additions-to-chapter-4]] -=== Additions to Chapter 4 of the OpenCL 2.2 Specification - -Add a new context property to _table 4.5_ in _section 4.4_. - -// Note: Some of these extension enums and types are currently missing, -// see https://github.com/KhronosGroup/OpenCL-Docs/issues/872 - -.List of supported context creation properties by {clCreateContext} -[cols="3,2,4",options="header",] -|==== -| Context Property -| Property value -| Description - -| {CL_CONTEXT_MEMORY_INITIALIZE_KHR} -| {cl_context_memory_initialize_khr_TYPE} -| Describes which memory types for the context must be initialized. - This is a bit-field, where the following values are currently supported: - - {CL_CONTEXT_MEMORY_INITIALIZE_LOCAL_KHR} -- Initialize local memory to - zeros. - - {CL_CONTEXT_MEMORY_INITIALIZE_PRIVATE_KHR} -- Initialize private memory to - zeros. 
- -|==== - -[[cl_khr_initialize_memory-additions-to-chapter-6]] -=== Additions to Chapter 6 of the OpenCL 2.2 Specification - -Updates to _section 6.9_ -- Restrictions - -If the context is created with {CL_CONTEXT_MEMORY_INITIALIZE_KHR}, appropriate -memory locations as specified by the bit-field are initialized with zeroes, -prior to the start of execution of any kernel. -The driver chooses when, prior to kernel execution, the initialization of -local and/or private memory is performed. -The only requirement is there should be no values set from outside the -context, which can be read during a kernel execution. diff --git a/ext/cl_khr_int32_atomics.asciidoc b/ext/cl_khr_int32_atomics.asciidoc deleted file mode 100644 index f6b79ae8..00000000 --- a/ext/cl_khr_int32_atomics.asciidoc +++ /dev/null @@ -1,236 +0,0 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ - -[[cl_khr_int32_atomics]] -== 32-bit Atomics - -This section describes the extensions *cl_khr_global_int32_base_atomics*, *cl_khr_global_int32_extended_atomics*, *cl_khr_local_int32_base_atomics*, and *cl_khr_local_int32_extended_atomics*. -These extensions allow atomic operations to be performed on 32-bit signed and unsigned integers in global and local memory. - -These extensions became core features in OpenCL 1.1, except the built-in atomic function names are changed to use the **atomic_** prefix instead of **atom_** and the volatile qualifier was added to the pointer parameter _p_. - -=== General Information - -==== Version History - -[cols="1,1,3",options="header",] -|==== -| *Date* | *Version* | *Description* -| 2020-04-21 | 1.0.0 | First assigned version.
-|==== - -=== Global Atomics for 32-bit Integers - -==== Base Atomics - -._Built-in Atomic Functions for_ *cl_khr_global_int32_base_atomics* -[cols="9,5",options="header",] -|======================================================================= -|*Function* |*Description* - -| -int **atom_add** (volatile {global} int *_p_, int _val_) + -uint **atom_add** (volatile {global} uint *_p_, uint _val_) - -|Read the 32-bit value (referred to as _old_) stored at location -pointed by _p_. Compute (_old_ + _val_) and store result at location -pointed by _p_. The function returns _old_. - -| -int **atom_sub** (volatile {global} int *_p_, int _val_) + -uint **atom_sub** (volatile {global} uint *_p_, uint _val_) - -|Read the 32-bit value (referred to as _old_) stored at location -pointed by _p_. Compute (_old_ - _val_) and store result at location -pointed by _p_. The function returns _old_. - -| -int **atom_xchg** (volatile {global} int *_p_, int _val_) + -uint **atom_xchg** (volatile {global} uint *_p_, uint _val_) - -|Swaps the _old_ value stored at location _p_ with new value given by -_val_. Returns _old_ value. - -| -int **atom_inc** (volatile {global} int *_p_) + -uint **atom_inc** (volatile {global} uint *_p_) - -|Read the 32-bit value (referred to as _old_) stored at location -pointed by _p_. Compute (_old_ + _1_) and store result at location -pointed by _p_. The function returns _old_. - -| -int **atom_dec** (volatile {global} int *_p_) + -uint **atom_dec** (volatile {global} uint *_p_) - -|Read the 32-bit value (referred to as _old_) stored at location -pointed by _p_. Compute (_old_ - _1_) and store result at location -pointed by _p_. The function returns _old_. - -| -int **atom_cmpxchg** (volatile {global} int *_p_, int _cmp_, int _val_) + -uint **atom_cmpxchg** (volatile {global} uint *_p_, uint _cmp_, uint _val_) - -|Read the 32-bit value (referred to as _old_) stored at location -pointed by _p_. Compute (_old_ == _cmp_) ? 
_val_ : _old_ and store -result at location pointed by _p_. The function returns _old_. - -|======================================================================= - -==== Extended Atomics - -._Built-in Atomic Functions for_ *cl_khr_global_int32_extended_atomics* -[cols="9,5",options="header",] -|======================================================================= -|*Function* |*Description* - -| -int **atom_min** (volatile {global} int *_p_, int _val_) + -uint **atom_min** (volatile {global} uint *_p_, uint _val_) - -|Read the 32-bit value (referred to as _old_) stored at location -pointed by _p_. Compute *min*(_old_, _val_) and store minimum value at -location pointed by _p_. The function returns _old_. - -| -int **atom_max** (volatile {global} int *_p_, int _val_) + -uint **atom_max** (volatile {global} uint *_p_, uint _val_) - -|Read the 32-bit value (referred to as _old_) stored at location -pointed by _p_. Compute *max*(_old_, _val_) and store maximum value at -location pointed by _p_. The function returns _old_. - -| -int **atom_and** (volatile {global} int *_p_, int _val_) + -uint **atom_and** (volatile {global} uint *_p_, uint _val_) - -|Read the 32-bit value (referred to as _old_) stored at location -pointed by _p_. Compute (_old_ & val) and store result at location -pointed by _p_. The function returns _old_. - -| -int **atom_or** (volatile {global} int *_p_, int _val_) + -uint **atom_or** (volatile {global} uint *_p_, uint _val_) - -|Read the 32-bit value (referred to as _old_) stored at location -pointed by _p_. Compute (_old_ \| val) and store result at location -pointed by _p_. The function returns _old_. - -| -int **atom_xor** (volatile {global} int *_p_, int _val_) + -uint **atom_xor** (volatile {global} uint *_p_, uint _val_) - -|Read the 32-bit value (referred to as _old_) stored at location -pointed by _p_. Compute (_old_ ^ val) and store result at location -pointed by _p_. The function returns _old_. 
- -|======================================================================= - -=== Local Atomics for 32-bit Integers - -==== Base Atomics - -._Built-in Atomic Functions for_ *cl_khr_local_int32_base_atomics* -[cols="9,5",options="header",] -|======================================================================= -|*Function* |*Description* - -| -int **atom_add** (volatile {local} int *_p_, int _val_) + -uint **atom_add** (volatile {local} uint *_p_, uint _val_) - -|Read the 32-bit value (referred to as _old_) stored at location -pointed by _p_. Compute (_old_ + _val_) and store result at location -pointed by _p_. The function returns _old_. - -| -int **atom_sub** (volatile {local} int *_p_, int _val_) + -uint **atom_sub** (volatile {local} uint *_p_, uint _val_) - -|Read the 32-bit value (referred to as _old_) stored at location -pointed by _p_. Compute (_old_ - _val_) and store result at location -pointed by _p_. The function returns _old_. - -| -int **atom_xchg** (volatile {local} int *_p_, int _val_) + -uint **atom_xchg** (volatile {local} uint *_p_, uint _val_) - -|Swaps the _old_ value stored at location _p_ with new value given by -_val_. Returns _old_ value. - -| -int **atom_inc** (volatile {local} int *_p_) + -uint **atom_inc** (volatile {local} uint *_p_) - -|Read the 32-bit value (referred to as _old_) stored at location -pointed by _p_. Compute (_old_ + _1_) and store result at location -pointed by _p_. The function returns _old_. - -| -int **atom_dec** (volatile {local} int *_p_) + -uint **atom_dec** (volatile {local} uint *_p_) - -|Read the 32-bit value (referred to as _old_) stored at location -pointed by _p_. Compute (_old_ - _1_) and store result at location -pointed by _p_. The function returns _old_. - -| -int **atom_cmpxchg** (volatile {local} int *_p_, int _cmp_, int _val_) + -uint **atom_cmpxchg** (volatile {local} uint *_p_, uint _cmp_, uint _val_) - -|Read the 32-bit value (referred to as _old_) stored at location -pointed by _p_. 
Compute (_old_ == _cmp_) ? _val_ : _old_ and store -result at location pointed by _p_. The function returns _old_. - -|======================================================================= - -==== Extended Atomics - -._Built-in Atomic Functions for_ *cl_khr_local_int32_extended_atomics* -[cols="9,5",options="header",] -|======================================================================= -|*Function* |*Description* - -| -int **atom_min** (volatile {local} int *_p_, int _val_) + -uint **atom_min** (volatile {local} uint *_p_, uint _val_) - -|Read the 32-bit value (referred to as _old_) stored at location -pointed by _p_. Compute *min*(_old_, _val_) and store minimum value at -location pointed by _p_. The function returns _old_. - -| -int **atom_max** (volatile {local} int *_p_, int _val_) + -uint **atom_max** (volatile {local} uint *_p_, uint _val_) - -|Read the 32-bit value (referred to as _old_) stored at location -pointed by _p_. Compute *max*(_old_, _val_) and store maximum value at -location pointed by _p_. The function returns _old_. - -| -int **atom_and** (volatile {local} int *_p_, int _val_) + -uint **atom_and** (volatile {local} uint *_p_, uint _val_) - -|Read the 32-bit value (referred to as _old_) stored at location -pointed by _p_. Compute (_old_ & val) and store result at location -pointed by _p_. The function returns _old_. - -| -int **atom_or** (volatile {local} int *_p_, int _val_) + -uint **atom_or** (volatile {local} uint *_p_, uint _val_) - -|Read the 32-bit value (referred to as _old_) stored at location -pointed by _p_. Compute (_old_ \| val) and store result at location -pointed by _p_. The function returns _old_. - -| -int **atom_xor** (volatile {local} int *_p_, int _val_) + -uint **atom_xor** (volatile {local} uint *_p_, uint _val_) - -|Read the 32-bit value (referred to as _old_) stored at location -pointed by _p_. Compute (_old_ ^ val) and store result at location -pointed by _p_. The function returns _old_. 
- -|======================================================================= - diff --git a/ext/cl_khr_int64_atomics.asciidoc b/ext/cl_khr_int64_atomics.asciidoc deleted file mode 100644 index ebed8522..00000000 --- a/ext/cl_khr_int64_atomics.asciidoc +++ /dev/null @@ -1,155 +0,0 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ - -[[cl_khr_int64_atomics]] -== 64-bit Atomics - -This section describes the *cl_khr_int64_base_atomics* and *cl_khr_int64_extended_atomics* extensions. These extensions allow atomic operations to be performed on 64-bit signed and unsigned integers in global and local memory. - -=== General Information - -==== Version History - -[cols="1,1,3",options="header",] -|==== -| *Date* | *Version* | *Description* -| 2020-04-21 | 1.0.0 | First assigned version. -|==== - -._Built-in Atomic Functions for_ *cl_khr_int64_base_atomics* -[cols="9,5",options="header",] -|======================================================================= -|*Function* |*Description* - -| -long **atom_add** (volatile {global} long *_p_, long _val_) + -long **atom_add** (volatile {local} long *_p_, long _val_) + -{blank} -ulong **atom_add** (volatile {global} ulong *_p_, ulong _val_) + -ulong **atom_add** (volatile {local} ulong *_p_, ulong _val_) - -|Read the 64-bit value (referred to as _old_) stored at location -pointed by _p_. Compute (_old_ + _val_) and store result at location -pointed by _p_. The function returns _old_. - -| -long **atom_sub** (volatile {global} long *_p_, long _val_) + -long **atom_sub** (volatile {local} long *_p_, long _val_) + -{blank} -ulong **atom_sub** (volatile {global} ulong *_p_, ulong _val_) + -ulong **atom_sub** (volatile {local} ulong *_p_, ulong _val_) - -|Read the 64-bit value (referred to as _old_) stored at location -pointed by _p_. 
Compute (_old_ - _val_) and store result at location -pointed by _p_. The function returns _old_. - -| -long **atom_xchg** (volatile {global} long *_p_, long _val_) + -long **atom_xchg** (volatile {local} long *_p_, long _val_) + -{blank} -ulong **atom_xchg** (volatile {global} ulong *_p_, ulong _val_) + -ulong **atom_xchg** (volatile {local} ulong *_p_, ulong _val_) - -|Swaps the _old_ value stored at location _p_ with new value given by -_val_. Returns _old_ value. - -| -long **atom_inc** (volatile {global} long *_p_) + -long **atom_inc** (volatile {local} long *_p_) + -{blank} -ulong **atom_inc** (volatile {global} ulong *_p_) + -ulong **atom_inc** (volatile {local} ulong *_p_) - -|Read the 64-bit value (referred to as _old_) stored at location -pointed by _p_. Compute (_old_ + _1_) and store result at location -pointed by _p_. The function returns _old_. - -| -long **atom_dec** (volatile {global} long *_p_) + -long **atom_dec** (volatile {local} long *_p_) + -{blank} -ulong **atom_dec** (volatile {global} ulong *_p_) + -ulong **atom_dec** (volatile {local} ulong *_p_) - -|Read the 64-bit value (referred to as _old_) stored at location -pointed by _p_. Compute (_old_ - _1_) and store result at location -pointed by _p_. The function returns _old_. - -| -long **atom_cmpxchg** (volatile {global} long *_p_, long _cmp_, long _val_) + -long **atom_cmpxchg** (volatile {local} long *_p_, long _cmp_, long _val_) + -{blank} -ulong **atom_cmpxchg** (volatile {global} ulong *_p_, ulong _cmp_, ulong _val_) + -ulong **atom_cmpxchg** (volatile {local} ulong *_p_, ulong _cmp_, ulong _val_) - -|Read the 64-bit value (referred to as _old_) stored at location -pointed by _p_. Compute (_old_ == _cmp_) ? _val_ : _old_ and store -result at location pointed by _p_. The function returns _old_. 
- -|======================================================================= - -._Built-in Atomic Functions for_ *cl_khr_int64_extended_atomics* -[cols=",",options="header",] -|======================================================================= -|*Function* |*Description* - -| -long **atom_min** (volatile {global} long *_p_, long _val_) + -long **atom_min** (volatile {local} long *_p_, long _val_) + -{blank} -ulong **atom_min** (volatile {global} ulong *_p_, ulong _val_) + -ulong **atom_min** (volatile {local} ulong *_p_, ulong _val_) - -|Read the 64-bit value (referred to as _old_) stored at location -pointed by _p_. Compute *min*(_old_, _val_) and store minimum value at -location pointed by _p_. The function returns _old_. - -| -long **atom_max** (volatile {global} long *_p_, long _val_) + -long **atom_max** (volatile {local} long *_p_, long _val_) + -{blank} -ulong **atom_max** (volatile {global} ulong *_p_, ulong _val_) + -ulong **atom_max** (volatile {local} ulong *_p_, ulong _val_) - -|Read the 64-bit value (referred to as _old_) stored at location -pointed by _p_. Compute *max*(_old_, _val_) and store maximum value at -location pointed by _p_. The function returns _old_. - -| -long **atom_and** (volatile {global} long *_p_, long _val_) + -long **atom_and** (volatile {local} long *_p_, long _val_) + -{blank} -ulong **atom_and** (volatile {global} ulong *_p_, ulong _val_) + -ulong **atom_and** (volatile {local} ulong *_p_, ulong _val_) - -|Read the 64-bit value (referred to as _old_) stored at location -pointed by _p_. Compute (_old_ & val) and store result at location -pointed by _p_. The function returns _old_. - -| -long **atom_or** (volatile {global} long *_p_, long _val_) + -long **atom_or** (volatile {local} long *_p_, long _val_) + -{blank} -ulong **atom_or** (volatile {global} ulong *_p_, ulong _val_) + -ulong **atom_or** (volatile {local} ulong *_p_, ulong _val_) - -|Read the 64-bit value (referred to as _old_) stored at location -pointed by _p_. 
Compute (_old_ \| val) and store result at location -pointed by _p_. The function returns _old_. - -| -long **atom_xor** (volatile {global} long *_p_, long _val_) + -long **atom_xor** (volatile {local} long *_p_, long _val_) + -{blank} -ulong **atom_xor** (volatile {global} ulong *_p_, ulong _val_) + -ulong **atom_xor** (volatile {local} ulong *_p_, ulong _val_) - -|Read the 64-bit value (referred to as _old_) stored at location -pointed by _p_. Compute (_old_ ^ val) and store result at location -pointed by _p_. The function returns _old_. - -|======================================================================= - -Note: Atomic operations on 64-bit integers and 32-bit integers (and -float) are also atomic w.r.t. each other. diff --git a/ext/cl_khr_integer_dot_product.asciidoc b/ext/cl_khr_integer_dot_product.asciidoc deleted file mode 100644 index 9ed542cd..00000000 --- a/ext/cl_khr_integer_dot_product.asciidoc +++ /dev/null @@ -1,256 +0,0 @@ -// Copyright 2020-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ - -[[cl_khr_integer_dot_product]] -== Integer dot product - -This extension adds support for SPIR-V instructions and OpenCL C built-in -functions to compute the dot product of vectors of integers. - -=== General Information - -==== Name Strings - -`cl_khr_integer_dot_product` - -==== Version History - -[cols="1,1,3",options="header",] -|==== -| *Date* | *Version* | *Description* -| 2021-06-17 | 1.0.0 | Initial version. -| 2021-06-23 | 2.0.0 | All 8-bit support is mandatory, added 8-bit acceleration properties. -|==== - -==== Dependencies - -This extension is written against the OpenCL Specification Version 3.0.6, -and OpenCL C Specification Version 3.0.6 and OpenCL Environment Specification -Version 3.0.6. - -This extension requires OpenCL 1.0. - -==== Contributors - -Kévin Petit, Arm Ltd. 
+ -Jeremy Kemp, Imagination Technologies + -Ben Ashbaugh, Intel + -Ruihao Zhang, Qualcomm + -Stuart Brady, Arm Ltd + -Balaji Calidas, Qualcomm + -Ayal Zaks, Intel + - -=== New API Enums - -Accepted value for the _param_name_ parameter to {clGetDeviceInfo}: - -[source,opencl] ----- - -CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_4x8BIT_PACKED_KHR (1 << 0) -CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_4x8BIT_KHR (1 << 1) - -CL_DEVICE_INTEGER_DOT_PRODUCT_CAPABILITIES_KHR 0x1073 - -CL_DEVICE_INTEGER_DOT_PRODUCT_ACCELERATION_PROPERTIES_8BIT_KHR 0x1074 -CL_DEVICE_INTEGER_DOT_PRODUCT_ACCELERATION_PROPERTIES_4x8BIT_PACKED_KHR 0x1075 ----- - -=== New OpenCL C Functions - -This extension defines a number of new functions that operate on vectors -of integers. The exact function overloads available depend on the features -supported by the target device. - -[source,opencl_c] ----- -uint dot(uchar4 a, uchar4 b); -int dot(char4 a, char4 b); -int dot(uchar4 a, char4 b); -int dot(char4 a, uchar4 b); - -uint dot_acc_sat(uchar4 a, uchar4 b, uint acc); -int dot_acc_sat(char4 a, char4 b, int acc); -int dot_acc_sat(uchar4 a, char4 b, int acc); -int dot_acc_sat(char4 a, uchar4 b, int acc); - -uint dot_4x8packed_uu_uint(uint a, uint b); -int dot_4x8packed_ss_int(uint a, uint b); -int dot_4x8packed_us_int(uint a, uint b); -int dot_4x8packed_su_int(uint a, uint b); - -uint dot_acc_sat_4x8packed_uu_uint(uint a, uint b, uint acc); -int dot_acc_sat_4x8packed_ss_int(uint a, uint b, int acc); -int dot_acc_sat_4x8packed_us_int(uint a, uint b, int acc); -int dot_acc_sat_4x8packed_su_int(uint a, uint b, int acc); ----- - -=== Modifications to the OpenCL API Specification - -(Modify Section 4.2, *Querying Devices*) :: -+ --- - -(Add the following to Table 4.3, _Device Queries_) :: -+ -[cols="2,2,4",options="header"] -|==== -| Device Info -| Return Type -| Description - -| {CL_DEVICE_INTEGER_DOT_PRODUCT_CAPABILITIES_KHR} -| {cl_device_integer_dot_product_capabilities_khr_type} -| Returns the integer dot product 
capabilities supported by the device. + - + -{CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_4x8BIT_PACKED_KHR} is always set - indicating that all implementations that support `cl_khr_integer_dot_product` - must support dot product built-in functions and, when SPIR-V is supported, - SPIR-V instructions that take four-component vectors of 8-bit integers packed - into 32-bit integers as input. + -{CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_4x8BIT_KHR} is set when dot product - built-in functions and, when SPIR-V is supported, SPIR-V instructions that - take four-component vectors of 8-bit elements as input are supported. + - NOTE: {CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_4x8BIT_KHR} must be set in version - 2.x of the extension. - - -| {CL_DEVICE_INTEGER_DOT_PRODUCT_ACCELERATION_PROPERTIES_8BIT_KHR} -| {cl_device_integer_dot_product_acceleration_properties_khr_TYPE} -| Returns a structure describing the exact 8-bit dot product combinations - that are <<integer-dot-product-acceleration-properties,accelerated>> on the device. + - Each member is {CL_TRUE} if the combination it corresponds to is accelerated, - {CL_FALSE} otherwise. + - NOTE: {CL_DEVICE_INTEGER_DOT_PRODUCT_ACCELERATION_PROPERTIES_8BIT_KHR} is missing - before version 2.0 of the extension. - -| {CL_DEVICE_INTEGER_DOT_PRODUCT_ACCELERATION_PROPERTIES_4x8BIT_PACKED_KHR} -| {cl_device_integer_dot_product_acceleration_properties_khr_TYPE} -| Returns a structure describing the exact 4x8-bit packed dot product combinations - that are <<integer-dot-product-acceleration-properties,accelerated>> on the device. + - Each member is {CL_TRUE} if the combination it corresponds to is accelerated, - {CL_FALSE} otherwise. + - NOTE: {CL_DEVICE_INTEGER_DOT_PRODUCT_ACCELERATION_PROPERTIES_4x8BIT_PACKED_KHR} is missing - before version 2.0 of the extension.
-|==== - -OpenCL 3 devices must report the following feature macros via -{CL_DEVICE_OPENCL_C_FEATURES} when the corresponding bit is set in the bitfield -returned for {CL_DEVICE_INTEGER_DOT_PRODUCT_CAPABILITIES_KHR}: - -[cols="1,1",options="header"] -|==== -| Feature bit -| Feature macro - -| {CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_4x8BIT_PACKED_KHR} -| `__opencl_c_integer_dot_product_input_4x8bit_packed` - -| {CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_4x8BIT_KHR} -| `__opencl_c_integer_dot_product_input_4x8bit` - -|==== - -[[integer-dot-product-acceleration-properties]] -The {cl_device_integer_dot_product_acceleration_properties_khr_TYPE} structure -describes the exact dot product operations that are accelerated on the device: - -include::{generated}/api/structs/cl_device_integer_dot_product_acceleration_properties_khr.txt[] - - * _signed_accelerated_ is {CL_TRUE} when signed dot product operations are - accelerated, {CL_FALSE} otherwise. - * _unsigned_accelerated_ is {CL_TRUE} when unsigned dot product operations - are accelerated, {CL_FALSE} otherwise. - * _mixed_signedness_accelerated_ is {CL_TRUE} when mixed signedness dot - product operations are accelerated, {CL_FALSE} otherwise. - * _accumulating_saturating_signed_accelerated_ is {CL_TRUE} when accumulating - saturating signed dot product operations are accelerated, {CL_FALSE} - otherwise. - * _accumulating_saturating_unsigned_accelerated_ is {CL_TRUE} when accumulating - saturating unsigned dot product operations are accelerated, {CL_FALSE} - otherwise. - * _accumulating_saturating_mixed_signedness_accelerated_ is {CL_TRUE} when - accumulating saturating mixed signedness dot product operations are - accelerated, {CL_FALSE} otherwise. 
- -A dot product operation is deemed accelerated if its implementation provides -a performance advantage over application-provided code composed from elementary -instructions and/or other dot product instructions, either because the -implementation uses optimized machine code sequences whose generation from -application-provided code cannot be guaranteed or because it uses hardware -features that cannot otherwise be targeted from application-provided code. --- - -=== Modifications to the OpenCL C Specification - -(Modify section 6.13.3, *Integer Functions*) :: -+ --- - -The following built-in functions and preprocessor definitions are added: - -[source,opencl_c] ----- -#define cl_khr_integer_dot_product 1 - -if (CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_4x8BIT_PACKED_KHR) { - #define __opencl_c_integer_dot_product_input_4x8bit_packed 1 - - uint dot_4x8packed_uu_uint(uint a, uint b); - int dot_4x8packed_ss_int(uint a, uint b); - int dot_4x8packed_us_int(uint a, uint b); - int dot_4x8packed_su_int(uint a, uint b); - - uint dot_acc_sat_4x8packed_uu_uint(uint a, uint b, uint acc); - int dot_acc_sat_4x8packed_ss_int(uint a, uint b, int acc); - int dot_acc_sat_4x8packed_us_int(uint a, uint b, int acc); - int dot_acc_sat_4x8packed_su_int(uint a, uint b, int acc); -} - -if (CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_4x8BIT_KHR) { - #define __opencl_c_integer_dot_product_input_4x8bit 1 - - uint dot(uchar4 a, uchar4 b); - int dot(char4 a, char4 b); - int dot(uchar4 a, char4 b); - int dot(char4 a, uchar4 b); - - uint dot_acc_sat(uchar4 a, uchar4 b, uint acc); - int dot_acc_sat(char4 a, char4 b, int acc); - int dot_acc_sat(uchar4 a, char4 b, int acc); - int dot_acc_sat(char4 a, uchar4 b, int acc); -} ----- - - -* `dot` returns the dot product of the two input vectors `a` and `b`. The -components of `a` and `b` are sign- or zero-extended to the width of the -destination type and the vectors with extended components are multiplied -component-wise. 
All the components of the resulting vectors are added -together to form the final result. - -* `dot_acc_sat` returns the saturating addition of the dot product of the two -input vectors `a` and `b` and the accumulator `acc`: - ----- -product = dot(a,b); -result = add_sat(product, acc); ----- - -* `dot_*_4x8packed_XY_R` returns the dot product of the two vectors packed -into `a` and `b` (lowest component in least significant byte). The components -are unpacked, sign- or zero-extended to the width of the destination type before -the multiplications and additions. `X` represents the signedness of the components -of `a`, `Y` that of the components of `b`. `R` is the return type. --- - -=== Modifications to the OpenCL SPIR-V Environment Specification - -See OpenCL SPIR-V Environment Specification. - -=== Interactions with Other Extensions - -If `cl_khr_il_program` is supported then the SPIR-V environment specification -modifications described above apply. - diff --git a/ext/cl_khr_mipmap_image.asciidoc b/ext/cl_khr_mipmap_image.asciidoc deleted file mode 100644 index c7a43592..00000000 --- a/ext/cl_khr_mipmap_image.asciidoc +++ /dev/null @@ -1,609 +0,0 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ - -[[cl_khr_mipmap_image]] -== Mipmaps - -This section describes OpenCL support for mipmaps. - -There are two optional mipmap extensions. -The *cl_khr_mipmap_image* extension adds the ability to create a mip-mapped -image, enqueue commands to read/write/copy/map/unmap a region of a mipmapped -image, and built-in functions that can be used to read a mip-mapped image in -an OpenCL C program. -The *cl_khr_mipmap_image_writes* extension adds built-in functions that can -be used to write a mip-mapped image in an OpenCL C program. 
-If the *cl_khr_mipmap_image_writes* extension is supported by the OpenCL -device, the *cl_khr_mipmap_image* extension must also be supported. - -=== General Information - -==== Version History - -[cols="1,1,3",options="header",] -|==== -| *Date* | *Version* | *Description* -| 2020-04-21 | 1.0.0 | First assigned version. -|==== - -[[cl_khr_mipmap_image-additions-to-chapter-5]] -=== Additions to Chapter 5 of the OpenCL 2.2 Specification - -[[cl_khr_mipmap_image-additions-to-section-5.3]] -==== Additions to section 5.3 -- Image Objects - -A mip-mapped 1D image, 1D image array, 2D image, 2D image array or 3D image -is created by specifying _num_mip_levels_ to be a value greater than one in -the _image_desc_ passed to {clCreateImage}. -The dimensions of a mip-mapped image can be a power of two or a non-power of -two. -Each successively smaller mipmap level is half the size of the previous -level. -If this half value is a fractional value, it is rounded down to the nearest -integer. - -*Restrictions* - -The following restrictions apply when mip-mapped images are created with -{clCreateImage}: - - * {CL_MEM_USE_HOST_PTR} or {CL_MEM_COPY_HOST_PTR} cannot be specified if a - mip-mapped image is created. - * The _host_ptr_ argument to {clCreateImage} must be a `NULL` value. - * Mip-mapped images cannot be created for {CL_MEM_OBJECT_IMAGE1D_BUFFER} - images, depth images or multi-sampled (i.e. msaa) images. - -Calls to {clEnqueueReadImage}, {clEnqueueWriteImage} and {clEnqueueMapImage} -can be used to read from or write to a specific mip-level of a mip-mapped -image. -If image argument is a 1D image, _origin_[1] specifies the mip-level to use. -If image argument is a 1D image array, _origin_[2] specifies the mip-level -to use. -If image argument is a 2D image, _origin_[2] specifies the mip-level to use. -If image argument is a 2D image array or a 3D image, _origin_[3] specifies -the mip-level to use. 
- -Calls to {clEnqueueCopyImage}, {clEnqueueCopyImageToBuffer} and -{clEnqueueCopyBufferToImage} can also be used to copy from and to a specific -mip-level of a mip-mapped image. -If _src_image_ argument is a 1D image, _src_origin_[1] specifies the -mip-level to use. -If _src_image_ argument is a 1D image array, _src_origin_[2] specifies the -mip-level to use. -If _src_image_ argument is a 2D image, _src_origin_[2] specifies the -mip-level to use. -If _src_image_ argument is a 2D image array or a 3D image, _src_origin_[3] -specifies the mip-level to use. -If _dst_image_ argument is a 1D image, _dst_origin_[1] specifies the -mip-level to use. -If _dst_image_ argument is a 1D image array, _dst_origin_[2] specifies the -mip-level to use. -If _dst_image_ argument is a 2D image, _dst_origin_[2] specifies the -mip-level to use. -If _dst_image_ argument is a 2D image array or a 3D image, _dst_origin_[3] -specifies the mip-level to use. - -If the mip level specified is not a valid value, these functions return the -error {CL_INVALID_MIP_LEVEL}. - -Calls to {clEnqueueFillImage} can be used to write to a specific mip-level of -a mip-mapped image. -If image argument is a 1D image, origin[1] specifies the mip-level to use. -If image argument is a 1D image array, origin[2] specifies the mip-level to -use. -If image argument is a 2D image, origin[2] specifies the mip-level to use. -If image argument is a 2D image array or a 3D image, origin[3] specifies the -mip-level to use. - -[[cl_khr_mipmap_image-additions-to-section-5.7]] -==== Additions to section 5.7 -- Sampler Objects - -Add the following sampler properties _to table 5.14_ that can be specified -when a sampler object is created using {clCreateSamplerWithProperties}. 
- -[cols="3,1,2",options="header",] -|==== -| Sampler Property -| Property Value -| Default Value - -| {CL_SAMPLER_MIP_FILTER_MODE_KHR} -| {cl_filter_mode_TYPE} -| {CL_FILTER_NEAREST} - -| {CL_SAMPLER_LOD_MIN_KHR} -| {cl_float_TYPE} -| `0.0f` - -| {CL_SAMPLER_LOD_MAX_KHR} -| {cl_float_TYPE} -| `MAXFLOAT` - -|==== - -Note: The sampler properties {CL_SAMPLER_MIP_FILTER_MODE_KHR}, -{CL_SAMPLER_LOD_MIN_KHR} and {CL_SAMPLER_LOD_MAX_KHR} cannot be specified with -any samplers initialized in the OpenCL program source. -Only the default values for these properties will be used. -To create a sampler with specific values for these properties, a sampler -object must be created with {clCreateSamplerWithProperties} and passed as an -argument to a kernel. - -[[cl_khr_mipmap_image-additions-to-chapter-6-of-the-opencl-2.0-specification]] -=== Additions to Chapter 6 of the OpenCL 2.0 Specification - -[[cl_khr_mipmap_image-additions-to-section-6.13.14-image-read-write-and-query-functions]] -==== Additions to section 6.13.14 – Image Read, Write and Query Functions - -The image read and write functions described in _sections 6.13.14.2_, -_6.13.14.3_ and _6.13.14.4_ read from and write to mip-level 0 if the -image argument is a mip-mapped image. - -The following new built-in functions are added to _section 6.13.14.2_. 
- -[cols="5a,4",options="header",] -|======================================================================= -|*Function* |*Description* -|[source,opencl_c] ----- -float4 read_imagef( - read_only image2d_t image, - sampler_t sampler, - float2 coord, - float lod) - -int4 read_imagei( - read_only image2d_t image, - sampler_t sampler, - float2 coord, - float lod) - -uint4 read_imageui( - read_only image2d_t image, - sampler_t sampler, - float2 coord, - float lod) - -float read_imagef( - read_only image2d_depth_t image, - sampler_t sampler, - float2 coord, - float lod) ----- -| Use the coordinate _coord.xy_ to do an element lookup in the mip-level specified by _lod_ in the 2D image object specified by _image_. - -|[source,opencl_c] ----- -float4 read_imagef( - read_only image2d_t image, - sampler_t sampler, - float2 coord, - float2 gradient_x, - float2 gradient_y) - -int4 read_imagei( - read_only image2d_t image, - sampler_t sampler, - float2 coord, - float2 gradient_x, - float2 gradient_y) - -uint4 read_imageui( - read_only image2d_t image, - sampler_t sampler, - float2 coord, - float2 gradient_x, - float2 gradient_y) - -float read_imagef( - read_only image2d_depth_t image, - sampler_t sampler, - float2 coord, - float2 gradient_x, - float2 gradient_y) ----- -| Use the gradients to compute the lod and coordinate _coord.xy_ to do an element lookup in the mip-level specified by the computed lod in the 2D image object specified by _image_. - -|[source,opencl_c] ----- -float4 read_imagef( - read_only image1d_t image, - sampler_t sampler, - float coord, - float lod) - -int4 read_imagei( - read_only image1d_t image, - sampler_t sampler, - float coord, - float lod) - -uint4 read_imageui( - read_only image1d_t image, - sampler_t sampler, - float coord, - float lod) ----- -| Use the coordinate _coord_ to do an element lookup in the mip-level specified by _lod_ in the 1D image object specified by _image_. 
- -|[source,opencl_c] ----- -float4 read_imagef( - read_only image1d_t image, - sampler_t sampler, - float coord, - float gradient_x, - float gradient_y) - -int4 read_imagei( - read_only image1d_t image, - sampler_t sampler, - float coord, - float gradient_x, - float gradient_y) - -uint4 read_imageui( - read_only image1d_t image, - sampler_t sampler, - float coord, - float gradient_x, - float gradient_y) ----- -| Use the gradients to compute the lod and coordinate _coord_ to do an element lookup in the mip-level specified by the computed lod in the 1D image object specified by _image_. - -|[source,opencl_c] ----- -float4 read_imagef( - read_only image3d_t image, - sampler_t sampler, - float4 coord, - float lod) - -int4 read_imagei( - read_only image3d_t image, - sampler_t sampler, - float4 coord, - float lod) - -uint4 read_imageui( - read_only image3d_t image, - sampler_t sampler, - float4 coord, - float lod) ----- -| Use the coordinate _coord.xyz_ to do an element lookup in the mip-level specified by _lod_ in the 3D image object specified by _image_. - -|[source,opencl_c] ----- -float4 read_imagef( - read_only image3d_t image, - sampler_t sampler, - float4 coord, - float4 gradient_x, - float4 gradient_y) - -int4 read_imagei( - read_only image3d_t image, - sampler_t sampler, - float4 coord, - float4 gradient_x, - float4 gradient_y) - -uint4 read_imageui( - read_only image3d_t image, - sampler_t sampler, - float4 coord, - float4 gradient_x, - float4 gradient_y) ----- -| Use the gradients to compute the lod and coordinate _coord.xyz_ to do an element lookup in the mip-level specified by the computed lod in the 3D image object specified by _image_. 
- -|[source,opencl_c] ----- -float4 read_imagef( - read_only image1d_array_t image, - sampler_t sampler, - float2 coord, - float lod) - -int4 read_imagei( - read_only image1d_array_t image, - sampler_t sampler, - float2 coord, - float lod) - -uint4 read_imageui( - read_only image1d_array_t image, - sampler_t sampler, - float2 coord, - float lod) ----- -| Use the coordinate _coord.x_ to do an element lookup in the 1D image identified by _coord.y_ and mip-level specified by _lod_ in the 1D image array specified by _image_. - -|[source,opencl_c] ----- -float4 read_imagef( - read_only image1d_array_t image, - sampler_t sampler, - float2 coord, - float gradient_x, - float gradient_y) - -int4 read_imagei( - read_only image1d_array_t image, - sampler_t sampler, - float2 coord, - float gradient_x, - float gradient_y) - -uint4 read_imageui( - read_only image1d_array_t image, - sampler_t sampler, - float2 coord, - float gradient_x, - float gradient_y) ----- -| Use the gradients to compute the lod and coordinate _coord.x_ to do an element lookup in the mip-level specified by the computed lod in the 1D image array specified by _image_. - -|[source,opencl_c] ----- -float4 read_imagef( - read_only image2d_array_t image, - sampler_t sampler, - float4 coord, - float lod) - -int4 read_imagei( - read_only image2d_array_t image, - sampler_t sampler, - float4 coord, - float lod) - -uint4 read_imageui( - read_only image2d_array_t image, - sampler_t sampler, - float4 coord, - float lod) - -float read_imagef( - read_only image2d_array_depth_t image, - sampler_t sampler, - float4 coord, - float lod) ----- -| Use the coordinate _coord.xy_ to do an element lookup in the 2D image identified by _coord.z_ and mip-level specified by _lod_ in the 2D image array specified by _image_.
- -|[source,opencl_c] ----- -float4 read_imagef( - read_only image2d_array_t image, - sampler_t sampler, - float4 coord, - float2 gradient_x, - float2 gradient_y) - -int4 read_imagei( - read_only image2d_array_t image, - sampler_t sampler, - float4 coord, - float2 gradient_x, - float2 gradient_y) - -uint4 read_imageui( - read_only image2d_array_t image, - sampler_t sampler, - float4 coord, - float2 gradient_x, - float2 gradient_y) - -float read_imagef( - read_only image2d_array_depth_t image, - sampler_t sampler, - float4 coord, - float2 gradient_x, - float2 gradient_y) ----- -| Use the gradients to compute the lod and coordinate _coord.xy_ to do an element lookup in the 2D image identified by _coord.z_ and mip-level specified by the computed lod in the 2D image array specified by _image_. -|======================================================================= - -NOTE: {CL_SAMPLER_NORMALIZED_COORDS} must be {CL_TRUE} for built-in functions described in the table above that read from a mip-mapped image; otherwise the behavior is undefined. -The value specified in the _lod_ argument is clamped to the minimum of (actual number of mip-levels in the image – 1) and the value specified for {CL_SAMPLER_LOD_MAX_KHR}. - -The following new built-in functions are added to _section 6.13.14.4_. - -[cols="1a,1",options="header",] -|======================================================================= -|*Function* |*Description* -|[source,opencl_c] ----- -void write_imagef( - write_only image2d_t image, - int2 coord, - int lod, - float4 color) - -void write_imagei( - write_only image2d_t image, - int2 coord, - int lod, - int4 color) - -void write_imageui( - write_only image2d_t image, - int2 coord, - int lod, - uint4 color) - -void write_imagef( - write_only image2d_depth_t image, - int2 coord, - int lod, - float depth) ----- -| Write _color_ value to location specified by _coord.xy_ in the mip-level specified by _lod_ in the 2D image object specified by _image_.
-Appropriate data format conversion to the specified image format is done before writing the color value. -_coord.x_ and _coord.y_ are considered to be unnormalized coordinates and must be in the range 0 .. image width of mip-level specified by _lod_ – 1, and 0 .. image height of mip-level specified by _lod_ – 1. - -The behavior of *write_imagef*, *write_imagei* and *write_imageui* if (_x_, _y_) coordinate values are not in the range (0 .. image width of the mip-level specified by _lod_ – 1, 0 .. image height of the mip-level specified by _lod_ – 1) or _lod_ value exceeds the (number of mip-levels in the image – 1) is undefined. - -|[source,opencl_c] ----- -void write_imagef( - write_only image1d_t image, - int coord, - int lod, - float4 color) - -void write_imagei( - write_only image1d_t image, - int coord, - int lod, - int4 color) - -void write_imageui( - write_only image1d_t image, - int coord, - int lod, - uint4 color) ----- -|Write _color_ value to location specified by _coord_ in the mip-level specified by _lod_ in the 1D image object specified by _image_. -Appropriate data format conversion to the specified image format is done before writing the color value. -_coord_ is considered to be unnormalized coordinates and must be in the range 0 .. image width of the mip-level -specified by _lod_ – 1. - -The behavior of *write_imagef*, *write_imagei* and *write_imageui* if coordinate value is not in the range (0 .. image width of the mip-level specified by _lod_ – 1) or _lod_ value exceeds the (number of mip-levels in the image – 1), is undefined. 
- -|[source,opencl_c] ----- -void write_imagef( - write_only image1d_array_t image, - int2 coord, - int lod, - float4 color) - -void write_imagei( - write_only image1d_array_t image, - int2 coord, - int lod, - int4 color) - -void write_imageui( - write_only image1d_array_t image, - int2 coord, - int lod, - uint4 color) ----- -| Write _color_ value to location specified by _coord.x_ in the 1D image identified by _coord.y_ and mip-level _lod_ in the 1D image array specified by _image_. -Appropriate data format conversion to the specified image format is done before writing the color value. -_coord.x_ and _coord.y_ are considered to be unnormalized coordinates and must be in the range 0 .. image width of the mip-level specified by _lod_ – 1 and 0 .. image number of layers – 1. - -The behavior of *write_imagef*, *write_imagei* and *write_imageui* if (_x_, _y_) coordinate values are not in the range (0 .. image width of the mip-level specified by _lod_ – 1, 0 .. image number of layers – 1), respectively or _lod_ value exceeds the (number of mip-levels in the image – 1), is undefined. - -|[source,opencl_c] ----- -void write_imagef( - write_only image2d_array_t image, - int4 coord, - int lod, - float4 color) - -void write_imagei( - write_only image2d_array_t image, - int4 coord, - int lod, - int4 color) - -void write_imageui( - write_only image2d_array_t image, - int4 coord, - int lod, - uint4 color) - -void write_imagef( - write_only image2d_array_depth_t image, - int4 coord, - int lod, - float depth) ----- -| Write _color_ value to location specified by _coord.xy_ in the 2D image identified by _coord.z_ and mip-level _lod_ in the 2D image array specified by _image_. -Appropriate data format conversion to the specified image format is done before writing the color value. -_coord.x_, _coord.y_ and _coord.z_ are considered to be unnormalized coordinates and must be in the range 0 .. image width of the mip-level specified by _lod_ – 1, 0 .. 
image height of the mip-level specified by _lod_ – 1 and 0 .. image number of layers – 1. - -The behavior of *write_imagef*, *write_imagei* and *write_imageui* if (_x_, _y_, _z_) coordinate values are not in the range (0 .. image width of the mip-level specified by _lod_ – 1, 0 .. image height of the mip-level specified by _lod_ – 1, 0 .. image number of layers – 1), respectively or _lod_ value exceeds the (number of mip-levels in the image – 1), is undefined. - -|[source,opencl_c] ----- -void write_imagef( - write_only image3d_t image, - int4 coord, - int lod, - float4 color) - -void write_imagei( - write_only image3d_t image, - int4 coord, - int lod, - int4 color) - -void write_imageui( - write_only image3d_t image, - int4 coord, - int lod, - uint4 color) ----- -| Write _color_ value to location specified by _coord.xyz_ and mip-level _lod_ in the 3D image object specified by _image_. -Appropriate data format conversion to the specified image format is done before writing the color value. -_coord.x_, _coord.y_ and _coord.z_ are considered to be unnormalized coordinates and must be in the range 0 .. image width of the mip-level -specified by _lod_ – 1, 0 .. image height of the mip-level specified by _lod_ – 1 and 0 .. image depth of the mip-level specified by _lod_ – 1. - -The behavior of *write_imagef*, *write_imagei* and *write_imageui* if (_x_, _y_, _z_) coordinate values are not in the range (0 .. image width of the mip-level specified by _lod_ – 1, 0 .. image height of the mip-level specified by _lod_ – 1, 0 .. image depth of the mip-level specified by _lod_ – 1), respectively or _lod_ value exceeds the (number of mip-levels in the image – 1), is undefined. - -|======================================================================= - -The following new built-in functions are added to _section 6.13.14.5_.
- -[cols="1a,1",options="header",] -|================================= -|*Function* |*Description* -|[source,opencl_c] ----- -int get_image_num_mip_levels( - image1d_t image) - -int get_image_num_mip_levels( - image2d_t image) - -int get_image_num_mip_levels( - image3d_t image) - -int get_image_num_mip_levels( - image1d_array_t image) - -int get_image_num_mip_levels( - image2d_array_t image) - -int get_image_num_mip_levels( - image2d_depth_t image) - -int get_image_num_mip_levels( - image2d_array_depth_t image) ----- -| Return the number of mip-levels. -|================================= - -[[cl_khr_mipmap_image-additions-to-creating-opencl-memory-objects-from-opengl-objects]] -=== Additions to <> - -If both the *cl_khr_mipmap_image* and *cl_khr_gl_sharing* extensions are -supported by the OpenCL device, the *cl_khr_gl_sharing* extension may also -be used to create a mipmapped OpenCL image from a mipmapped OpenGL texture. - -To create a mipmapped OpenCL image from a mipmapped OpenGL texture, pass a -negative value as the _miplevel_ argument to {clCreateFromGLTexture}. -If _miplevel_ is a negative value then an OpenCL mipmapped image object is -created from a mipmapped OpenGL texture object, instead of an OpenCL image -object for a specific miplevel of the OpenGL texture. - -Note: For a detailed description of how the level of detail is computed, -please refer to _section 3.9.7_ of the OpenGL 3.0 specification. diff --git a/ext/cl_khr_pci_bus_info.asciidoc b/ext/cl_khr_pci_bus_info.asciidoc deleted file mode 100644 index fc724ca1..00000000 --- a/ext/cl_khr_pci_bus_info.asciidoc +++ /dev/null @@ -1,75 +0,0 @@ -// Copyright 2018-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ - -[[cl_khr_pci_bus_info]] -== PCI Bus Information Query - -This extension adds a new query to obtain PCI bus information about an OpenCL -device. 
- -Not all OpenCL devices have PCI bus information, either due to the device not -being connected to the system through a PCI interface or due to platform -specific restrictions and policies. Thus this extension is only expected to be -supported by OpenCL devices which can provide the information. - -As a consequence, applications should always check for the presence of the -extension string for each individual OpenCL device for which they intend to -issue the new query and should not make any assumptions about the -availability of the extension on any given platform. - -=== General Information - -==== Name Strings - -`cl_khr_pci_bus_info` - -==== Version History - -[cols="1,1,3",options="header",] -|==== -| *Date* | *Version* | *Description* -| 2021-04-19 | 1.0.0 | Initial version. -|==== - -==== Dependencies - -This extension is written against the OpenCL API Specification Version V3.0.6. - -This extension requires OpenCL 1.0. - -=== New API Types - -Structure returned by the device info query for {CL_DEVICE_PCI_BUS_INFO_KHR}: - -include::{generated}/api/structs/cl_device_pci_bus_info_khr.txt[] - -=== New API Enums - -Accepted value for the _param_name_ parameter to {clGetDeviceInfo}: - -[source,opencl] ----- -#define CL_DEVICE_PCI_BUS_INFO_KHR 0x410F ----- - -=== Modifications to the OpenCL API Specification - -==== Section 4.2 - Querying Devices: - -Add to Table 5 - OpenCL Device Queries: - -[caption="Table 5. "] -.OpenCL Device Queries -[width="100%",cols="<30%,<20%,<50%",options="header"] -|==== -| DeviceInfo | Return Type | Description -| {CL_DEVICE_PCI_BUS_INFO_KHR} - | {cl_device_pci_bus_info_khr_TYPE} - | Returns PCI bus information for the device. - - The PCI bus information is returned as a single structure that includes - the PCI bus domain, the PCI bus identifier, the PCI device identifier, and - the PCI device function identifier.
- -|==== diff --git a/ext/cl_khr_priority_hints.asciidoc b/ext/cl_khr_priority_hints.asciidoc deleted file mode 100644 index bdbcfe40..00000000 --- a/ext/cl_khr_priority_hints.asciidoc +++ /dev/null @@ -1,48 +0,0 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ - -[[cl_khr_priority_hints]] -== Priority Hints - -This section describes the *cl_khr_priority_hints* extension. -This extension adds priority hints for OpenCL, but does not specify the -scheduling behavior or minimum guarantees. -It is expected that the user guides associated with each implementation -which supports this extension will describe the scheduling behavior -guarantees. - -=== General Information - -==== Version History - -[cols="1,1,3",options="header",] -|==== -| *Date* | *Version* | *Description* -| 2020-04-21 | 1.0.0 | First assigned version. -|==== - -[[cl_khr_priority_hints-host-side-api-modifications]] -=== Host-side API modifications - -The function {clCreateCommandQueueWithProperties} (Section 5.1) is -extended to support a priority value as part of the _properties_ argument. - -The priority property applies to OpenCL command-queues that belong to the -same OpenCL context. - -The properties field accepts the {CL_QUEUE_PRIORITY_KHR} property, with a -value of type {cl_queue_priority_khr_TYPE}, which can be one of: - - * {CL_QUEUE_PRIORITY_HIGH_KHR} - * {CL_QUEUE_PRIORITY_MED_KHR} - * {CL_QUEUE_PRIORITY_LOW_KHR} - -If {CL_QUEUE_PRIORITY_KHR} is not specified then the default priority is -{CL_QUEUE_PRIORITY_MED_KHR}. - -To the error section for {clCreateCommandQueueWithProperties}, the -following is added: - - * {CL_INVALID_QUEUE_PROPERTIES} if the {CL_QUEUE_PRIORITY_KHR} property is - specified and the queue is a {CL_QUEUE_ON_DEVICE}.
diff --git a/ext/cl_khr_select_fprounding_mode.asciidoc b/ext/cl_khr_select_fprounding_mode.asciidoc deleted file mode 100644 index c1285bd2..00000000 --- a/ext/cl_khr_select_fprounding_mode.asciidoc +++ /dev/null @@ -1,66 +0,0 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ - -[[cl_khr_select_fprounding_mode]] -== Selecting the Rounding Mode **(DEPRECATED)** - -This section describes the *cl_khr_select_fprounding_mode* extension. -It allows an application to specify the rounding mode for an instruction or group of instructions in the program source. - -**This extension was deprecated in OpenCL 1.1 and its use is not recommended.** - -=== General Information - -==== Version History - -[cols="1,1,3",options="header",] -|==== -| *Date* | *Version* | *Description* -| 2020-04-21 | 1.0.0 | First assigned version. -|==== - -=== Changes to OpenCL C specification - -With this extension, the rounding mode may be specified using the following *#pragma* in the OpenCL program source: - -[source,opencl_c] ----- -#pragma OPENCL SELECT_ROUNDING_MODE <rounding-mode> ----- - -The _<rounding-mode>_ may be one of the following values: - -* *rte* - round to nearest even -* *rtz* - round to zero -* *rtp* - round to positive infinity -* *rtn* - round to negative infinity - -If this extension is supported then the OpenCL implementation must support all four rounding modes for single precision floating-point. - -The *#pragma* sets the rounding mode for all instructions that operate on floating-point types (scalar or vector types) or produce floating-point values that follow this pragma in the program source until the next *#pragma*. -Note that the rounding mode specified for a block of code is known at compile time.
-When inside a compound statement, the pragma takes effect from its occurrence until another *#pragma* is encountered (including within a nested compound statement), or until the end of the compound statement; at the end of a compound statement the state for the pragma is restored to its condition just before the compound statement. -Except where otherwise documented, the callee functions do not inherit the rounding mode of the caller function. - -If this extension is enabled, the `\\__ROUNDING_MODE__` preprocessor symbol shall be defined to be one of the following according to the current rounding mode: - -[source,opencl_c] ----- -#define __ROUNDING_MODE__ rte -#define __ROUNDING_MODE__ rtz -#define __ROUNDING_MODE__ rtp -#define __ROUNDING_MODE__ rtn ----- - -This is intended to enable remapping `foo()` to `foo_rte()` by the preprocessor by using: - -[source,opencl_c] ----- -#define foo foo ## __ROUNDING_MODE__ ----- - -The default rounding mode is round to nearest even. -The built-in math functions described in _section 6.11.2_, the common functions described in _section 6.11.4_ and the geometric functions described in _section 6.11.5_ are implemented with the round to nearest even rounding mode. -Various built-in conversions and the *vstore_half* and *vstorea_half* built-in functions that do not specify a rounding mode inherit the current rounding mode. -Conversions from floating-point to integer type always use `rtz` mode, except where the user specifically asks for another rounding mode. diff --git a/ext/cl_khr_semaphore.asciidoc b/ext/cl_khr_semaphore.asciidoc deleted file mode 100644 index 95c41522..00000000 --- a/ext/cl_khr_semaphore.asciidoc +++ /dev/null @@ -1,634 +0,0 @@ -// Copyright 2021-2024 The Khronos Group.
This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ - -[[cl_khr_semaphore]] -== Semaphores (Provisional) - -OpenCL provides {cl_event_TYPE} as a primary mechanism of synchronization between host and device as well as across devices. -While events can be waited on or can be passed as dependencies across work-submissions, they suffer from following limitations: - -* They are immutable. - -* They are not reusable. - -This extension introduces a new type of synchronization object to represent semaphores that can be reused, waited on, and signaled multiple times by OpenCL work-submissions. - -In particular, this extension defines: - -* A new type called {cl_semaphore_khr_TYPE} to represent the semaphore objects. - -* A new type called {cl_semaphore_properties_khr_TYPE} to specify metadata associated with semaphores. - -* Routines to create, retain, and release semaphores. - -* Routines to wait on and signal semaphore objects. - -* Routine to query the properties of semaphore objects. - -=== General Information - -==== Name Strings - -`cl_khr_semaphore` - -==== Version History - -[cols="1,1,3",options="header",] -|==== -| *Date* | *Version* | *Description* -| 2021-09-10 | 0.9.0 | Initial version (provisional). -| 2023-08-01 | 0.9.1 | Changed device handle list enum to the semaphore-specific {CL_SEMAPHORE_DEVICE_HANDLE_LIST_KHR} (provisional). -|==== - -include::provisional_notice.asciidoc[] - -==== Dependencies - -This extension is written against the OpenCL Specification Version 3.0.8. - -This extension requires OpenCL 1.2. 
- -==== Contributors - -// spell-checker: disable -Ajit Hakke-Patil, NVIDIA + -Amit Rao, NVIDIA + -Balaji Calidas, QUALCOMM + -Ben Ashbaugh, INTEL + -Carsten Rohde, NVIDIA + -Christoph Kubisch, NVIDIA + -Debalina Bhattacharjee, NVIDIA + -Faith Ekstrand, INTEL + -Gorazd Sumkovski, ARM + -James Jones, NVIDIA + -Jeremy Kemp, IMAGINATION + -Joshua Kelly, QUALCOMM + -Karthik Raghavan Ravi, NVIDIA + -Kedar Patil, NVIDIA + -Kevin Petit, ARM + -Nikhil Joshi, NVIDIA + -Sharan Ashwathnarayan, NVIDIA + -Vivek Kini, NVIDIA + -// spell-checker: enable - -=== New Types - -[source] ----- -typedef struct _cl_semaphore_khr* cl_semaphore_khr; - -typedef cl_properties cl_semaphore_properties_khr; -typedef cl_uint cl_semaphore_info_khr; -typedef cl_uint cl_semaphore_type_khr; -typedef cl_ulong cl_semaphore_payload_khr; ----- - -=== New API Functions - -[source] ----- -cl_semaphore_khr clCreateSemaphoreWithPropertiesKHR( - cl_context context, - const cl_semaphore_properties_khr *sema_props, - cl_int *errcode_ret); - -cl_int clEnqueueWaitSemaphoresKHR( - cl_command_queue command_queue, - cl_uint num_sema_objects, - const cl_semaphore_khr *sema_objects, - const cl_semaphore_payload_khr *sema_payload_list, - cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, - cl_event *event); - -cl_int clEnqueueSignalSemaphoresKHR( - cl_command_queue command_queue, - cl_uint num_sema_objects, - const cl_semaphore_khr *sema_objects, - const cl_semaphore_payload_khr *sema_payload_list, - cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, - cl_event *event); - -cl_int clGetSemaphoreInfoKHR( - cl_semaphore_khr sema_object, - cl_semaphore_info_khr param_name, - size_t param_value_size, - void *param_value, - size_t *param_value_size_ret); - -cl_int clReleaseSemaphoreKHR(cl_semaphore_khr sema_object); - -cl_int clRetainSemaphoreKHR(cl_semaphore_khr sema_object); ----- - -=== New API Enums - -Accepted value for the _param_name_ parameter to {clGetPlatformInfo} to query the 
semaphore types supported by an OpenCL platform: - -[source] ----- -CL_PLATFORM_SEMAPHORE_TYPES_KHR 0x2036 ----- - -Accepted value for the _param_name_ parameter to {clGetDeviceInfo} to query the semaphore types supported by an OpenCL device: - -[source] ----- -CL_DEVICE_SEMAPHORE_TYPES_KHR 0x204C ----- - -Semaphore types: - -[source] ----- -CL_SEMAPHORE_TYPE_BINARY_KHR 1 ----- - -New attributes that can be passed as part of {cl_semaphore_info_khr_TYPE}: - -[source] ----- -CL_SEMAPHORE_CONTEXT_KHR 0x2039 -CL_SEMAPHORE_REFERENCE_COUNT_KHR 0x203A -CL_SEMAPHORE_PROPERTIES_KHR 0x203B -CL_SEMAPHORE_PAYLOAD_KHR 0x203C ----- - -New attributes that can be passed as part of {cl_semaphore_info_khr_TYPE} or {cl_semaphore_properties_khr_TYPE}: - -[source] ----- -CL_SEMAPHORE_TYPE_KHR 0x203D -CL_SEMAPHORE_DEVICE_HANDLE_LIST_KHR 0x2053 -CL_SEMAPHORE_DEVICE_HANDLE_LIST_END_KHR 0 ----- - -New return values from {clGetEventInfo} when _param_name_ is {CL_EVENT_COMMAND_TYPE}: - -[source] ----- -CL_COMMAND_SEMAPHORE_WAIT_KHR 0x2042 -CL_COMMAND_SEMAPHORE_SIGNAL_KHR 0x2043 ----- - -The following error codes can be returned by APIs introduced as part of this specification or the specifications that depend on this: -[source] ----- -CL_INVALID_SEMAPHORE_KHR -1142 ----- - -=== Modifications to existing APIs added by this spec - -Following new enums are added to the list of supported _param_names_ by {clGetPlatformInfo}: - -.List of supported param_names by {clGetPlatformInfo} -[width="100%",cols="<33%,<17%,<50%",options="header"] -|==== -| Platform Info | Return Type | Description -| {CL_PLATFORM_SEMAPHORE_TYPES_KHR} - | {cl_semaphore_type_khr_TYPE}[] - | Returns the list of the semaphore types supported by all devices in _platform_. -|==== - -{clGetPlatformInfo} when called with _param_name_ {CL_PLATFORM_SEMAPHORE_TYPES_KHR} must return the common list of semaphore types supported by all devices in the platform.
- -Following new enums are added to the list of supported _param_names_ by {clGetDeviceInfo}: - -.List of supported param_names by {clGetDeviceInfo} -[width="100%",cols="<33%,<17%,<50%",options="header"] -|==== -| Device Info | Return Type | Description -| {CL_DEVICE_SEMAPHORE_TYPES_KHR} - | {cl_semaphore_type_khr_TYPE}[] - | Returns the list of the semaphore types supported by _device_. -|==== - -{clGetDeviceInfo} when called with param_name {CL_DEVICE_SEMAPHORE_TYPES_KHR} must return a non-empty list of semaphore types for at least one of the devices in the platform. -The results of this query should meet minimum requirements for {cl_semaphore_type_khr_TYPE} as described by <>. - -=== Description of new types added by this spec - -Following new types are added: - -* {cl_semaphore_type_khr_TYPE} to represent the different types of semaphores. - ** It is mandatory to support {CL_SEMAPHORE_TYPE_BINARY_KHR}. - -* {cl_semaphore_properties_khr_TYPE} to represent properties associated with semaphores. - ** {CL_SEMAPHORE_TYPE_KHR} must be supported. - -* {cl_semaphore_info_khr_TYPE} to represent queries to get additional information about semaphores. - ** All enums described in New API Enums for {cl_semaphore_info_khr_TYPE} must be supported. - -* {cl_semaphore_payload_khr_TYPE} to represent payload values of semaphores. - -* {cl_semaphore_khr_TYPE} to represent semaphore objects. - -Note that above types can be extended in future based on the need for additional types of semaphore and properties required by them. -The specifics of the same can be added as a newer version of this specification or by a separate specification that depends on this for basic semaphore support. - -=== Description of new APIs added by this spec - -The following new APIs are added as part of this spec. 
The details of each are described below: - -==== Creating semaphores - -A *semaphore object* may be created using the function - -include::{generated}/api/protos/clCreateSemaphoreWithPropertiesKHR.txt[] - -_context_ identifies a valid OpenCL context that the created {cl_semaphore_khr_TYPE} will belong to. - -_sema_props_ specifies additional semaphore properties in the form of a list of property-name/property-value pairs terminated with 0. -{CL_SEMAPHORE_TYPE_KHR} must be part of the list of properties specified by _sema_props_. - -Following new properties are added to the list of possible supported properties by {cl_semaphore_properties_khr_TYPE} that can be passed to {clCreateSemaphoreWithPropertiesKHR}: - -.List of supported semaphore creation properties by {clCreateSemaphoreWithPropertiesKHR} -[width="100%",cols="<33%,<17%,<50%",options="header"] -|==== -| Semaphore Property | Property Value | Description -| {CL_SEMAPHORE_TYPE_KHR} - | {cl_semaphore_type_khr_TYPE} - | Specifies the type of semaphore to create. This property is always required. -| {CL_SEMAPHORE_DEVICE_HANDLE_LIST_KHR} - | {cl_device_id_TYPE}[] - | Specifies the list of OpenCL devices (terminated with {CL_SEMAPHORE_DEVICE_HANDLE_LIST_END_KHR}) to associate with the semaphore. Only a single device is permitted in the list. -|==== - -If {CL_SEMAPHORE_DEVICE_HANDLE_LIST_KHR} is not specified as part of _sema_props_, the semaphore object created by {clCreateSemaphoreWithPropertiesKHR} is by default accessible to all devices in the _context_. For a multi-device context {CL_SEMAPHORE_DEVICE_HANDLE_LIST_KHR} must be specified in _sema_props_. - -_errcode_ret_ returns an appropriate error code. If _errcode_ret_ is `NULL`, no error code is returned. - -{clCreateSemaphoreWithPropertiesKHR} returns a valid semaphore object in an un-signaled state and _errcode_ret_ is set to {CL_SUCCESS} if the function is executed successfully.
-Otherwise, it returns a `NULL` value with one of the following error values returned in _errcode_ret_: - -* {CL_INVALID_CONTEXT} if _context_ is not a valid context. -* {CL_INVALID_PROPERTY} if a property name in _sema_props_ is not a supported property name, if the value specified for a supported property name is not valid, or if the same property name is specified more than once. Additionally, if _context_ is a multiple device context and _sema_props_ does not specify {CL_SEMAPHORE_DEVICE_HANDLE_LIST_KHR}. -* {CL_INVALID_DEVICE} if {CL_SEMAPHORE_DEVICE_HANDLE_LIST_KHR} is specified as part of _sema_props_, but it does not identify exactly one valid device or if a device identified by {CL_SEMAPHORE_DEVICE_HANDLE_LIST_KHR} is not one of the devices within _context_. -* {CL_INVALID_VALUE} -** if _sema_props_ is `NULL`, or -** if _sema_props_ do not specify pairs for minimum set of properties (i.e. {CL_SEMAPHORE_TYPE_KHR}) required for successful creation of a {cl_semaphore_khr_TYPE}, or -* {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required by the OpenCL implementation on the device. -* {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources required by the OpenCL implementation on the host. - -==== Waiting on and signaling semaphores - -To enqueue a command to wait on a set of semaphores, call the function - -include::{generated}/api/protos/clEnqueueWaitSemaphoresKHR.txt[] - -_command_queue_ specifies a valid command-queue. - -_num_sema_objects_ specifies the number of semaphore objects to wait on. - -_sema_objects_ points to the list of semaphore objects to wait on. -The length of the list must be at least _num_sema_objects_. - -_sema_payload_list_ points to the list of values of type {cl_semaphore_payload_khr_TYPE} containing valid semaphore payload values to wait on. -This can be set to `NULL` or will be ignored when all semaphores in the list of _sema_objects_ are of type {CL_SEMAPHORE_TYPE_BINARY_KHR}. 
- -_num_events_in_wait_list_ specifies the number of events in _event_wait_list_. - -_event_wait_list_ specifies list of events that need to complete before {clEnqueueWaitSemaphoresKHR} can be executed. -If _event_wait_list_ is `NULL`, then {clEnqueueWaitSemaphoresKHR} does not wait on any event to complete. -If _event_wait_list_ is `NULL`, _num_events_in_wait_list_ must be 0. -If _event_wait_list_ is not `NULL`, the list of events pointed to by _event_wait_list_ must be valid and _num_events_in_wait_list_ must be greater than 0. -The events specified in _event_wait_list_ act as synchronization points. -The context associated with events in _event_wait_list_ and that associated with _command_queue_ must be the same. - -_event_ returns an event object that identifies this particular command and can be used to query or queue a wait for this particular command to complete. -_event_ can be `NULL` in which case it will not be possible for the application to query the status of this command or queue a wait for this command to complete. - -The semaphore wait command waits for a list of events to complete and a list of semaphore objects to become signaled. -The semaphore wait command returns an _event_ which can be waited on to ensure that all events in the _event_wait_list_ have completed and all semaphores in _sema_objects_ have been signaled. -{clEnqueueWaitSemaphoresKHR} will not return until the binary semaphores in _sema_objects_ are in a state that makes them safe to re-signal. If necessary, implementations may block in {clEnqueueWaitSemaphoresKHR} to ensure the correct state of semaphores when returning. There are no implications from this behavior for the state of _event_ or the events in _event_wait_list_ when {clEnqueueWaitSemaphoresKHR} returns. Waiting on the same binary semaphore twice without an interleaving signal may lead to undefined behavior. - -{clEnqueueWaitSemaphoresKHR} returns {CL_SUCCESS} if the function is executed successfully. 
-Otherwise, it returns one of the following errors: - -* {CL_INVALID_COMMAND_QUEUE} -** if _command_queue_ is not a valid command-queue, or -** if the device associated with _command_queue_ is not same as one of the devices specified by {CL_SEMAPHORE_DEVICE_HANDLE_LIST_KHR} at the time of creating one or more of _sema_objects_. -* {CL_INVALID_VALUE} if _num_sema_objects_ is 0. -* {CL_INVALID_SEMAPHORE_KHR} if any of the semaphore objects specified by _sema_objects_ is not valid. -* {CL_INVALID_CONTEXT} if the context associated with _command_queue_ and any of the semaphore objects in _sema_objects_ are not the same or if the context associated with _command_queue_ and that associated with events in _event_wait_list_ are not the same. -* {CL_INVALID_VALUE} if any of the semaphore objects specified by _sema_objects_ requires a semaphore payload and _sema_payload_list_ is `NULL`. -* {CL_INVALID_EVENT_WAIT_LIST} - ** if _event_wait_list_ is `NULL` and _num_events_in_wait_list_ is not 0, or - ** if _event_wait_list_ is not `NULL` and _num_events_in_wait_list_ is 0, or - ** if event objects in _event_wait_list_ are not valid events. -* {CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST} if the execution status of any of the events in _event_wait_list_ is a negative integer value. -* {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required by the OpenCL implementation on the device. -* {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources required by the OpenCL implementation on the host. - -To enqueue a command to signal a set of semaphores, call the function - -include::{generated}/api/protos/clEnqueueSignalSemaphoresKHR.txt[] - -_command_queue_ specifies a valid command-queue. - -_num_sema_objects_ specifies the number of semaphore objects to signal. - -_sema_objects_ points to the list of semaphore objects to signal. -The length of the list must be at least _num_sema_objects_. 
- -_sema_payload_list_ points to the list of values of type {cl_semaphore_payload_khr_TYPE} containing semaphore payload values to signal. -This can be set to `NULL` or will be ignored when all semaphores in the list of _sema_objects_ are of type {CL_SEMAPHORE_TYPE_BINARY_KHR}. - -_num_events_in_wait_list_ specifies the number of events in event_wait_list. - -_event_wait_list_ points to the list of events that need to complete before {clEnqueueSignalSemaphoresKHR} can be executed. -If _event_wait_list_ is `NULL`, then {clEnqueueSignalSemaphoresKHR} does not wait on any event to complete. -If _event_wait_list_ is `NULL`, _num_events_in_wait_list_ must be 0. -If _event_wait_list_ is not `NULL`, the list of events pointed to by _event_wait_list_ must be valid and -_num_events_in_wait_list_ must be greater than 0. -The events specified in _event_wait_list_ act as synchronization points. -The context associated with events in _event_wait_list_ and that associated with _command_queue_ must be the same. - -_event_ returns an event object that identifies this particular command and can be used to query or queue a wait for this particular command to complete. -_event_ can be `NULL` in which case it will not be possible for the application to query the status of this command -or queue a wait for this command to complete. - -The semaphore signal command waits for a list of events to complete and then signals a list of semaphore objects. -The semaphore signal command returns an _event_ which can be waited on to ensure that all events in the _event_wait_list_ have completed and all semaphores in _sema_objects_ have been signaled. -The successful completion of the event generated by {clEnqueueSignalSemaphoresKHR} called on one or more semaphore objects of type {CL_SEMAPHORE_TYPE_BINARY_KHR} changes the state of the corresponding semaphore objects to signaled. 
{clEnqueueSignalSemaphoresKHR} will not return until the binary semaphores in _sema_objects_ are in a state that makes them safe to wait on again. If necessary, implementations may block in {clEnqueueSignalSemaphoresKHR} to ensure the correct state of semaphores when returning. There are no implications from this behavior for the state of _event_ or the events in _event_wait_list_ when {clEnqueueSignalSemaphoresKHR} returns. Signaling the same binary semaphore twice without an interleaving wait may lead to undefined behavior. - -{clEnqueueSignalSemaphoresKHR} returns {CL_SUCCESS} if the function is executed successfully. -Otherwise, it returns one of the following errors: - -* {CL_INVALID_COMMAND_QUEUE} -** if _command_queue_ is not a valid command-queue, or -** if the device associated with _command_queue_ is not same as one of the devices specified by {CL_SEMAPHORE_DEVICE_HANDLE_LIST_KHR} at the time of creating one or more of _sema_objects_. -* {CL_INVALID_VALUE} if _num_sema_objects_ is 0 -* {CL_INVALID_SEMAPHORE_KHR} if any of the semaphore objects specified by _sema_objects_ is not valid. -* {CL_INVALID_CONTEXT} if the context associated with _command_queue_ and any of the semaphore objects in _sema_objects_ are not the same or if the context associated with _command_queue_ and that associated with events in _event_wait_list_ are not the same. -* {CL_INVALID_VALUE} if any of the semaphore objects specified by _sema_objects_ requires a semaphore payload and _sema_payload_list_ is `NULL`. -* {CL_INVALID_EVENT_WAIT_LIST} - ** if _event_wait_list_ is `NULL` and _num_events_in_wait_list_ is not 0, or - ** if _event_wait_list_ is not `NULL` and _num_events_in_wait_list_ is 0, or - ** if event objects in _event_wait_list_ are not valid events. -* {CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST} if the execution status of any of the events in _event_wait_list_ is a negative integer value. 
-* {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required by the OpenCL implementation on the device. -* {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources required by the OpenCL implementation on the host. - -==== Semaphore Queries - -To query information about a semaphore object, call the function - -include::{generated}/api/protos/clGetSemaphoreInfoKHR.txt[] - -_sema_object_ specifies the semaphore object being queried. - -_param_name_ is a constant that specifies the semaphore information to query, and must be one of the values shown in the <> table. - -_param_value_ is a pointer to memory where the result of the query is returned as described in the <> table. If _param_value_ is `NULL`, it is ignored. - -_param_value_size_ specifies the size in bytes of memory pointed to _param_value_. This size must be greater than or equal to the size of the return type described in the <> table. - -_param_value_size_ret_ returns the actual size in bytes of data -being queried by _param_value_. If _param_value_size_ret_ is `NULL`, it is ignored. - -[[cl_khr_semaphore_info-table]] -.List of supported param_names by {clGetSemaphoreInfoKHR} -[width="100%",cols="<33%,<17%,<50%",options="header"] -|==== -| Semaphore Info | Return Type | Description -| {CL_SEMAPHORE_CONTEXT_KHR} - | {cl_context_TYPE} - | Returns the context specified when the semaphore is created. - -| {CL_SEMAPHORE_REFERENCE_COUNT_KHR} footnote:[{fn-reference-count-usage}] - | {cl_uint_TYPE} - | Returns the semaphore reference count. - -| {CL_SEMAPHORE_PROPERTIES_KHR} - | {cl_semaphore_properties_khr_TYPE}[] - | Return the properties argument specified in - {clCreateSemaphoreWithPropertiesKHR}. - - The implementation must return the values specified in the properties - argument in the same order and without including additional properties. - -| {CL_SEMAPHORE_TYPE_KHR} - | {cl_semaphore_type_khr_TYPE} - | Returns the semaphore type. 
- -| {CL_SEMAPHORE_PAYLOAD_KHR} - | {cl_semaphore_payload_khr_TYPE} - | Returns the semaphore payload value. For semaphores of type - {CL_SEMAPHORE_TYPE_BINARY_KHR}, the payload value returned will be `0` - if the semaphore is in an un-signaled state and `1` if it is in a - signaled state. - -| {CL_SEMAPHORE_DEVICE_HANDLE_LIST_KHR} - | {cl_device_id_TYPE}[] - | Returns the list of OpenCL devices the semaphore is associated with. -|==== - -{clGetSemaphoreInfoKHR} returns {CL_SUCCESS} if the information is queried successfully. -Otherwise, it returns one of the following errors: - -* {CL_INVALID_SEMAPHORE_KHR} -** if _sema_object_ is not a valid semaphore -* {CL_INVALID_VALUE} -** if _param_name_ is not one of the attribute defined in the <> table or -** if _param_value_size_ is less than the size of Return Type of the corresponding _param_name_ attribute as defined in the <> table. -* {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required by the OpenCL implementation on the device. -* {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources required by the OpenCL implementation on the host. - -==== Retaining and Releasing Semaphores - -To release a semaphore object, call the function - -include::{generated}/api/protos/clReleaseSemaphoreKHR.txt[] - -_sema_object_ specifies the semaphore object to be released. - -The _sema_object_ reference count is decremented. - -{clReleaseSemaphoreKHR} returns {CL_SUCCESS} if the function is executed successfully. -Otherwise, it returns one of the following errors: - -* {CL_INVALID_SEMAPHORE_KHR} if _sema_object_ is not a valid semaphore object. -* {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required by the OpenCL implementation on the device. -* {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources required by the OpenCL implementation on the host. 
- -After the reference count becomes zero and commands queued for execution on a -command-queue(s) that use _sema_object_ have finished, the semaphore object is -deleted. -Using this function to release a reference that was not obtained by creating the -object via {clCreateSemaphoreWithPropertiesKHR} or by calling -{clRetainSemaphoreKHR} causes undefined behavior. - -To retain a semaphore object, call the function - -include::{generated}/api/protos/clRetainSemaphoreKHR.txt[] - -_sema_object_ specifies the semaphore object to be retained. - -The _sema_object_ reference count is incremented. - -{clRetainSemaphoreKHR} returns {CL_SUCCESS} if the function is executed successfully. -Otherwise, it returns one of the following errors: - -* {CL_INVALID_SEMAPHORE_KHR} if _sema_object_ is not a valid semaphore object. -* {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required by the OpenCL implementation on the device. -* {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources required by the OpenCL implementation on the host. - -[[cl_khr_semaphore-Sample-Code]] -=== Sample Code - -. Example for semaphore creation in a single device context -+ --- -[source] ----- -// Get cl_devices of the platform. -clGetDeviceIDs(..., &devices, &deviceCount); - -// Create cl_context with just first device -context = clCreateContext(..., 1, devices, ...); - -// Create clSema of type cl_semaphore_khr usable on single device in the context - -cl_semaphore_properties_khr sema_props[] = - {(cl_semaphore_properties_khr)CL_SEMAPHORE_TYPE_KHR, - (cl_semaphore_properties_khr)CL_SEMAPHORE_TYPE_BINARY_KHR, - 0}; - -int errcode_ret = 0; - -cl_semaphore_khr clSema = clCreateSemaphoreWithPropertiesKHR(context, - sema_props, - &errcode_ret); ----- --- - -. Example for semaphore creation for a single device in a multi-device context -+ --- -[source] ----- -// Get cl_devices of the platform.
-clGetDeviceIDs(..., &devices, &deviceCount); - -// Create cl_context with first two devices -clCreateContext(..., 2, devices, ...); - -// Create clSema of type cl_semaphore_khr usable only on device 0 -cl_semaphore_properties_khr sema_props[] = { - (cl_semaphore_properties_khr)CL_SEMAPHORE_TYPE_KHR, - (cl_semaphore_properties_khr)CL_SEMAPHORE_TYPE_BINARY_KHR, - (cl_semaphore_properties_khr)CL_SEMAPHORE_DEVICE_HANDLE_LIST_KHR, - (cl_semaphore_properties_khr)devices[0], - CL_SEMAPHORE_DEVICE_HANDLE_LIST_END_KHR, - 0 -}; - -int errcode_ret = 0; - -cl_semaphore_khr clSema = clCreateSemaphoreWithPropertiesKHR(context, - sema_props, - &errcode_ret); ----- --- -. Example for synchronization using Wait and Signal -+ --- -[source] ----- -// clSema is created using clCreateSemaphoreWithPropertiesKHR -// using one of the examples for semaphore creation. - -cl_semaphore_khr clSema = clCreateSemaphoreWithPropertiesKHR(context, - sema_props, - &errcode_ret); - -// Start the main loop - -while (true) { - // (not shown) Signal the semaphore from other work - - // Wait for the semaphore in OpenCL - // by calling clEnqueueWaitSemaphoresKHR on 'clSema' - clEnqueueWaitSemaphoresKHR(/*command_queue*/ command_queue, - /*num_sema_objects*/ 1, - /*sema_objects*/ &clSema, - /*sema_payload_list*/ NULL, - /*num_events_in_wait_list*/ 0, - /*event_wait_list*/ NULL, - /*event*/ NULL); - - // Launch kernel that accesses extMem - clEnqueueNDRangeKernel(command_queue, ...); - - // Signal the semaphore in OpenCL - clEnqueueSignalSemaphoresKHR(/*command_queue*/ command_queue, - /*num_sema_objects*/ 1, - /*sema_objects*/ &clSema, - /*sema_payload_list*/ NULL, - /*num_events_in_wait_list*/ 0, - /*event_wait_list*/ NULL, - /*event*/ NULL); - - // (not shown) Launch other work that waits on 'clSema' -} ----- --- -. Example for {clGetSemaphoreInfoKHR} -+ --- -[source] ----- -// clSema is created using clCreateSemaphoreWithPropertiesKHR -// using one of the examples for semaphore creation. 
- -cl_semaphore_khr clSema = clCreateSemaphoreWithPropertiesKHR(context, - sema_props, - &errcode_ret); - -// Start the main rendering loop - -while (true) { - // (not shown) Signal the semaphore from other work - - // Wait for the semaphore in OpenCL, by calling clEnqueueWaitSemaphoresKHR on 'clSema' - clEnqueueWaitSemaphoresKHR(/*command_queue*/ command_queue, - /*num_sema_objects*/ 1, - /*sema_objects*/ &clSema, - /*sema_payload_list*/ NULL, - /*num_events_in_wait_list*/ 0, - /*event_wait_list*/ NULL, - /*event*/ NULL); - - // Launch kernel in OpenCL - clEnqueueNDRangeKernel(command_queue, ...); - - // Signal the semaphore in OpenCL - clEnqueueSignalSemaphoresKHR(/*command_queue*/ command_queue, - /*num_sema_objects*/ 1, - /*sema_objects*/ &clSema, - /*sema_payload_list*/ NULL, - /*num_events_in_wait_list*/ 0, - /*event_wait_list*/ NULL, - /*event*/ NULL); - - // Query type of clSema - clGetSemaphoreInfoKHR(/*sema_object*/ clSema, - /*param_name*/ CL_SEMAPHORE_TYPE_KHR, - /*param_value_size*/ sizeof(cl_semaphore_type_khr), - /*param_value*/ &clSemaType, - /*param_value_size_ret*/ &clSemaTypeSize); - - if (clSemaType == CL_SEMAPHORE_TYPE_BINARY_KHR) { - // Do something - } - else { - // Do something else - } - // (not shown) Launch other work that waits on 'clSema' -} ----- --- diff --git a/ext/cl_khr_spir.asciidoc b/ext/cl_khr_spir.asciidoc deleted file mode 100644 index d5d0d47f..00000000 --- a/ext/cl_khr_spir.asciidoc +++ /dev/null @@ -1,127 +0,0 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ - -[[cl_khr_spir]] -== Standard Portable Intermediate Representation Binaries - -This extension adds the ability to create an OpenCL program object from a -Standard Portable Intermediate Representation (SPIR) instance. -A SPIR instance is a vendor-neutral non-source representation for OpenCL C -programs.
- -The extension name is *cl_khr_spir*. -This extension has been superseded by the SPIR-V intermediate -representation, which is supported by the *cl_khr_il_program* extension, -and is a core feature in OpenCL 2.1. - -=== General Information - -==== Version History - -[cols="1,1,3",options="header",] -|==== -| *Date* | *Version* | *Description* -| 2020-04-21 | 1.0.0 | First assigned version. -|==== - -[[cl_khr_spir-additions-to-chapter-4]] -=== Additions to Chapter 4 of the OpenCL 2.2 Specification - -*Add a new device property to _table 4.3_ in _section 4.2_:* - -.List of supported param_names by {clGetDeviceInfo} -[cols="2,1,4",options="header",] -|==== -| Device Info -| Return Type -| Description - -| {CL_DEVICE_SPIR_VERSIONS} -| {char_TYPE}[] -| A space separated list of SPIR versions supported by the device. - - For example, returning `"1.2"` in this query implies that SPIR version 1.2 - is supported by the implementation. - -|==== - -[[cl_khr_spir-additions-to-chapter-5]] -=== Additions to Chapter 5 of the OpenCL 2.2 Specification - -*Additions to _section 5.8.1_ -- Creating Program Objects:* - -"{clCreateProgramWithBinary} can be used to load a SPIR binary. -Once a program object has been created from a SPIR binary, {clBuildProgram} -can be called to build a program executable or {clCompileProgram} can be -called to compile the SPIR binary." - -Modify the {CL_PROGRAM_BINARY_TYPE} entry in _table 5.14_ -for {clGetProgramBuildInfo} to add a potential value -{CL_PROGRAM_BINARY_TYPE_INTERMEDIATE}: - -.List of supported param_names by {clGetProgramBuildInfo} -[cols="2,1,4",options="header",] -|==== -| Program Build Info -| Return Type -| Description - -| {CL_PROGRAM_BINARY_TYPE} -| {cl_program_binary_type_TYPE} -| {CL_PROGRAM_BINARY_TYPE_INTERMEDIATE} -- An intermediate (non-source) - representation for the program is loaded as a binary. - The program must be further processed with {clCompileProgram} or - {clBuildProgram}. 
- - If processed with {clCompileProgram}, the result will be a binary of type - {CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT} or {CL_PROGRAM_BINARY_TYPE_LIBRARY}. - If processed with {clBuildProgram}, the result will be a binary of type - {CL_PROGRAM_BINARY_TYPE_EXECUTABLE}. - -|==== - -*Additions to _section 5.8.4_ -- Compiler Options:* - -"The compile option `-x spir` must be specified to indicate that the binary -is in SPIR format, and the compile option `-spir-std` must be used to -specify the version of the SPIR specification that describes the format and -meaning of the binary. -For example, if the binary is as described in SPIR version 1.2, then -`-spir-std=1.2` must be specified. -Failing to specify these compile options may result in implementation-defined -behavior." - -*Additions to _section 5.8.5_ -- Separate Compilation and Linking of Programs:* - -Replace this error for {clCompileProgram}: - - * {CL_INVALID_OPERATION} if _program_ has no source or IL available, i.e. it - has not been created with {clCreateProgramWithSource} or - {clCreateProgramWithIL}. - -with: - - * {CL_INVALID_OPERATION} if _program_ has no source or IL available, i.e. it - has not been created with {clCreateProgramWithSource} or - {clCreateProgramWithIL} or {clCreateProgramWithBinary} where `-x spir` is present in _options_. - -*Additions to _section 5.9.3_ -- Kernel Object Queries:* - -Modify following text in {clGetKernelArgInfo} from: - -"Kernel argument information is only available if the program object -associated with _kernel_ is created with {clCreateProgramWithSource} and the -program executable is built with the -cl-kernel-arg-info option specified in -_options_ argument to {clBuildProgram} or {clCompileProgram}." 
- -to: - -"Kernel argument information is only available if the program object -associated with _kernel_ is created with {clCreateProgramWithSource} and the -program executable is built with the `-cl-kernel-arg-info` option specified in -_options_ argument to {clBuildProgram} or {clCompileProgram}, or if the -program object associated with _kernel_ is created with -{clCreateProgramWithBinary} and the program executable is built with the -`-cl-kernel-arg-info` and `-x spir` options specified in _options_ argument to -{clBuildProgram} or {clCompileProgram}." diff --git a/ext/cl_khr_srgb_image_writes.asciidoc b/ext/cl_khr_srgb_image_writes.asciidoc deleted file mode 100644 index 63c7444f..00000000 --- a/ext/cl_khr_srgb_image_writes.asciidoc +++ /dev/null @@ -1,24 +0,0 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ - -[[cl_khr_srgb_image_writes]] -== sRGB Image Writes - -This section describes the *cl_khr_srgb_image_writes* extension. - -This extension enables kernels to write to sRGB images using the *write_imagef* built-in function. -The sRGB image formats that may be written to will be returned by {clGetSupportedImageFormats}. - -When the image is an sRGB image, the *write_imagef* built-in function will perform the linear to sRGB conversion. -Only the R, G, and B components are converted from linear to sRGB; the A component is written as-is. - -=== General Information - -==== Version History - -[cols="1,1,3",options="header",] -|==== -| *Date* | *Version* | *Description* -| 2020-04-21 | 1.0.0 | First assigned version.
-|==== diff --git a/ext/cl_khr_subgroup_extensions.asciidoc b/ext/cl_khr_subgroup_extensions.asciidoc deleted file mode 100644 index 9f131cb0..00000000 --- a/ext/cl_khr_subgroup_extensions.asciidoc +++ /dev/null @@ -1,1071 +0,0 @@ -== Extended Sub-group Functions - -[[extended-sub-groups]] -=== Overview - -This section describes a family of extensions that provide extended sub-group functionality. -The extensions in this family are: - -* `cl_khr_subgroup_extended_types` -* `cl_khr_subgroup_non_uniform_vote` -* `cl_khr_subgroup_ballot` -* `cl_khr_subgroup_non_uniform_arithmetic` -* `cl_khr_subgroup_shuffle` -* `cl_khr_subgroup_shuffle_relative` -* `cl_khr_subgroup_clustered_reduce` - -The functionality added by these extensions includes: - -* Additional data type support for sub-group broadcast, scan, and reduction functions; -* The ability to elect a single work item from a sub-group to perform a task; -* The ability to hold votes among work items in a sub-group; -* The ability to collect and operate on ballots from work items in the sub-group; -* The ability to use some sub-group functions, such as any, all, broadcasts, scans, and reductions within non-uniform flow control; -* Additional scan and reduction operators; -* Additional ways to exchange data among work items in a sub-group; -* Clustered reductions, that operate on a subset of work items in the sub-group. - -This section describes changes to the OpenCL C Language for these extensions. -There are no new API functions or enums added by these extensions. - -=== General Information - -==== Version History - -For all of the extensions described in this section: - -[cols="1,1,3",options="header",] -|==== -| *Date* | *Version* | *Description* -| 2020-12-15 | 1.0.0 | First assigned version. 
-|==== - -[[extended-sub-groups-summary]] -=== Summary of New OpenCL C Functions - -[source,opencl_c] ----- -// These functions are available to devices supporting -// cl_khr_subgroup_extended_types: - -// Note: Existing functions supporting additional data types. - -gentype sub_group_broadcast( gentype value, uint index ) - -gentype sub_group_reduce_add( gentype value ) -gentype sub_group_reduce_min( gentype value ) -gentype sub_group_reduce_max( gentype value ) - -gentype sub_group_scan_inclusive_add( gentype value ) -gentype sub_group_scan_inclusive_min( gentype value ) -gentype sub_group_scan_inclusive_max( gentype value ) - -gentype sub_group_scan_exclusive_add( gentype value ) -gentype sub_group_scan_exclusive_min( gentype value ) -gentype sub_group_scan_exclusive_max( gentype value ) - -// These functions are available to devices supporting -// cl_khr_subgroup_non_uniform_vote: - -int sub_group_elect() - -int sub_group_non_uniform_all( int predicate ) -int sub_group_non_uniform_any( int predicate ) -int sub_group_non_uniform_all_equal( gentype value ) - -// These functions are available to devices supporting -// cl_khr_subgroup_ballot: - -gentype sub_group_non_uniform_broadcast( gentype value, uint index ) -gentype sub_group_broadcast_first( gentype value ) - -uint4 sub_group_ballot( int predicate ) -int sub_group_inverse_ballot( uint4 value ) -int sub_group_ballot_bit_extract( uint4 value, uint index ) -uint sub_group_ballot_bit_count( uint4 value ) -uint sub_group_ballot_inclusive_scan( uint4 value ) -uint sub_group_ballot_exclusive_scan( uint4 value ) -uint sub_group_ballot_find_lsb( uint4 value ) -uint sub_group_ballot_find_msb( uint4 value ) - -uint4 get_sub_group_eq_mask() -uint4 get_sub_group_ge_mask() -uint4 get_sub_group_gt_mask() -uint4 get_sub_group_le_mask() -uint4 get_sub_group_lt_mask() - -// These functions are available to devices supporting -// cl_khr_subgroup_non_uniform_arithmetic: - -gentype sub_group_non_uniform_reduce_add( gentype value 
) -gentype sub_group_non_uniform_reduce_mul( gentype value ) -gentype sub_group_non_uniform_reduce_min( gentype value ) -gentype sub_group_non_uniform_reduce_max( gentype value ) -gentype sub_group_non_uniform_reduce_and( gentype value ) -gentype sub_group_non_uniform_reduce_or( gentype value ) -gentype sub_group_non_uniform_reduce_xor( gentype value ) -int sub_group_non_uniform_reduce_logical_and( int predicate ) -int sub_group_non_uniform_reduce_logical_or( int predicate ) -int sub_group_non_uniform_reduce_logical_xor( int predicate ) - -gentype sub_group_non_uniform_scan_inclusive_add( gentype value ) -gentype sub_group_non_uniform_scan_inclusive_mul( gentype value ) -gentype sub_group_non_uniform_scan_inclusive_min( gentype value ) -gentype sub_group_non_uniform_scan_inclusive_max( gentype value ) -gentype sub_group_non_uniform_scan_inclusive_and( gentype value ) -gentype sub_group_non_uniform_scan_inclusive_or( gentype value ) -gentype sub_group_non_uniform_scan_inclusive_xor( gentype value ) -int sub_group_non_uniform_scan_inclusive_logical_and( int predicate ) -int sub_group_non_uniform_scan_inclusive_logical_or( int predicate ) -int sub_group_non_uniform_scan_inclusive_logical_xor( int predicate ) - -gentype sub_group_non_uniform_scan_exclusive_add( gentype value ) -gentype sub_group_non_uniform_scan_exclusive_mul( gentype value ) -gentype sub_group_non_uniform_scan_exclusive_min( gentype value ) -gentype sub_group_non_uniform_scan_exclusive_max( gentype value ) -gentype sub_group_non_uniform_scan_exclusive_and( gentype value ) -gentype sub_group_non_uniform_scan_exclusive_or( gentype value ) -gentype sub_group_non_uniform_scan_exclusive_xor( gentype value ) -int sub_group_non_uniform_scan_exclusive_logical_and( int predicate ) -int sub_group_non_uniform_scan_exclusive_logical_or( int predicate ) -int sub_group_non_uniform_scan_exclusive_logical_xor( int predicate ) - -// These functions are available to devices supporting -// cl_khr_subgroup_shuffle: - 
-gentype sub_group_shuffle( gentype value, uint index ) -gentype sub_group_shuffle_xor( gentype value, uint mask ) - -// These functions are available to devices supporting -// cl_khr_subgroup_shuffle_relative: - -gentype sub_group_shuffle_up( gentype value, uint delta ) -gentype sub_group_shuffle_down( gentype value, uint delta ) - -// These functions are available to devices supporting -// cl_khr_subgroup_clustered_reduce: - -gentype sub_group_clustered_reduce_add( gentype value, uint clustersize ) -gentype sub_group_clustered_reduce_mul( gentype value, uint clustersize ) -gentype sub_group_clustered_reduce_min( gentype value, uint clustersize ) -gentype sub_group_clustered_reduce_max( gentype value, uint clustersize ) -gentype sub_group_clustered_reduce_and( gentype value, uint clustersize ) -gentype sub_group_clustered_reduce_or( gentype value, uint clustersize ) -gentype sub_group_clustered_reduce_xor( gentype value, uint clustersize ) -int sub_group_clustered_reduce_logical_and( int predicate, uint clustersize ) -int sub_group_clustered_reduce_logical_or( int predicate, uint clustersize ) -int sub_group_clustered_reduce_logical_xor( int predicate, uint clustersize ) ----- - -[[cl_khr_subgroup_extended_types]] -=== Extended Types - -This section describes functionality added by `cl_khr_subgroup_extended_types`. -This extension adds additional supported data types to the existing sub-group broadcast, scan, and reduction functions. - -==== Modify the Existing Section Describing Sub-group Functions - -Modify the first paragraph in this section that describes `gentype` type support for the sub-group `broadcast`, `scan`, and `reduction` functions to add scalar `char`, `uchar`, `short`, and `ushort` support, and to additionally add built-in vector type support for `broadcast` specifically. 
-The functions in the table and their descriptions remain unchanged by this extension: - -The table below describes OpenCL C programming language built-in functions that operate on a sub-group level. -These built-in functions must be encountered by all work items in the sub-group executing the kernel. -We use the generic type name `gentype` to indicate the built-in scalar data types `char`, `uchar`, `short`, `ushort`, `int`, `uint`, `long`, `ulong`, `float`, `double` (if double precision is supported), or `half` (if half precision is supported). - -For the `sub_group_broadcast` function, the generic type name `gentype` may additionally be one of the supported built-in vector data types `char__n__`, `uchar__n__`, `short__n__`, `ushort__n__`, `int__n__`, `uint__n__`, `long__n__`, `ulong__n__`, `float__n__`, `double__n__` (if double precision is supported), or `half__n__` (if half precision is supported). - -[[cl_khr_subgroup_non_uniform_vote]] -=== Votes and Elections - -This section describes functionality added by `cl_khr_subgroup_non_uniform_vote`. -This extension adds the ability to elect a single work item from a sub-group to perform a task and to hold votes among work items in a sub-group. - -==== Add a new Section 6.15.X - Sub-group Vote and Elect Built-in Functions - -The table below describes the OpenCL C programming language built-in functions to elect a single work item in a sub-group to perform a task and to collectively vote to determine a boolean condition for the sub-group. -These functions need not be encountered by all work items in a sub-group executing the kernel. -For the functions below, the generic type name `gentype` may be one of the supported built-in scalar data types `char`, `uchar`, `short`, `ushort`, `int`, `uint`, `long`, `ulong`, `float`, `double` (if double precision is supported), or `half` (if half precision is supported).
- -[cols="1a,1",options="header",] -|======================================================================= -|*Function* -|*Description* - -|[source,opencl_c] ----- -int sub_group_elect() ----- -| Elects a single work item in the sub-group to perform a task. -This function will return true (nonzero) for the active work item in the sub-group with the smallest sub-group local ID, and false (zero) for all other active work items in the sub-group. - -|[source,opencl_c] ----- -int sub_group_non_uniform_all( - int predicate ) ----- -| Examines _predicate_ for all active work items in the sub-group and returns a non-zero value if _predicate_ is non-zero for all active work items in the sub-group and zero otherwise. - -Note: This behavior is the same as `sub_group_all` from `cl_khr_subgroups` and OpenCL 2.1, except this function need not be encountered by all work items in the sub-group executing the kernel. - -|[source,opencl_c] ----- -int sub_group_non_uniform_any( - int predicate ) ----- -| Examines _predicate_ for all active work items in the sub-group and returns a non-zero value if _predicate_ is non-zero for any active work item in the sub-group and zero otherwise. - -Note: This behavior is the same as `sub_group_any` from `cl_khr_subgroups` and OpenCL 2.1, except this function need not be encountered by all work items in the sub-group executing the kernel. - -|[source,opencl_c] ----- -int sub_group_non_uniform_all_equal( - gentype value ) ----- -| Examines _value_ for all active work items in the sub-group and returns a non-zero value if _value_ is equivalent for all active work items in the sub-group and zero otherwise. - -Integer types use a bitwise test for equality. Floating-point types use an ordered floating-point test for equality. - -|======================================================================= - -[[cl_khr_subgroup_ballot]] -=== Ballots - -This section describes functionality added by `cl_khr_subgroup_ballot`.
-This extension adds the ability to collect and operate on ballots from work items in the sub-group. - -==== Add a new Section 6.15.X - Sub-group Ballot Built-in Functions - -The table below describes the OpenCL C programming language built-in functions to allow work items in a sub-group to collect and operate on ballots from work items in the sub-group. -These functions need not be encountered by all work items in a sub-group executing the kernel. - -For the `sub_group_non_uniform_broadcast` and `sub_group_broadcast_first` functions, the generic type name `gentype` may be one of the supported built-in scalar data types `char`, `uchar`, `short`, `ushort`, `int`, `uint`, `long`, `ulong`, `float`, `double` (if double precision is supported), or `half` (if half precision is supported). - -For the `sub_group_non_uniform_broadcast` function, the generic type name `gentype` may additionally be one of the supported built-in vector data types `char__n__`, `uchar__n__`, `short__n__`, `ushort__n__`, `int__n__`, `uint__n__`, `long__n__`, `ulong__n__`, `float__n__`, `double__n__` (if double precision is supported), or `half__n__` (if half precision is supported). - -[cols="1a,1",options="header",] -|======================================================================= -|*Function* -|*Description* - -|[source,opencl_c] ----- -gentype sub_group_non_uniform_broadcast( - gentype value, - uint index ) ----- -| Returns _value_ for the work item with sub-group local ID equal to _index_. - -Behavior is undefined when the value of _index_ is not equivalent for all active work items in the sub-group. - -The return value is undefined if the work item with sub-group local ID equal to _index_ is inactive or if _index_ is greater than or equal to the size of the sub-group. - -|[source,opencl_c] ----- -gentype sub_group_broadcast_first( - gentype value ) ----- -| Returns _value_ for the work item with the smallest sub-group local ID among active work items in the sub-group. 
- -|[source,opencl_c] ----- -uint4 sub_group_ballot( - int predicate ) ----- -| Returns a bitfield combining the _predicate_ values from all work items in the sub-group. -Bit zero of the first vector component represents the sub-group local ID zero, with higher-order bits and subsequent vector components representing, in order, increasing sub-group local IDs. -The representative bit in the bitfield is set if the work item is active and the _predicate_ is non-zero, and is unset otherwise. - -|[source,opencl_c] ----- -int sub_group_inverse_ballot( - uint4 value ) ----- -| Returns the predicate value for this work item in the sub-group from the bitfield _value_ representing predicate values from all work items in the sub-group. -The predicate return value will be non-zero if the bit in the bitfield _value_ for this work item is set, and zero otherwise. - -Behavior is undefined when _value_ is not equivalent for all active work items in the sub-group. - -This is a specialized function that may perform better than the equivalent `sub_group_ballot_bit_extract` on some implementations. - -|[source,opencl_c] ----- -int sub_group_ballot_bit_extract( - uint4 value, - uint index ) ----- -| Returns the predicate value for the work item with sub-group local ID equal to _index_ from the bitfield _value_ representing predicate values from all work items in the sub-group. -The predicate return value will be non-zero if the bit in the bitfield _value_ for the work item with sub-group local ID equal to _index_ is set, and zero otherwise. - -The predicate return value is undefined if the work item with sub-group local ID equal to _index_ is greater than or equal to the size of the sub-group. 
- -|[source,opencl_c] ----- -uint sub_group_ballot_bit_count( - uint4 value ) ----- -| Returns the number of bits that are set in the bitfield _value_, only considering the bits in _value_ that represent predicate values corresponding to sub-group local IDs less than the maximum sub-group size within the dispatch (as returned by `get_max_sub_group_size`). - -|[source,opencl_c] ----- -uint sub_group_ballot_inclusive_scan( - uint4 value ) ----- -| Returns the number of bits that are set in the bitfield _value_, only considering the bits in _value_ representing work items with a sub-group local ID less than or equal to this work item's sub-group local ID. - -|[source,opencl_c] ----- -uint sub_group_ballot_exclusive_scan( - uint4 value ) ----- -| Returns the number of bits that are set in the bitfield _value_, only considering the bits in _value_ representing work items with a sub-group local ID less than this work item's sub-group local ID. - -|[source,opencl_c] ----- -uint sub_group_ballot_find_lsb( - uint4 value ) ----- -| Returns the smallest sub-group local ID with a bit set in the bitfield _value_, only considering the bits in _value_ that represent predicate values corresponding to sub-group local IDs less than the maximum sub-group size within the dispatch (as returned by `get_max_sub_group_size`). -If no bits representing predicate values from all work items in the sub-group are set in the bitfield _value_ then the return value is undefined. - -|[source,opencl_c] ----- -uint sub_group_ballot_find_msb( - uint4 value ) ----- -| Returns the largest sub-group local ID with a bit set in the bitfield _value_, only considering the bits in _value_ that represent predicate values corresponding to sub-group local IDs less than the maximum sub-group size within the dispatch (as returned by `get_max_sub_group_size`). -If no bits representing predicate values from all work items in the sub-group are set in the bitfield _value_ then the return value is undefined. 
- -|[source,opencl_c] ----- -uint4 get_sub_group_eq_mask() ----- -| Generates a bitmask where the bit is set in the bitmask if the bit index equals the sub-group local ID and unset otherwise. -Bit zero of the first vector component represents the sub-group local ID zero, with higher-order bits and subsequent vector components representing, in order, increasing sub-group local IDs. - -|[source,opencl_c] ----- -uint4 get_sub_group_ge_mask() ----- -| Generates a bitmask where the bit is set in the bitmask if the bit index is greater than or equal to the sub-group local ID and less than the maximum sub-group size, and unset otherwise. -Bit zero of the first vector component represents the sub-group local ID zero, with higher-order bits and subsequent vector components representing, in order, increasing sub-group local IDs. - -|[source,opencl_c] ----- -uint4 get_sub_group_gt_mask() ----- -| Generates a bitmask where the bit is set in the bitmask if the bit index is greater than the sub-group local ID and less than the maximum sub-group size, and unset otherwise. -Bit zero of the first vector component represents the sub-group local ID zero, with higher-order bits and subsequent vector components representing, in order, increasing sub-group local IDs. - -|[source,opencl_c] ----- -uint4 get_sub_group_le_mask() ----- -| Generates a bitmask where the bit is set in the bitmask if the bit index is less than or equal to the sub-group local ID and unset otherwise. -Bit zero of the first vector component represents the sub-group local ID zero, with higher-order bits and subsequent vector components representing, in order, increasing sub-group local IDs. - -|[source,opencl_c] ----- -uint4 get_sub_group_lt_mask() ----- -| Generates a bitmask where the bit is set in the bitmask if the bit index is less than the sub-group local ID and unset otherwise. 
-Bit zero of the first vector component represents the sub-group local ID zero, with higher-order bits and subsequent vector components representing, in order, increasing sub-group local IDs. - -|======================================================================= - -[[cl_khr_subgroup_non_uniform_arithmetic]] -=== Non-Uniform Arithmetic - -This section describes functionality added by `cl_khr_subgroup_non_uniform_arithmetic`. -This extension adds the ability to use some sub-group functions within non-uniform flow control, including additional scan and reduction operators. - -==== Add a new Section 6.15.X - Non Uniform Sub-group Scan and Reduction Built-in Functions - -===== Arithmetic Operations - -The table below describes the OpenCL C programming language built-in functions that perform simple arithmetic operations across work items in a sub-group. -These functions need not be encountered by all work items in a sub-group executing the kernel. -For the functions below, the generic type name `gentype` may be one of the supported built-in scalar data types `char`, `uchar`, `short`, `ushort`, `int`, `uint`, `long`, `ulong`, `float`, `double` (if double precision is supported), or `half` (if half precision is supported). - -[cols="3a,2",options="header",] -|======================================================================= -|*Function* -|*Description* - -|[source,opencl_c] ----- -gentype sub_group_non_uniform_reduce_add( - gentype value ) -gentype sub_group_non_uniform_reduce_min( - gentype value ) -gentype sub_group_non_uniform_reduce_max( - gentype value ) -gentype sub_group_non_uniform_reduce_mul( - gentype value ) ----- -| Returns the summation, multiplication, minimum, or maximum of _value_ for all active work items in the sub-group. 
- -Note: This behavior is the same as the *add*, *min*, and *max* reduction built-in functions from `cl_khr_subgroups` and OpenCL 2.1, except these functions support additional types and need not be encountered by all work items in the sub-group executing the kernel. - -|[source,opencl_c] ----- -gentype sub_group_non_uniform_scan_inclusive_add( - gentype value ) -gentype sub_group_non_uniform_scan_inclusive_min( - gentype value ) -gentype sub_group_non_uniform_scan_inclusive_max( - gentype value ) -gentype sub_group_non_uniform_scan_inclusive_mul( - gentype value ) ----- -| Returns the result of an inclusive scan operation, which is the summation, multiplication, minimum, or maximum of _value_ for all active work items in the sub-group with a sub-group local ID less than or equal to this work item's sub-group local ID. - -Note: This behavior is the same as the *add*, *min*, and *max* inclusive scan built-in functions from `cl_khr_subgroups` and OpenCL 2.1, except these functions support additional types and need not be encountered by all work items in the sub-group executing the kernel. - -|[source,opencl_c] ----- -gentype sub_group_non_uniform_scan_exclusive_add( - gentype value ) -gentype sub_group_non_uniform_scan_exclusive_min( - gentype value ) -gentype sub_group_non_uniform_scan_exclusive_max( - gentype value ) -gentype sub_group_non_uniform_scan_exclusive_mul( - gentype value ) ----- -| Returns the result of an exclusive scan operation, which is the summation, multiplication, minimum, or maximum of _value_ for all active work items in the sub-group with a sub-group local ID less than this work item's sub-group local ID. - -If there is no active work item in the sub-group with a sub-group local ID less than this work item's sub-group local ID then an identity value `I` is returned. -For *add*, the identity value is `0`. -For *min*, the identity value is the largest representable value for integer types, or `+INF` for floating-point types. 
-For *max*, the identity value is the minimum representable value for integer types, or `-INF` for floating-point types. -For *mul*, the identity value is `1`. - -Note: This behavior is the same as the *add*, *min*, and *max* exclusive scan built-in functions from `cl_khr_subgroups` and OpenCL 2.1, except these functions support additional types and need not be encountered by all work items in the sub-group executing the kernel. - -|======================================================================= - -Note: The order of floating-point operations is not guaranteed for the sub-group scan and reduction built-in functions that operate on floating-point types, and the order of operations may additionally be non-deterministic for a given sub-group. - -===== Bitwise Operations - -The table below describes the OpenCL C programming language built-in functions that perform simple bitwise integer operations across work items in a sub-group. -These functions need not be encountered by all work items in a sub-group executing the kernel. -For the functions below, the generic type name `gentype` may be one of the supported built-in scalar data types `char`, `uchar`, `short`, `ushort`, `int`, `uint`, `long`, and `ulong`. - -[cols="3a,2",options="header",] -|======================================================================= -|*Function* -|*Description* - -|[source,opencl_c] ----- -gentype sub_group_non_uniform_reduce_and( - gentype value ) -gentype sub_group_non_uniform_reduce_or( - gentype value ) -gentype sub_group_non_uniform_reduce_xor( - gentype value ) ----- -| Returns the bitwise *and*, *or*, or *xor* of _value_ for all active work items in the sub-group. 
- -|[source,opencl_c] ----- -gentype sub_group_non_uniform_scan_inclusive_and( - gentype value ) -gentype sub_group_non_uniform_scan_inclusive_or( - gentype value ) -gentype sub_group_non_uniform_scan_inclusive_xor( - gentype value ) ----- -| Returns the result of an inclusive scan operation, which is the bitwise *and*, *or*, or *xor* of _value_ for all active work items in the sub-group with a sub-group local ID less than or equal to this work item's sub-group local ID. - -|[source,opencl_c] ----- -gentype sub_group_non_uniform_scan_exclusive_and( - gentype value ) -gentype sub_group_non_uniform_scan_exclusive_or( - gentype value ) -gentype sub_group_non_uniform_scan_exclusive_xor( - gentype value ) ----- -| Returns the result of an exclusive scan operation, which is the bitwise *and*, *or*, or *xor* of _value_ for all active work items in the sub-group with a sub-group local ID less than this work item's sub-group local ID. - -If there is no active work item in the sub-group with a sub-group local ID less than this work item's sub-group local ID then an identity value `I` is returned. -For *and*, the identity value is `~0` (all bits set). -For *or* and *xor*, the identity value is `0`. - -|======================================================================= - -===== Logical Operations - -The table below describes the OpenCL C programming language built-in functions that perform simple logical operations across work items in a sub-group. -These functions need not be encountered by all work items in a sub-group executing the kernel. -For these functions, a non-zero _predicate_ argument or return value is logically `true` and a zero _predicate_ argument or return value is logically `false`. 
- -[cols="2a,1",options="header",] -|======================================================================= -|*Function* -|*Description* - -|[source,opencl_c] ----- -int sub_group_non_uniform_reduce_logical_and( - int predicate ) -int sub_group_non_uniform_reduce_logical_or( - int predicate ) -int sub_group_non_uniform_reduce_logical_xor( - int predicate ) ----- -| Returns the logical *and*, *or*, or *xor* of _predicate_ for all active work items in the sub-group. - -|[source,opencl_c] ----- -int sub_group_non_uniform_scan_inclusive_logical_and( - int predicate ) -int sub_group_non_uniform_scan_inclusive_logical_or( - int predicate ) -int sub_group_non_uniform_scan_inclusive_logical_xor( - int predicate ) ----- -| Returns the result of an inclusive scan operation, which is the logical *and*, *or*, or *xor* of _predicate_ for all active work items in the sub-group with a sub-group local ID less than or equal to this work item's sub-group local ID. - -|[source,opencl_c] ----- -int sub_group_non_uniform_scan_exclusive_logical_and( - int predicate ) -int sub_group_non_uniform_scan_exclusive_logical_or( - int predicate ) -int sub_group_non_uniform_scan_exclusive_logical_xor( - int predicate ) ----- -| Returns the result of an exclusive scan operation, which is the logical *and*, *or*, or *xor* of _predicate_ for all active work items in the sub-group with a sub-group local ID less than this work item's sub-group local ID. - -If there is no active work item in the sub-group with a sub-group local ID less than this work item's sub-group local ID then an identity value `I` is returned. -For *and*, the identity value is `true` (non-zero). -For *or* and *xor*, the identity value is `false` (zero). - -|======================================================================= - -[[cl_khr_subgroup_shuffle]] -=== General Purpose Shuffles - -This section describes functionality added by `cl_khr_subgroup_shuffle`. 
-This extension adds additional ways to exchange data among work items in a sub-group. - -==== Add a new Section 6.15.X - Sub-group Shuffle Built-in Functions - -The table below describes the OpenCL C programming language built-in functions that allow work items in a sub-group to exchange data. -These functions need not be encountered by all work items in a sub-group executing the kernel. -For the functions below, the generic type name `gentype` may be one of the supported built-in scalar data types `char`, `uchar`, `short`, `ushort`, `int`, `uint`, `long`, `ulong`, `float`, `double` (if double precision is supported), or `half` (if half precision is supported). - -[cols="1a,1",options="header",] -|======================================================================= -|*Function* -|*Description* - -|[source,opencl_c] ----- -gentype sub_group_shuffle( - gentype value, uint index ) ----- -| Returns _value_ for the work item with sub-group local ID equal to _index_. -The shuffle _index_ need not be the same for all work items in the sub-group. - -The return value is undefined if the work item with sub-group local ID equal to _index_ is inactive or if _index_ is greater than or equal to the size of the sub-group. - -|[source,opencl_c] ----- -gentype sub_group_shuffle_xor( - gentype value, uint mask ) ----- -| Returns _value_ for the work item with sub-group local ID equal to this work item's sub-group local ID xor'd with _mask_. -The shuffle _mask_ need not be the same for all work items in the sub-group. - -The return value is undefined if the work item with sub-group local ID equal to the calculated index is inactive or if the calculated index is greater than or equal to the size of the sub-group. - -This is a specialized function that may perform better than the equivalent `sub_group_shuffle` on some implementations. 
- -|======================================================================= - -[[cl_khr_subgroup_shuffle_relative]] -=== Relative Shuffles - -This section describes functionality added by `cl_khr_subgroup_shuffle_relative`. -This extension adds specialized ways to exchange data among work items in a sub-group that may perform better on some implementations. - -==== Add a new Section 6.15.X - Sub-group Relative Shuffle Built-in Functions - -The table below describes specialized OpenCL C programming language built-in functions that allow work items in a sub-group to exchange data. -These functions need not be encountered by all work items in a sub-group executing the kernel. -For the functions below, the generic type name `gentype` may be one of the supported built-in scalar data types `char`, `uchar`, `short`, `ushort`, `int`, `uint`, `long`, `ulong`, `float`, `double` (if double precision is supported), or `half` (if half precision is supported). - -[cols="1a,1",options="header",] -|======================================================================= -|*Function* -|*Description* - -|[source,opencl_c] ----- -gentype sub_group_shuffle_up( - gentype value, uint delta ) ----- -| Returns _value_ for the work item with sub-group local ID equal to this work item's sub-group local ID minus _delta_. -The shuffle _delta_ need not be the same for all work items in the sub-group. - -The return value is undefined if the work item with sub-group local ID equal to the calculated index is inactive, or _delta_ is greater than this work item's sub-group local ID. - -This is a specialized function that may perform better than the equivalent `sub_group_shuffle` on some implementations. - -|[source,opencl_c] ----- -gentype sub_group_shuffle_down( - gentype value, uint delta ) ----- -| Returns _value_ for the work item with sub-group local ID equal to this work item's sub-group local ID plus _delta_. -The shuffle _delta_ need not be the same for all work items in the sub-group. 
- -The return value is undefined if the work item with sub-group local ID equal to the calculated index is inactive, or this work item's sub-group local ID plus _delta_ is greater than or equal to the size of the sub-group. - -This is a specialized function that may perform better than the equivalent `sub_group_shuffle` on some implementations. - -|======================================================================= - -[[cl_khr_subgroup_clustered_reduce]] -=== Clustered Reductions - -This section describes functionality added by `cl_khr_subgroup_clustered_reduce`. -This extension adds support for clustered reductions that operate on a subset of work items in the sub-group. - -==== Add a new Section 6.15.X - Sub-group Clustered Reduction Built-in Functions - -This section describes arithmetic operations that are performed on a subset of work items in a sub-group, referred to as a cluster. -A cluster is described by a specified cluster size. -Work items in a sub-group are assigned to clusters such that for cluster size _n_, the _n_ work items in the sub-group with the smallest sub-group local IDs are assigned to the first cluster, then the _n_ remaining work items with the smallest sub-group local IDs are assigned to the next cluster, and so on. -Behavior is undefined if the specified cluster size is not an integer constant expression, is not a power-of-two, or is greater than the maximum size of a sub-group within the dispatch. - -===== Arithmetic Operations - -The table below describes the OpenCL C programming language built-in functions that perform simple arithmetic operations on a cluster of work items in a sub-group. -These functions need not be encountered by all work items in a sub-group executing the kernel. 
-For the functions below, the generic type name `gentype` may be one of the supported built-in scalar data types `char`, `uchar`, `short`, `ushort`, `int`, `uint`, `long`, `ulong`, `float`, `double` (if double precision is supported), or `half` (if half precision is supported). - -[cols="1a,1",options="header",] -|======================================================================= -|*Function* -|*Description* - -|[source,opencl_c] ----- -gentype sub_group_clustered_reduce_add( - gentype value, uint clustersize ) -gentype sub_group_clustered_reduce_mul( - gentype value, uint clustersize ) -gentype sub_group_clustered_reduce_min( - gentype value, uint clustersize ) -gentype sub_group_clustered_reduce_max( - gentype value, uint clustersize ) ----- -| Returns the summation, multiplication, minimum, or maximum of _value_ for all active work items in the sub-group within a cluster of the specified _clustersize_. - -|======================================================================= - -Note: The order of floating-point operations is not guaranteed for the sub-group clustered reduction built-in functions that operate on floating-point types, and the order of operations may additionally be non-deterministic for a given sub-group. - -===== Bitwise Operations - -The table below describes the OpenCL C programming language built-in functions to perform simple bitwise integer operations across a cluster of work items in a sub-group. -These functions need not be encountered by all work items in a sub-group executing the kernel. -For the functions below, the generic type name `gentype` may be one of the supported built-in scalar data types `char`, `uchar`, `short`, `ushort`, `int`, `uint`, `long`, or `ulong`.
- -[cols="1a,1",options="header",] -|======================================================================= -|*Function* -|*Description* - -|[source,opencl_c] ----- -gentype sub_group_clustered_reduce_and( - gentype value, uint clustersize ) -gentype sub_group_clustered_reduce_or( - gentype value, uint clustersize ) -gentype sub_group_clustered_reduce_xor( - gentype value, uint clustersize ) ----- -| Returns the bitwise *and*, *or*, or *xor* of _value_ for all active work items in the sub-group within a cluster of the specified _clustersize_. - -|======================================================================= - -===== Logical Operations - -The table below describes the OpenCL C programming language built-in functions to perform simple logical operations across a cluster of work items in a sub-group. -These functions need not be encountered by all work items in a sub-group executing the kernel. -For these functions, a non-zero _predicate_ argument or return value is logically `true` and a zero _predicate_ argument or return value is logically `false`. - -[cols="3a,2",options="header",] -|======================================================================= -|*Function* -|*Description* - -|[source,opencl_c] ----- -int sub_group_clustered_reduce_logical_and( - int predicate, uint clustersize ) -int sub_group_clustered_reduce_logical_or( - int predicate, uint clustersize ) -int sub_group_clustered_reduce_logical_xor( - int predicate, uint clustersize ) ----- -| Returns the logical *and*, *or*, or *xor* of _predicate_ for all active work items in the sub-group within a cluster of the specified _clustersize_. - -|======================================================================= - -[[extended-sub-groups-mapping]] -=== Function Mapping and Capabilities - -This section describes a possible mapping between OpenCL built-in functions and SPIR-V instructions and required SPIR-V capabilities. - -This section is informational and non-normative. 
- -// Note: the Unicode "zero-width space" (​) causes long function names to break much more sensibly. - -[cols="1,1,1",options="header"] -|======================================================================= -|*OpenCL C Function* -|*SPIR-V BuiltIn or Instruction* -|*Enabling SPIR-V Capability* - -3+| For OpenCL 2.1 or `cl_khr_subgroups`: - -| `get_​sub_​group_​size` - | *SubgroupSize* - | *Kernel* -| `get_​max_​sub_​group_​size` - | *SubgroupMaxSize* - | *Kernel* -| `get_​num_​sub_​groups` - | *NumSubgroups* - | *Kernel* -| `get_​enqueued_​num_​sub_​groups` - | *NumEnqueuedSubgroups* - | *Kernel* -| `get_​sub_​group_​id` - | *SubgroupId* - | *Kernel* -| `get_​sub_​group_​local_​id` - | *SubgroupLocalInvocationId* - | *Kernel* - -| `sub_​group_​barrier` - | *OpControlBarrier* - | None Needed - -| `sub_​group_​all` - | *OpGroupAll* - | *Groups* -| `sub_​group_​any` - | *OpGroupAny* - | *Groups* - -| `sub_​group_​broadcast` - | *OpGroupBroadcast* - | *Groups* - -| `sub_​group_​reduce_​add` - | *OpGroupIAdd*, *OpGroupFAdd* - | *Groups* -| `sub_​group_​reduce_​min` - | *OpGroupSMin*, *OpGroupUMin*, *OpGroupFMin* - | *Groups* -| `sub_​group_​reduce_​max` - | *OpGroupSMax*, *OpGroupUMax*, *OpGroupFMax* - | *Groups* - -| `sub_​group_​scan_​exclusive_​add` - | *OpGroupIAdd*, *OpGroupFAdd* - | *Groups* -| `sub_​group_​scan_​exclusive_​min` - | *OpGroupSMin*, *OpGroupUMin*, *OpGroupFMin* - | *Groups* -| `sub_​group_​scan_​exclusive_​max` - | *OpGroupSMax*, *OpGroupUMax*, *OpGroupFMax* - | *Groups* - -| `sub_​group_​scan_​inclusive_​add` - | *OpGroupIAdd*, *OpGroupFAdd* - | *Groups* -| `sub_​group_​scan_​inclusive_​min` - | *OpGroupSMin*, *OpGroupUMin*, *OpGroupFMin* - | *Groups* -| `sub_​group_​scan_​inclusive_​max` - | *OpGroupSMax*, *OpGroupUMax*, *OpGroupFMax* - | *Groups* - -| `sub_​group_​reserve_​read_​pipe` - | *OpGroupReserveReadPipePackets* - | *Pipes* -| `sub_​group_​reserve_​write_​pipe` - | *OpGroupReserveWritePipePackets* - | *Pipes* -|
`sub_​group_​commit_​read_​pipe` - | *OpGroupCommitReadPipe* - | *Pipes* -| `sub_​group_​commit_​write_​pipe` - | *OpGroupCommitWritePipe* - | *Pipes* - -| `get_​kernel_​sub_​group_​count_​for_​ndrange` - | *OpGetKernelNDrangeSubGroupCount* - | *DeviceEnqueue* -| `get_​kernel_​max_​sub_​group_​size_​for_​ndrange` - | *OpGetKernelNDrangeMaxSubGroupSize* - | *DeviceEnqueue* - -3+| For `cl_khr_subgroup_extended_types`: + -Note: This extension adds new types to uniform sub-group operations. - -| `sub_​group_​broadcast` - | *OpGroupBroadcast* - | *Groups* - -| `sub_​group_​reduce_​add` - | *OpGroupIAdd*, *OpGroupFAdd* - | *Groups* -| `sub_​group_​reduce_​min` - | *OpGroupSMin*, *OpGroupUMin*, *OpGroupFMin* - | *Groups* -| `sub_​group_​reduce_​max` - | *OpGroupSMax*, *OpGroupUMax*, *OpGroupFMax* - | *Groups* - -| `sub_​group_​scan_​exclusive_​add` - | *OpGroupIAdd*, *OpGroupFAdd* - | *Groups* -| `sub_​group_​scan_​exclusive_​min` - | *OpGroupSMin*, *OpGroupUMin*, *OpGroupFMin* - | *Groups* -| `sub_​group_​scan_​exclusive_​max` - | *OpGroupSMax*, *OpGroupUMax*, *OpGroupFMax* - | *Groups* - -| `sub_​group_​scan_​inclusive_​add` - | *OpGroupIAdd*, *OpGroupFAdd* - | *Groups* -| `sub_​group_​scan_​inclusive_​min` - | *OpGroupSMin*, *OpGroupUMin*, *OpGroupFMin* - | *Groups* -| `sub_​group_​scan_​inclusive_​max` - | *OpGroupSMax*, *OpGroupUMax*, *OpGroupFMax* - | *Groups* - -3+| For `cl_khr_subgroup_non_uniform_vote`: - -| `sub_​group_​elect` - | *OpGroupNonUniformElect* - | *GroupNonUniform* -| `sub_​group_​non_​uniform_​all` - | *OpGroupNonUniformAll* - | *GroupNonUniformVote* -| `sub_​group_​non_​uniform_​any` - | *OpGroupNonUniformAny* - | *GroupNonUniformVote* -| `sub_​group_​non_​uniform_​all_​equal` - | *OpGroupNonUniformAllEqual* - | *GroupNonUniformVote* - -3+| For `cl_khr_subgroup_ballot`: - -| `sub_​group_​non_​uniform_​broadcast` - | *OpGroupNonUniformBroadcast* - | *GroupNonUniformBallot* -| `sub_​group_​broadcast_​first` - | *OpGroupNonUniformBroadcastFirst* - | 
*GroupNonUniformBallot* - -| `sub_​group_​ballot` - | *OpGroupNonUniformBallot* - | *GroupNonUniformBallot* -| `sub_​group_​inverse_​ballot` - | *OpGroupNonUniformInverseBallot* - | *GroupNonUniformBallot* -| `sub_​group_​ballot_​bit_​extract` - | *OpGroupNonUniformBallotBitExtract* - | *GroupNonUniformBallot* -| `sub_​group_​ballot_​bit_​count` - | *OpGroupNonUniformBallotBitCount* - | *GroupNonUniformBallot* -| `sub_​group_​ballot_​inclusive_​scan` - | *OpGroupNonUniformBallotBitCount* - | *GroupNonUniformBallot* -| `sub_​group_​ballot_​exclusive_​scan` - | *OpGroupNonUniformBallotBitCount* - | *GroupNonUniformBallot* -| `sub_​group_​ballot_​find_​lsb` - | *OpGroupNonUniformBallotFindLSB* - | *GroupNonUniformBallot* -| `sub_​group_​ballot_​find_​msb` - | *OpGroupNonUniformBallotFindMSB* - | *GroupNonUniformBallot* - -| `get_​sub_​group_​eq_​mask` - | *SubgroupEqMask* - | *GroupNonUniformBallot* -| `get_​sub_​group_​ge_​mask` - | *SubgroupGeMask* - | *GroupNonUniformBallot* -| `get_​sub_​group_​gt_​mask` - | *SubgroupGtMask* - | *GroupNonUniformBallot* -| `get_​sub_​group_​le_​mask` - | *SubgroupLeMask* - | *GroupNonUniformBallot* -| `get_​sub_​group_​lt_​mask` - | *SubgroupLtMask* - | *GroupNonUniformBallot* - -3+| For `cl_khr_subgroup_non_uniform_arithmetic`: - -| `sub_​group_​non_​uniform_​reduce_​add` - | *OpGroupNonUniformIAdd*, *OpGroupNonUniformFAdd* - | *GroupNonUniformArithmetic* -| `sub_​group_​non_​uniform_​reduce_​mul` - | *OpGroupNonUniformIMul*, *OpGroupNonUniformFMul* - | *GroupNonUniformArithmetic* -| `sub_​group_​non_​uniform_​reduce_​min` - | *OpGroupNonUniformSMin*, *OpGroupNonUniformUMin*, *OpGroupNonUniformFMin* - | *GroupNonUniformArithmetic* -| `sub_​group_​non_​uniform_​reduce_​max` - | *OpGroupNonUniformSMax*, *OpGroupNonUniformUMax*, *OpGroupNonUniformFMax* - | *GroupNonUniformArithmetic* -| `sub_​group_​non_​uniform_​reduce_​and` - | *OpGroupNonUniformBitwiseAnd* - | *GroupNonUniformArithmetic* -| `sub_​group_​non_​uniform_​reduce_​or` - 
| *OpGroupNonUniformBitwiseOr* - | *GroupNonUniformArithmetic* -| `sub_​group_​non_​uniform_​reduce_​xor` - | *OpGroupNonUniformBitwiseXor* - | *GroupNonUniformArithmetic* -| `sub_​group_​non_​uniform_​reduce_​logical_​and` - | *OpGroupNonUniformLogicalAnd* - | *GroupNonUniformArithmetic* -| `sub_​group_​non_​uniform_​reduce_​logical_​or` - | *OpGroupNonUniformLogicalOr* - | *GroupNonUniformArithmetic* -| `sub_​group_​non_​uniform_​reduce_​logical_​xor` - | *OpGroupNonUniformLogicalXor* - | *GroupNonUniformArithmetic* - -| `sub_​group_​non_​uniform_​scan_​inclusive_​add` - | *OpGroupNonUniformIAdd*, *OpGroupNonUniformFAdd* - | *GroupNonUniformArithmetic* -| `sub_​group_​non_​uniform_​scan_​inclusive_​mul` - | *OpGroupNonUniformIMul*, *OpGroupNonUniformFMul* - | *GroupNonUniformArithmetic* -| `sub_​group_​non_​uniform_​scan_​inclusive_​min` - | *OpGroupNonUniformSMin*, *OpGroupNonUniformUMin*, *OpGroupNonUniformFMin* - | *GroupNonUniformArithmetic* -| `sub_​group_​non_​uniform_​scan_​inclusive_​max` - | *OpGroupNonUniformSMax*, *OpGroupNonUniformUMax*, *OpGroupNonUniformFMax* - | *GroupNonUniformArithmetic* -| `sub_​group_​non_​uniform_​scan_​inclusive_​and` - | *OpGroupNonUniformBitwiseAnd* - | *GroupNonUniformArithmetic* -| `sub_​group_​non_​uniform_​scan_​inclusive_​or` - | *OpGroupNonUniformBitwiseOr* - | *GroupNonUniformArithmetic* -| `sub_​group_​non_​uniform_​scan_​inclusive_​xor` - | *OpGroupNonUniformBitwiseXor* - | *GroupNonUniformArithmetic* -| `sub_​group_​non_​uniform_​scan_​inclusive_​logical_​and` - | *OpGroupNonUniformLogicalAnd* - | *GroupNonUniformArithmetic* -| `sub_​group_​non_​uniform_​scan_​inclusive_​logical_​or` - | *OpGroupNonUniformLogicalOr* - | *GroupNonUniformArithmetic* -| `sub_​group_​non_​uniform_​scan_​inclusive_​logical_​xor` - | *OpGroupNonUniformLogicalXor* - | *GroupNonUniformArithmetic* - -| `sub_​group_​non_​uniform_​scan_​exclusive_​add` - | *OpGroupNonUniformIAdd*, *OpGroupNonUniformFAdd* - | *GroupNonUniformArithmetic* -| 
`sub_​group_​non_​uniform_​scan_​exclusive_​mul` - | *OpGroupNonUniformIMul*, *OpGroupNonUniformFMul* - | *GroupNonUniformArithmetic* -| `sub_​group_​non_​uniform_​scan_​exclusive_​min` - | *OpGroupNonUniformSMin*, *OpGroupNonUniformUMin*, *OpGroupNonUniformFMin* - | *GroupNonUniformArithmetic* -| `sub_​group_​non_​uniform_​​scan_​exclusive_​max` - | *OpGroupNonUniformSMax*, *OpGroupNonUniformUMax*, *OpGroupNonUniformFMax* - | *GroupNonUniformArithmetic* -| `sub_​group_​non_​uniform_​​scan_​exclusive_​and` - | *OpGroupNonUniformBitwiseAnd* - | *GroupNonUniformArithmetic* -| `sub_​group_​non_​uniform_​​scan_​exclusive_​or` - | *OpGroupNonUniformBitwiseOr* - | *GroupNonUniformArithmetic* -| `sub_​group_​non_​uniform_​​scan_​exclusive_​xor` - | *OpGroupNonUniformBitwiseXor* - | *GroupNonUniformArithmetic* -| `sub_​group_​non_​uniform_​​scan_​exclusive_​logical_​and` - | *OpGroupNonUniformLogicalAnd* - | *GroupNonUniformArithmetic* -| `sub_​group_​non_​uniform_​​scan_​exclusive_​logical_​or` - | *OpGroupNonUniformLogicalOr* - | *GroupNonUniformArithmetic* -| `sub_​group_​non_​uniform_​​scan_​exclusive_​logical_​xor` - | *OpGroupNonUniformLogicalXor* - | *GroupNonUniformArithmetic* - -3+| For `cl_khr_subgroup_shuffle`: - -| `sub_​group_​shuffle` - | *OpGroupNonUniformShuffle* - | *GroupNonUniformShuffle* -| `sub_​group_​shuffle_​xor` - | *OpGroupNonUniformShuffleXor* - | *GroupNonUniformShuffle* - -3+| For `cl_khr_subgroup_shuffle_relative`: - -| `sub_​group_​shuffle_​up` - | *OpGroupNonUniformShuffleUp* - | *GroupNonUniformShuffleRelative* -| `sub_​group_​shuffle_​down` - | *OpGroupNonUniformShuffleDown* - | *GroupNonUniformShuffleRelative* - -3+| For `cl_khr_subgroup_clustered_reduce`: - -| `sub_​group_​clustered_​reduce_​add` - | *OpGroupNonUniformIAdd*, *OpGroupNonUniformFAdd* - | *GroupNonUniformClustered* -| `sub_​group_​clustered_​reduce_​mul` - | *OpGroupNonUniformIMul*, *OpGroupNonUniformFMul* - | *GroupNonUniformClustered* -| 
`sub_​group_​clustered_​reduce_​min` - | *OpGroupNonUniformSMin*, *OpGroupNonUniformUMin*, *OpGroupNonUniformFMin* - | *GroupNonUniformClustered* -| `sub_​group_​clustered_​reduce_​max` - | *OpGroupNonUniformSMax*, *OpGroupNonUniformUMax*, *OpGroupNonUniformFMax* - | *GroupNonUniformClustered* -| `sub_​group_​clustered_​reduce_​and` - | *OpGroupNonUniformBitwiseAnd* - | *GroupNonUniformClustered* -| `sub_​group_​clustered_​reduce_​or` - | *OpGroupNonUniformBitwiseOr* - | *GroupNonUniformClustered* -| `sub_​group_​clustered_​reduce_​xor` - | *OpGroupNonUniformBitwiseXor* - | *GroupNonUniformClustered* -| `sub_​group_​clustered_​reduce_​logical_​and` - | *OpGroupNonUniformLogicalAnd* - | *GroupNonUniformClustered* -| `sub_​group_​clustered_​reduce_​logical_​or` - | *OpGroupNonUniformLogicalOr* - | *GroupNonUniformClustered* -| `sub_​group_​clustered_​reduce_​logical_​xor` - | *OpGroupNonUniformLogicalXor* - | *GroupNonUniformClustered* - -|======================================================================= diff --git a/ext/cl_khr_subgroup_named_barrier.asciidoc b/ext/cl_khr_subgroup_named_barrier.asciidoc deleted file mode 100644 index 08636a76..00000000 --- a/ext/cl_khr_subgroup_named_barrier.asciidoc +++ /dev/null @@ -1,45 +0,0 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ - -[[cl_khr_subgroup_named_barrier]] -== Named Barriers for Sub-groups - -This section describes the *cl_khr_subgroup_named_barrier* extension. -This extension adds barrier operations that cover subsets of an OpenCL -work-group. -Only the OpenCL API changes are described in this section. 
-Please refer to the SPIR-V specification for information about using -sub-group named barriers in the SPIR-V intermediate representation, and to -the OpenCL {cpp} specification for descriptions of the sub-group named -barrier built-in functions in the OpenCL {cpp} kernel language. - -=== General Information - -==== Version History - -[cols="1,1,3",options="header",] -|==== -| *Date* | *Version* | *Description* -| 2020-04-21 | 1.0.0 | First assigned version. -|==== - -=== Changes to OpenCL specification - -Add to _table 4.3_: - -.List of supported param_names by {clGetDeviceInfo} -[cols="2,1,3",options="header",] -|==== -| Device Info -| Return Type -| Description - -| {CL_DEVICE_MAX_NAMED_BARRIER_COUNT_KHR} -| {cl_uint_TYPE} -| Maximum number of named barriers in a work-group for any given - kernel-instance running on the device. - The minimum value is 8. - -|==== - diff --git a/ext/cl_khr_subgroup_rotate.asciidoc b/ext/cl_khr_subgroup_rotate.asciidoc deleted file mode 100644 index 337f9888..00000000 --- a/ext/cl_khr_subgroup_rotate.asciidoc +++ /dev/null @@ -1,121 +0,0 @@ -// Copyright 2022-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ - -[[cl_khr_subgroup_rotate]] -== Sub-group Rotation - -This extension adds support for a new sub-group data exchange operation that -makes it possible to rotate values through the work items in a sub-group. - -=== General Information - -==== Name Strings - -`cl_khr_subgroup_rotate` - -==== Version History - -[cols="1,1,3",options="header",] -|==== -| *Date* | *Version* | *Description* -| 2022-04-22 | 1.0.0 | Initial version. -|==== - -==== Dependencies - -This extension is written against the OpenCL Specification Version 3.0.10, -and OpenCL C Specification Version 3.0.10 and OpenCL Environment Specification -Version 3.0.10. - -This extension requires OpenCL 2.0. - -==== Contributors - -Kévin Petit, Arm Ltd.
+ -Ben Ashbaugh, Intel + -Ruihao Zhang, Qualcomm + -Sven van Haastregt, Arm Ltd. + -Anastasia Stulova, Arm Ltd. + -Stuart Brady, Arm Ltd. + - -=== New OpenCL C Functions - -This extension adds the following built-in functions: - -[source,opencl_c] ----- -gentype sub_group_rotate(gentype value, int delta) -gentype sub_group_clustered_rotate(gentype value, int delta, uint clustersize) ----- - -=== Modifications to the OpenCL C Specification - -(Add a new section 6.15.x, *Sub-group Rotation*) :: -+ --- - -The following preprocessor definitions are added: - -[source,opencl_c] ----- -#define cl_khr_subgroup_rotate 1 ----- - -The table below describes specialized OpenCL C programming language built-in -functions that allow work items in a sub-group to exchange data. These functions -need not be encountered by all work items in a sub-group executing the kernel. -For the functions below, the generic type name `gentype` may be one of the -supported built-in scalar data types `char`, `uchar`, `short`, `ushort`, `int`, -`uint`, `long`, `ulong`, `float`, `double` (if double precision is supported), -or `half` (if half precision is supported). - -[cols="1a,1",options="header",] -|======================================================================= -|*Function* -|*Description* - -|[source,opencl_c] ----- -gentype sub_group_rotate( - gentype value, int delta) ----- -| Returns _value_ for the work item with sub-group local ID equal to the remainder -of the division of the sum of this work item's sub-group local ID and _delta_ by -the maximum sub-group size. + -The value of _delta_ is required to be dynamically-uniform for all work items in -the sub-group, otherwise the behavior is undefined. - -The return value is undefined if the work item with sub-group local ID equal to the -calculated index is inactive.
- -|[source,opencl_c] ----- -gentype sub_group_clustered_rotate( - gentype value, int delta, - uint clustersize) ----- -| Returns _value_ for the work item with sub-group local ID equal to the sum of, the -remainder of the division of the sum of this work item's ID within the cluster and -_delta_ by _clustersize_, and the sub-group local ID of the first work-item of the -cluster to which the work-item executing the function belongs. + -The value of _delta_ is required to be dynamically-uniform for all work items in -the sub-group, otherwise the behavior is undefined. - -_clustersize_ must be an integer constant expression and a power of two, smaller -than or equal to the maximum sub-group size, otherwise the behavior is undefined. - -The return value is undefined if the work item with sub-group local ID equal to the -calculated index is inactive. -|======================================================================= --- - -=== Modifications to the OpenCL SPIR-V Environment Specification - -See OpenCL SPIR-V Environment Specification. - -=== Interactions with Other Extensions - -If `cl_khr_il_program` is supported then the SPIR-V environment specification -modifications described above apply. - diff --git a/ext/cl_khr_subgroups.asciidoc b/ext/cl_khr_subgroups.asciidoc deleted file mode 100644 index ae479e9b..00000000 --- a/ext/cl_khr_subgroups.asciidoc +++ /dev/null @@ -1,426 +0,0 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ - -[[cl_khr_subgroups]] -== Sub-groups - -This section describes the *cl_khr_subgroups* extension. - -This extension adds support for implementation-controlled groups of work items, known as sub-groups. -Sub-groups behave similarly to work-groups and have their own sets of built-ins and synchronization primitives. 
-Sub-groups within a work-group are independent, may make forward progress with respect to each other, and may map to optimized hardware structures where that makes sense. - -Sub-groups were promoted to a core feature in OpenCL 2.1, however note that: - -* The sub-group OpenCL C built-in functions described by this extension must still be accessed as an OpenCL C extension in OpenCL 2.1. -* Sub-group independent forward progress is an optional device property in OpenCL 2.1, see {CL_DEVICE_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS}. - -=== General Information - -==== Version History - -[cols="1,1,3",options="header",] -|==== -| *Date* | *Version* | *Description* -| 2020-04-21 | 1.0.0 | First assigned version. -|==== - -[[cl_khr_subgroups-additions-to-chapter-3-of-the-opencl-2.0-specification]] -=== Additions to Chapter 3 of the OpenCL 2.0 Specification - -[[cl_khr_subgroups-additions-to-section-3.2-execution-model]] -=== Additions to section 3.2 -- Execution Model - -Within a work-group work-items may be divided into sub-groups. -The mapping of work-items to sub-groups is implementation-defined and may be queried at runtime. -While sub-groups may be used in multi-dimensional work-groups, each sub-group is 1-dimensional and any given work-item may query which sub-group it is a member of. - -Work items are mapped into sub-groups through a combination of compile-time decisions and the parameters of the dispatch. -The mapping to sub-groups is invariant for the duration of a kernel’s execution, across dispatches of a given kernel with the same launch parameters, and from one work-group to another within the dispatch (excluding the trailing edge work-groups in the presence of non-uniform work-group sizes). -In addition, all sub-groups within a work-group will be the same size, apart from the sub-group with the maximum index which may be smaller if the size of the work-group is not evenly divisible by the size of the sub-group. 
- -Sub-groups execute concurrently within a given work-group and make independent forward progress with respect to each other even in the absence of work-group barrier operations. -Sub-groups are able to internally synchronize using barrier operations without synchronizing with each other. - -In the degenerate case, with the extension enabled, a single sub-group must be supported for each work-group. -In this situation all sub-group scope functions alias their work-group level equivalents. - -[[cl_khr_subgroups-additions-to-chapter-5-of-the-opencl-2.0-specification]] -=== Additions to Chapter 5 of the OpenCL 2.0 Specification - -The function - -include::{generated}/api/protos/clGetKernelSubGroupInfoKHR.txt[] - -returns information about the kernel object. - -_kernel_ specifies the kernel object being queried. - -_device_ identifies a specific device in the list of devices associated with -_kernel_. -The list of devices is the list of devices in the OpenCL context that is -associated with _kernel_. -If the list of devices associated with _kernel_ is a single device, _device_ -can be a `NULL` value. - -_param_name_ specifies the information to query. -The list of supported _param_name_ types and the information returned in -_param_value_ by {clGetKernelSubGroupInfoKHR} is described in the -<<cl_khr_subgroups-kernel-sub-group-info-table>> table. - -_input_value_size_ is used to specify the size in bytes of memory pointed to -by _input_value_. -This size must be == size of input type as described in the table below. - -_input_value_ is a pointer to memory where the appropriate parameterization -of the query is passed from. -If _input_value_ is `NULL`, it is ignored. - -_param_value_ is a pointer to memory where the appropriate result being -queried is returned. -If _param_value_ is `NULL`, it is ignored. - -_param_value_size_ is used to specify the size in bytes of memory pointed to -by _param_value_. -This size must be {geq} size of return type as described in the -<<cl_khr_subgroups-kernel-sub-group-info-table>> table.
- -_param_value_size_ret_ returns the actual size in bytes of data being -queried by _param_name_. -If _param_value_size_ret_ is `NULL`, it is ignored. - -[[cl_khr_subgroups-kernel-sub-group-info-table]] -.List of supported param_names by {clGetKernelSubGroupInfoKHR} -[width="100%",cols="<25%,<25%,<25%,<25%",options="header"] -|==== -| Kernel Sub-group Info | Input Type | Return Type | Description -| {CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR} - | {size_t_TYPE}* - | {size_t_TYPE} - | Returns the maximum sub-group size for this kernel. - All sub-groups must be the same size, while the last sub-group in - any work-group (i.e. the sub-group with the maximum index) could - be the same or smaller size. - - The _input_value_ must be an array of size_t values - corresponding to the local work size parameter of the intended - dispatch. - The number of dimensions in the ND-range will be inferred from - the value specified for _input_value_size_. -| {CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_KHR} - | {size_t_TYPE}* - | {size_t_TYPE} - | Returns the number of sub-groups that will be present in each - work-group for a given local work size. - All workgroups, apart from the last work-group in each dimension - in the presence of non-uniform work-group sizes, will have the - same number of sub-groups. - - The _input_value_ must be an array of size_t values - corresponding to the local work size parameter of the intended - dispatch. - The number of dimensions in the ND-range will be inferred from - the value specified for _input_value_size_. -|==== - -{clGetKernelSubGroupInfoKHR} returns {CL_SUCCESS} if the function is executed -successfully. -Otherwise, it returns one of the following errors: - - * {CL_INVALID_DEVICE} if _device_ is not in the list of devices associated - with _kernel_ or if _device_ is `NULL` but there is more than one device - associated with _kernel_. 
- * {CL_INVALID_VALUE} if _param_name_ is not valid, or if size in bytes - specified by _param_value_size_ is < size of return type as described in - the <<cl_khr_subgroups-kernel-sub-group-info-table>> table - and _param_value_ is not `NULL`. - * {CL_INVALID_VALUE} if _param_name_ is - {CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR} and the size in bytes specified by - _input_value_size_ is not valid or if _input_value_ is `NULL`. - * {CL_INVALID_KERNEL} if _kernel_ is not a valid kernel object. - * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required - by the OpenCL implementation on the device. - * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources - required by the OpenCL implementation on the host. - -[[cl_khr_subgroups-additions-to-chapter-6-of-the-opencl-2.0-specification]] -=== Additions to Chapter 6 of the OpenCL 2.0 C Specification - -[[cl_khr_subgroups-additions-to-section-6.13.1-work-item-functions]] -==== Additions to section 6.13.1 -- Work Item Functions - -[cols="a,",options="header",] -|==== -| *Function* -| *Description* - -| uint *get_sub_group_size* () -| Returns the number of work items in the sub-group. - This value is no more than the maximum sub-group size and is - implementation-defined based on a combination of the compiled kernel and - the dispatch dimensions. - This will be a constant value for the lifetime of the sub-group. - -| uint *get_max_sub_group_size* () -| Returns the maximum size of a sub-group within the dispatch. - This value will be invariant for a given set of dispatch dimensions and a - kernel object compiled for a given device. - -| uint *get_num_sub_groups* () -| Returns the number of sub-groups that the current work-group is divided - into. - - This number will be constant for the duration of a work-group's execution. - If the kernel is executed with a non-uniform work-group size - (i.e.
the global_work_size values specified to {clEnqueueNDRangeKernel} - are not evenly divisible by the local_work_size values for any dimension), - calls to this built-in from some work-groups may return different values - than calls to this built-in from other work-groups. - -| uint *get_enqueued_num_sub_groups* () -| Returns the same value as that returned by *get_num_sub_groups* if the - kernel is executed with a uniform work-group size. - - If the kernel is executed with a non-uniform work-group size, returns the - number of sub-groups in each of the work-groups that make up the uniform - region of the global range. - -| uint *get_sub_group_id* () -| *get_sub_group_id* returns the sub-group ID which is a number from 0 .. - *get_num_sub_groups*() - 1. - - For {clEnqueueTask}, this returns 0. - -| uint *get_sub_group_local_id* () -| Returns the unique work item ID within the current sub-group. - The mapping from *get_local_id*(__dimindx__) to *get_sub_group_local_id* - will be invariant for the lifetime of the work-group. - -|==== - -[[cl_khr_subgroups-additions-to-section-6.13.8-synchronization-functions]] -==== Additions to section 6.13.8 -- Synchronization Functions - -[cols="3,7",options="header",] -|==== -| *Function* -| *Description* - -| void **sub_group_barrier** ( + - cl_mem_fence_flags _flags_) - - void **sub_group_barrier** ( + - cl_mem_fence_flags _flags_, memory_scope _scope_) - -| All work items in a sub-group executing the kernel on a processor must - execute this function before any are allowed to continue execution beyond - the sub-group barrier. - This function must be encountered by all work items in a sub-group - executing the kernel. - These rules apply to ND-ranges implemented with uniform and non-uniform - work-groups.
- - If *sub_group_barrier* is inside a conditional statement, then all work - items within the sub-group must enter the conditional if any work item in - the sub-group enters the conditional statement and executes the - sub_group_barrier. - - If *sub_group_barrier* is inside a loop, all work items within the sub-group - must execute the sub_group_barrier for each iteration of the loop before - any are allowed to continue execution beyond the sub_group_barrier. - - The *sub_group_barrier* function also queues a memory fence (reads and - writes) to ensure correct ordering of memory operations to local or global - memory. - - The flags argument specifies the memory address space and can be set to a - combination of the following values: - - CLK_LOCAL_MEM_FENCE - The *sub_group_barrier* function will either flush - any variables stored in local memory or queue a memory fence to ensure - correct ordering of memory operations to local memory. - - CLK_GLOBAL_MEM_FENCE -- The *sub_group_barrier* function will queue a - memory fence to ensure correct ordering of memory operations to global - memory. - This can be useful when work items, for example, write to buffer objects - and then want to read the updated data from these buffer objects. - - CLK_IMAGE_MEM_FENCE -- The *sub_group_barrier* function will queue a memory - fence to ensure correct ordering of memory operations to image objects. - This can be useful when work items, for example, write to image objects - and then want to read the updated data from these image objects. - -|==== - -[[cl_khr_subgroups-additions-to-section-6.13.11-atomic-functions]] -==== Additions to section 6.13.11 -- Atomic Functions - -Add the following new value to the enumerated type `memory_scope` defined in -_section 6.13.11.4_. - ----- -memory_scope_sub_group ----- - -The `memory_scope_sub_group` specifies that the memory ordering constraints -given by `memory_order` apply to work items in a sub-group. 
-This memory scope can be used when performing atomic operations to global or -local memory. - -[[cl_khr_subgroups-add-a-new-section-6.13.X-sub-group-functions]] -==== Add a new section 6.13.X -- Sub-group Functions - -The table below describes OpenCL C programming language built-in functions that operate on a sub-group level. -These built-in functions must be encountered by all work items in the sub-group executing the kernel. -For the functions below, the generic type name `gentype` may be one of the supported built-in scalar data types `int`, `uint`, `long`, `ulong`, `float`, `double` (if double precision is supported), or `half` (if half precision is supported). - -[cols=",",options="header",] -|==== -| *Function* -| *Description* - -| int *sub_group_all* (int _predicate_) -| Evaluates _predicate_ for all work items in the sub-group and returns a - non-zero value if _predicate_ evaluates to non-zero for all work items in - the sub-group. - -| int *sub_group_any* (int _predicate_) -| Evaluates _predicate_ for all work items in the sub-group and returns a - non-zero value if _predicate_ evaluates to non-zero for any work items in - the sub-group. - -| gentype *sub_group_broadcast* ( + - gentype _x_, uint _sub_group_local_id_) -| Broadcast the value of _x_ for work item identified by - _sub_group_local_id_ (value returned by *get_sub_group_local_id*) to all - work items in the sub-group. - - _sub_group_local_id_ must be the same value for all work items in the - sub-group. - -| gentype *sub_group_reduce_<op>* ( + - gentype _x_) -| Return result of reduction operation specified by *<op>* for all values of - _x_ specified by work items in a sub-group. - -| gentype *sub_group_scan_exclusive_<op>* ( + - gentype _x_) -| Do an exclusive scan operation specified by *<op>* of all values specified - by work items in a sub-group. - The scan results are returned for each work item. - - The scan order is defined by increasing sub-group local ID within the - sub-group.
- -| gentype *sub_group_scan_inclusive_<op>* ( + - gentype _x_) -| Do an inclusive scan operation specified by *<op>* of all values specified - by work items in a sub-group. - The scan results are returned for each work item. - - The scan order is defined by increasing sub-group local ID within the - sub-group. - -|==== - -The *<op>* in *sub_group_reduce_<op>*, *sub_group_scan_inclusive_<op>* and *sub_group_scan_exclusive_<op>* defines the operator and can be *add*, *min* or *max*. - -The exclusive scan operation takes a binary operator *op* with an identity I and _n_ (where _n_ is the size of the sub-group) elements [a~0~, a~1~, ... a~n-1~] and returns [I, a~0~, (a~0~ *op* a~1~), ... (a~0~ *op* a~1~ *op* ... *op* a~n-2~)]. - -The inclusive scan operation takes a binary operator *op* with _n_ (where _n_ is the size of the sub-group) elements [a~0~, a~1~, ... a~n-1~] and returns [a~0~, (a~0~ *op* a~1~), ... (a~0~ *op* a~1~ *op* ... *op* a~n-1~)]. - -If *op* = *add*, the identity I is 0. -If *op* = *min*, the identity I is `INT_MAX`, `UINT_MAX`, `LONG_MAX`, `ULONG_MAX`, for `int`, `uint`, `long`, `ulong` types and is `+INF` for -floating-point types. -Similarly if *op* = *max*, the identity I is `INT_MIN`, 0, `LONG_MIN`, 0 and `-INF`. - -[NOTE] -==== -The order of floating-point operations is not guaranteed for the *sub_group_reduce_<op>*, *sub_group_scan_inclusive_<op>* and *sub_group_scan_exclusive_<op>* built-in functions that operate on `half`, `float` and `double` data types. -The order of these floating-point operations is also non-deterministic for a given sub-group. -==== - -[[cl_khr_subgroups-additions-to-section-6.13.16-pipe-functions]] -==== Additions to section 6.13.16 -- Pipe Functions - -The OpenCL C programming language implements the following built-in pipe -functions that operate at a sub-group level. -These built-in functions must be encountered by all work items in a sub-group -executing the kernel with the same argument values; otherwise the behavior -is undefined.
-We use the generic type name `gentype` to indicate the built-in OpenCL C -scalar or vector integer or floating-point data types or any user defined -type built from these scalar and vector data types can be used as the type -for the arguments to the pipe functions listed in _table 6.29_. - -[cols=",",options="header",] -|==== -| *Function* -| *Description* - -| reserve_id_t *sub_group_reserve_read_pipe* ( + - read_only pipe gentype _pipe_, + - uint _num_packets_) - - reserve_id_t *sub_group_reserve_write_pipe* ( + - write_only pipe gentype _pipe_, + - uint _num_packets_) -| Reserve _num_packets_ entries for reading from or writing to _pipe_. - Returns a valid non-zero reservation ID if the reservation is successful - and 0 otherwise. - - The reserved pipe entries are referred to by indices that go from 0 ... - _num_packets_ - 1. - -| void *sub_group_commit_read_pipe* ( + - read_only pipe gentype _pipe_, + - reserve_id_t _reserve_id_) - - void *sub_group_commit_write_pipe* ( + - write_only pipe gentype _pipe_, + - reserve_id_t _reserve_id_) -| Indicates that all reads and writes to _num_packets_ associated with - reservation _reserve_id_ are completed. - -|==== - -Note: Reservations made by a sub-group are ordered in the pipe as they are -ordered in the program. -Reservations made by different sub-groups that belong to the same work-group -can be ordered using sub-group synchronization. -The order of sub-group based reservations that belong to different work -groups is implementation-defined. 
- -[[cl_khr_subgroups-additions-to-section-6.13.17.6-enqueuing-kernels-kernel-query-functions]] -==== Additions to section 6.13.17.6 -- Enqueuing Kernels (Kernel Query Functions) - -[cols="5,4",options="header",] -|==== -| *Built-in Function* -| *Description* - -| uint *get_kernel_sub_group_count_for_ndrange* ( + - const ndrange_t _ndrange_, + - void (^block)(void)); - - uint *get_kernel_sub_group_count_for_ndrange* ( + - const ndrange_t _ndrange_, + - void (^block)(local void *, ...)); -| Returns the number of sub-groups in each work-group of the dispatch (except - for the last in cases where the global size does not divide cleanly into - work-groups) given the combination of the passed ndrange and block. - - _block_ specifies the block to be enqueued. - -| uint *get_kernel_max_sub_group_size_for_ndrange* ( + - const ndrange_t _ndrange_, + - void (^block)(void)); + - - uint *get_kernel_max_sub_group_size_for_ndrange* ( + - const ndrange_t _ndrange_, + - void (^block)(local void *, ...)); -| Returns the maximum sub-group size for a block. - -|==== diff --git a/ext/cl_khr_suggested_local_work_size.asciidoc b/ext/cl_khr_suggested_local_work_size.asciidoc deleted file mode 100644 index 97bef879..00000000 --- a/ext/cl_khr_suggested_local_work_size.asciidoc +++ /dev/null @@ -1,87 +0,0 @@ -// Copyright 2018-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ - -[[cl_khr_suggested_local_work_size]] -== Suggested Local Work Size Query - -This extension adds the ability to query a suggested local work-group size for a kernel running on a device for a specified global work size and global work offset. -The suggested local work-group size will match the work-group size that would be chosen if the kernel were enqueued with the specified global work size and global work offset and a `NULL` local work size. 
- -By using the suggested local work-group size query an application has greater insight into the local work-group size chosen by the OpenCL implementation, and the OpenCL implementation need not re-compute the local work-group size if the same kernel is enqueued multiple times with the same parameters. - -=== General Information - -==== Name Strings - -`cl_khr_suggested_local_work_size` - -==== Version History - -[cols="1,1,3",options="header",] -|==== -| *Date* | *Version* | *Description* -| 2021-04-22 | 1.0.0 | Initial version. -|==== - -==== Dependencies - -This extension is written against the OpenCL API Specification Version V3.0.6. - -This extension requires OpenCL 1.0. - -=== New API Functions - -[source,opencl] ----- -cl_int clGetKernelSuggestedLocalWorkSizeKHR( - cl_command_queue command_queue, - cl_kernel kernel, - cl_uint work_dim, - const size_t *global_work_offset, - const size_t *global_work_size, - size_t *suggested_local_work_size); ----- - -=== Modifications to the OpenCL API Specification - -==== Section 5.9 - Kernel Objects: - -===== New Section 5.9.4.X - Suggested Local Work Size Query - -To query a suggested local work size for a kernel object, call the function - -include::{generated}/api/protos/clGetKernelSuggestedLocalWorkSizeKHR.txt[] - -The returned suggested local work size is expected to match the local work size that would be chosen if the specified kernel object, with the same kernel arguments, were enqueued into the specified command-queue with the specified global work size, specified global work offset, and with a `NULL` local work size. - -* _command_queue_ specifies the command-queue and device for the query. -* _kernel_ specifies the kernel object and kernel arguments for the query. -The OpenCL context associated with _kernel_ and _command_queue_ must be the same. -* _work_dim_ specifies the number of work dimensions in the input global work offset and global work size, and the output suggested local work size.
-* _global_work_offset_ can be used to specify an array of at least _work_dim_ global ID offset values for the query. -This is optional and may be `NULL` to indicate there is no global ID offset. -* _global_work_size_ is an array of at least _work_dim_ values describing the global work size for the query. -* _suggested_local_work_size_ is an output array of at least _work_dim_ values that will contain the result of the query. - -{clGetKernelSuggestedLocalWorkSizeKHR} returns {CL_SUCCESS} if the query executed successfully. -Otherwise, it returns one of the following errors: - -* {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not a valid host command-queue. -* {CL_INVALID_KERNEL} if _kernel_ is not a valid kernel object. -* {CL_INVALID_CONTEXT} if the context associated with _kernel_ is not the same as the context associated with _command_queue_. -* {CL_INVALID_PROGRAM_EXECUTABLE} if there is no successfully built program executable available for _kernel_ for the device associated with _command_queue_. -* {CL_INVALID_KERNEL_ARGS} if all argument values for _kernel_ have not been set. -* {CL_MISALIGNED_SUB_BUFFER_OFFSET} if a sub-buffer object is set as an argument to _kernel_ and the offset specified when the sub-buffer object was created is not aligned to {CL_DEVICE_MEM_BASE_ADDR_ALIGN} for the device associated with _command_queue_. -* {CL_INVALID_IMAGE_SIZE} if an image object is set as an argument to _kernel_ and the image dimensions are not supported by device associated with _command_queue_. -* {CL_IMAGE_FORMAT_NOT_SUPPORTED} if an image object is set as an argument to _kernel_ and the image format is not supported by the device associated with _command_queue_. -* {CL_INVALID_OPERATION} if an SVM pointer is set as an argument to _kernel_ and the device associated with _command_queue_ does not support SVM or the required SVM capabilities for the SVM pointer. -* {CL_INVALID_WORK_DIMENSION} if _work_dim_ is not a valid value (i.e. 
a value between 1 and {CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS}). -* {CL_INVALID_GLOBAL_WORK_SIZE} if _global_work_size_ is NULL or if any of the values specified in _global_work_size_ are 0. -* {CL_INVALID_GLOBAL_WORK_SIZE} if any of the values specified in _global_work_size_ exceed the maximum value representable by `size_t` on the device associated with _command_queue_. -* {CL_INVALID_GLOBAL_OFFSET} if the value specified in _global_work_size_ plus the corresponding value in _global_work_offset_ for any dimension exceeds the maximum value representable by `size_t` on the device associated with _command_queue_. -* {CL_INVALID_VALUE} if _suggested_local_work_size_ is NULL. -* {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required by the OpenCL implementation on the device. -* {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources required by the OpenCL implementation on the host. - -NOTE: These error conditions are consistent with error conditions for {clEnqueueNDRangeKernel}. diff --git a/ext/cl_khr_terminate_context.asciidoc b/ext/cl_khr_terminate_context.asciidoc deleted file mode 100644 index 9a771788..00000000 --- a/ext/cl_khr_terminate_context.asciidoc +++ /dev/null @@ -1,141 +0,0 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ - -[[cl_khr_terminate_context]] -== Terminating OpenCL Contexts - -Today, OpenCL provides an API to release a context. -This operation is done only after all queues, memory objects, programs and -kernels are released, which in turn might wait for all ongoing operations to -complete. -However, there are cases in which a fast release is required, or release -operation cannot be done, as commands are stuck in mid execution. -An example of the first case can be program termination due to exception, or -quick shutdown due to low power.
-Examples of the second case are when a kernel is running too long, or gets -stuck, or it may result from user action which makes the results of the -computation unnecessary. - -In many cases, the driver or the device is capable of speeding up the -closure of ongoing operations when the results are no longer required in a -much more expedient manner than waiting for all previously enqueued -operations to finish. - -This extension implements a new query to check whether a device can -terminate an OpenCL context and adds an API to terminate a context. - -The extension name is *cl_khr_terminate_context*. - -=== General Information - -==== Version History - -[cols="1,1,3",options="header",] -|==== -| *Date* | *Version* | *Description* -| 2020-04-21 | 1.0.0 | First assigned version. -|==== - -[[cl_khr_terminate_context-additions-to-chapter-4]] -=== Additions to Chapter 4 of the OpenCL 2.2 Specification - -Add a new device property to _table 4.3_ in _section 4.2_. - -.List of supported param_names by {clGetDeviceInfo} -[cols="3,2,4",options="header",] -|==== -| Device Info -| Return Type -| Description - -| {CL_DEVICE_TERMINATE_CAPABILITY_KHR} -| {cl_device_terminate_capability_khr_TYPE} -| Describes the termination capability of the OpenCL device. - This is a bit-field, where the following values are currently supported: - - {CL_DEVICE_TERMINATE_CAPABILITY_CONTEXT_KHR} - Indicates that context - termination is supported. - -|==== - -Add a new context property to _table 4.5_ in _section 4.4_. - -.List of supported context creation properties by {clCreateContext} -[cols="3,2,4",options="header",] -|==== -| Context Property -| Property value -| Description - -| {CL_CONTEXT_TERMINATE_KHR} -| {cl_bool_TYPE} -| Specifies whether the context can be terminated. - The default value is {CL_FALSE}. 
- -|==== - -{CL_CONTEXT_TERMINATE_KHR} can be specified in the context properties only if -all devices associated with the context support the ability to support -context termination (i.e. {CL_DEVICE_TERMINATE_CAPABILITY_CONTEXT_KHR} is set -for {CL_DEVICE_TERMINATE_CAPABILITY_KHR}). -Otherwise, context creation fails with error code of {CL_INVALID_PROPERTY}. - -The new function - -include::{generated}/api/protos/clTerminateContextKHR.txt[] - -terminates all pending work associated with the context and renders all data -owned by the context invalid. -It is the responsibility of the application to release all objects -associated with the context being terminated. - -When a context is terminated: - - * The execution status of enqueued commands will be {CL_CONTEXT_TERMINATED_KHR}. - Event objects can be queried using {clGetEventInfo}. - Event callbacks can be registered and registered event callbacks will be - called with _event_command_status_ set to {CL_CONTEXT_TERMINATED_KHR}. - {clWaitForEvents} will return immediately for commands associated - with event objects specified in event_list. - The status of user events can be set. - Event objects can be retained and released. - {clGetEventProfilingInfo} returns {CL_PROFILING_INFO_NOT_AVAILABLE}. - * The context is considered to be terminated. - A callback function registered when the context was created will be - called. - Only queries, retain and release operations can be performed on the - context. - All other APIs that use a context as an argument will return - {CL_CONTEXT_TERMINATED_KHR}. - * The contents of the memory regions of the memory objects are undefined. - Queries, registering a destructor callback, retain and release - operations can be performed on the memory objects. - * Once a context has been terminated, all OpenCL API calls that create - objects or enqueue commands will return {CL_CONTEXT_TERMINATED_KHR}.
- APIs that release OpenCL objects will continue to operate as though - {clTerminateContextKHR} was not called. - * The behavior of callbacks will remain unchanged, and will report - appropriate error, if executing after termination of context. - This behavior is similar to enqueued commands, after the command-queue - has become invalid. - -{clTerminateContextKHR} returns {CL_SUCCESS} if the function is executed -successfully. -Otherwise, it returns one of the following errors: - - * {CL_INVALID_CONTEXT} if _context_ is not a valid OpenCL context. - * {CL_CONTEXT_TERMINATED_KHR} if _context_ has already been terminated. - * {CL_INVALID_OPERATION} if _context_ was not created with - {CL_CONTEXT_TERMINATE_KHR} set to {CL_TRUE}. - * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required - by the OpenCL implementation on the device. - * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources - required by the OpenCL implementation on the host. - -An implementation that supports this extension must be able to terminate -commands currently executing on devices or queued across all command-queues -associated with the context that is being terminated. -The implementation cannot implement this extension by waiting for currently -executing (or queued) commands to finish execution on devices associated -with this context (i.e. doing a {clFinish}). diff --git a/ext/cl_khr_throttle_hints.asciidoc b/ext/cl_khr_throttle_hints.asciidoc deleted file mode 100644 index 8b19ce69..00000000 --- a/ext/cl_khr_throttle_hints.asciidoc +++ /dev/null @@ -1,51 +0,0 @@ -// Copyright 2017-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ - -[[cl_khr_throttle_hints]] -== Throttle Hints - -This section describes the *cl_khr_throttle_hints* extension. 
-This extension adds throttle hints for OpenCL, but does not specify the -throttling behavior or minimum guarantees. -It is expected that the user guide associated with each implementation which -supports this extension will describe the throttling behavior guarantees. - -Note that the throttle hint is orthogonal to functionality defined in -*cl_khr_priority_hints* extension. -For example, a task may have high priority ({CL_QUEUE_PRIORITY_HIGH_KHR}) -but should at the same time be executed at an optimized throttle setting -({CL_QUEUE_THROTTLE_LOW_KHR}). - -=== General Information - -==== Version History - -[cols="1,1,3",options="header",] -|==== -| *Date* | *Version* | *Description* -| 2020-04-21 | 1.0.0 | First assigned version. -|==== - -[[cl_khr_throttle_hints-host-side-api-modifications]] -=== Host-side API modifications - -The function {clCreateCommandQueueWithProperties} (Section 5.1) is -extended to support a new {CL_QUEUE_THROTTLE_KHR} value as part of the -_properties_ argument. - -The properties field accepts the following values: - - * {CL_QUEUE_THROTTLE_HIGH_KHR} (full throttle, i.e., OK to consume more - energy) - * {CL_QUEUE_THROTTLE_MED_KHR} (normal throttle) - * {CL_QUEUE_THROTTLE_LOW_KHR} (optimized/lowest energy consumption) - -If {CL_QUEUE_THROTTLE_KHR} is not specified then the default priority is -{CL_QUEUE_THROTTLE_MED_KHR}. - -To the error section for {clCreateCommandQueueWithProperties}, the -following is added: - - * {CL_INVALID_QUEUE_PROPERTIES} if the {CL_QUEUE_THROTTLE_KHR} property is - specified and the queue is a {CL_QUEUE_ON_DEVICE}. diff --git a/ext/cl_khr_work_group_uniform_arithmetic.asciidoc b/ext/cl_khr_work_group_uniform_arithmetic.asciidoc deleted file mode 100644 index 097f0aed..00000000 --- a/ext/cl_khr_work_group_uniform_arithmetic.asciidoc +++ /dev/null @@ -1,239 +0,0 @@ -// Copyright 2022-2024 The Khronos Group. 
This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ - -[[cl_khr_work_group_uniform_arithmetic]] -== Work-group Uniform Arithmetic - -This extension adds additional work-group collective functions to OpenCL C. -Specifically, this extension adds support for work-group scans and reductions for the following operators: - -* Logical operations (`and`, `or`, and `xor`). -* Bitwise operations (`and`, `or`, and `xor`). -* Integer multiplication (`mul`). -* Floating-point multiplication (`mul`). - -=== General Information - -==== Name Strings - -`cl_khr_work_group_uniform_arithmetic` - -==== Version History - -[cols="1,1,3",options="header",] -|==== -| *Date* | *Version* | *Description* -| 2022-04-29 | 1.0.0 | Initial version. -|==== - -==== Dependencies - -This extension is written against the OpenCL Specification -Version 3.0.10. - -This extension requires OpenCL 2.0. - -==== Contributors - -Kevin Petit, Arm Ltd. + -Ben Ashbaugh, Intel + - -=== New OpenCL C Functions - -The following functions are added to OpenCL C. - -[source,opencl_c] ----- -int work_group_reduce_logical_and(int predicate); -int work_group_reduce_logical_or(int predicate); -int work_group_reduce_logical_xor(int predicate); - -int work_group_scan_inclusive_logical_and(int predicate); -int work_group_scan_inclusive_logical_or(int predicate); -int work_group_scan_inclusive_logical_xor(int predicate); - -int work_group_scan_exclusive_logical_and(int predicate); -int work_group_scan_exclusive_logical_or(int predicate); -int work_group_scan_exclusive_logical_xor(int predicate); ----- - -For the following functions, the generic type name `gentype` may be one of the supported built-in scalar data types `int`, `uint`, `long`, or `ulong`. 
- -[source,opencl_c] ----- -gentype work_group_reduce_and(gentype value); -gentype work_group_reduce_or(gentype value); -gentype work_group_reduce_xor(gentype value); - -gentype work_group_scan_inclusive_and(gentype value); -gentype work_group_scan_inclusive_or(gentype value); -gentype work_group_scan_inclusive_xor(gentype value); - -gentype work_group_scan_exclusive_and(gentype value); -gentype work_group_scan_exclusive_or(gentype value); -gentype work_group_scan_exclusive_xor(gentype value); ----- - -For the following functions, the generic type name `gentype` may be one of the supported built-in scalar data types `int`, `uint`, `long`, `ulong`, `float`, `double` (if double precision is supported), or `half` (if half precision is supported). - -[source,opencl_c] ----- -gentype work_group_reduce_mul(gentype value); -gentype work_group_scan_inclusive_mul(gentype value); -gentype work_group_scan_exclusive_mul(gentype value); ----- - -=== Modifications to the OpenCL C Specification - -(Add to Section 6.15.16, *Work-group Collective Functions*) :: -+ --- -The table below describes the OpenCL C programming language built-in functions that perform -logical arithmetic operations across work items in a work-group. These functions must be -encountered by all work items in a work-group executing the kernel, otherwise the behavior is -undefined. For these functions, a non-zero _predicate_ argument or return value is logically -`true` and a zero _predicate_ argument or return value is logically `false`. - -[cols="2a,1",options="header"] -|==== -| Function -| Description -|[source,opencl_c] ----- -int work_group_reduce_logical_and(int predicate); -int work_group_reduce_logical_or(int predicate); -int work_group_reduce_logical_xor(int predicate); ----- -| Returns the logical *and*, *or*, or *xor* of _predicate_ for all work items in the work-group. 
- -|[source,opencl_c] ----- -int work_group_scan_inclusive_logical_and(int predicate); -int work_group_scan_inclusive_logical_or(int predicate); -int work_group_scan_inclusive_logical_xor(int predicate); ----- -| Returns the result of an inclusive scan operation, which is the logical - *and*, *or*, or *xor* of _predicate_ for all work items in the work-group with - a work-group linear local ID less than or equal to this work item’s work-group - linear local ID. - -|[source,c] ----- -int work_group_scan_exclusive_logical_and(int predicate); -int work_group_scan_exclusive_logical_or(int predicate); -int work_group_scan_exclusive_logical_xor(int predicate); ----- -| Returns the result of an exclusive scan operation, which is the logical - *and*, *or*, or *xor* of _predicate_ for all work items in the work-group with - a work-group linear local ID less than this work item’s work-group linear - local ID. - - If there is no work item in the work-group with a work-group linear local ID - less than this work item’s work-group linear local ID then an identity value - `I` is returned. For *and*, the identity value is `true` (non-zero). For *or* - and *xor*, the identity value is `false` (zero). - -|==== - -The table below describes the OpenCL C programming language built-in functions -that perform bitwise integer operations across work items in a work-group. These -functions must be encountered by all work items in a work-group executing the -kernel, otherwise the behavior is undefined. For the functions below, the -generic type name `gentype` may be one of the supported built-in scalar data -types `int`, `uint`, `long`, and `ulong`. - -[cols="2a,1",options="header"] -|==== -| Function -| Description - -|[source,opencl_c] ----- -gentype work_group_reduce_and(gentype value); -gentype work_group_reduce_or(gentype value); -gentype work_group_reduce_xor(gentype value); ----- -| Returns the bitwise *and*, *or*, or *xor* of _value_ for all work items in the work-group. 
- -|[source,opencl_c] ----- -gentype work_group_scan_inclusive_and(gentype value); -gentype work_group_scan_inclusive_or(gentype value); -gentype work_group_scan_inclusive_xor(gentype value); ----- -| Returns the result of an inclusive scan operation, which is the bitwise *and*, - *or*, or *xor* of _value_ for all work items in the work-group with a - work-group linear local ID less than or equal to this work item’s work-group - linear local ID. - -|[source,opencl_c] ----- -gentype work_group_scan_exclusive_and(gentype value); -gentype work_group_scan_exclusive_or(gentype value); -gentype work_group_scan_exclusive_xor(gentype value); ----- -| Returns the result of an exclusive scan operation, which is the bitwise *and*, - *or*, or *xor* of _value_ for all work items in the work-group with a - work-group linear local ID less than this work item’s work-group linear local - ID. - - If there is no work item in the work-group with a work-group linear local ID less than - this work item’s work-group linear local ID then an identity value `I` is returned. - For *and*, the identity value is `~0` (all bits set). For *or* and *xor*, the identity - value is `0`. - -|==== - -The table below describes the OpenCL C programming language built-in functions -that perform multiplicative operations across work items in a work-group. These -functions must be encountered by all work items in a work-group executing the -kernel, otherwise the behavior is undefined. For the functions below, the -generic type name `gentype` may be one of the supported built-in scalar data -types `int`, `uint`, `long`, `ulong`, `float`, `double` (if double precision is -supported), or `half` (if half precision is supported). - -[cols="2a,1",options="header"] -|==== -| Function -| Description - -|[source,opencl_c] ----- -gentype work_group_reduce_mul(gentype value); ----- -| Returns the multiplication of _value_ for all work items in the work-group. 
- -|[source,opencl_c] ----- -gentype work_group_scan_inclusive_mul(gentype value); ----- -| Returns the result of an inclusive scan operation which is the multiplication - of _value_ for all work items in the work-group with a work-group linear local - ID less than or equal to this work item’s work-group linear local ID. - -|[source,opencl_c] ----- -gentype work_group_scan_exclusive_mul(gentype value); ----- -| Returns the result of an exclusive scan operation which is the multiplication - of _value_ for all work items in the work-group with a work-group linear local - ID less than this work item’s work-group linear local ID. - - If there is no work item in the work-group with a work-group linear local ID - less than this work item’s work-group linear local ID then the identity value - `1` is returned. - -|==== --- - -=== Issues - -. For these built-in functions, do we only want to support the types supported by the existing work-group collective functions, or do we want to support the types supported by the sub-group collective functions? -+ --- -`RESOLVED`: The extension will require the same types as the existing work-group collective functions. - -The difference are the 8-bit and 16-bit types: `char`, `uchar`, `short`, and `ushort`. Note that `half` is already supported, if half-precision is supported. --- - diff --git a/ext/introduction.asciidoc b/ext/introduction.asciidoc index 1580441f..311549b0 100644 --- a/ext/introduction.asciidoc +++ b/ext/introduction.asciidoc @@ -5,24 +5,36 @@ [[extensions-overview]] == Extensions Overview -This document describes the list of optional features supported by OpenCL. -Optional extensions are not required to be supported by a conformant OpenCL -implementation, but are expected to be widely available, and in some cases may define -functionality that is likely to be required in a future revision of the -OpenCL specification. - -This document describes all extensions that have been approved by the OpenCL -working group. 
-It is a _unified_ specification, meaning that the extensions described in this -document are not specific to a specific core OpenCL specification version. - -OpenCL extensions approved by the OpenCL working group may be _promoted_ to -core features in later revisions of OpenCL. -When this occurs, the feature described by the extension specification -is merged into the core OpenCL specification. -The extension will continue to be documented in this specification, both for -backwards compatibility and for devices that wish to support the feature -but that are unable to support the newer core OpenCL version. +_Extensions_ are optional features which may be supported by OpenCL +implementations. +Extensions are not required to be supported by a conformant OpenCL +implementation, but are expected to be widely available, and in some cases +may define functionality that is likely to be required in a future revision +of the OpenCL specification. + +In the past, this document contained full specification language for +Khronos-approved `khr` extensions, described in terms of changes to the core +OpenCL Specification. +This extension language has now been integrated into the OpenCL 3.0 +Specification, and can be read in context there. + +The remaining parts of this document describe general issues in _using_ +extensions, such as API <>; OpenCL C +<>; and <>. + +In addition, there is a section on <>. + +Finally, the <> appendix summarizes khr +extensions and links to them in the OpenCL API Specification. +In some cases, extensions are mostly or entirely to the OpenCL C language +rather than to the OpenCL API. +Such extensions can be reached by following the links in the API +Specification extension appendices. 
+ [[naming-convention-for-optional-extensions]] === Naming Convention for Optional Extensions @@ -60,6 +72,7 @@ convention: * All enumerants defined by the vendor extension will have names of the form *CL_<__enum_name__>_<__vendor_name__>.* + [[compiler-directives-for-optional-extensions]] === Compiler Directives for Optional Extensions @@ -144,6 +157,7 @@ A kernel can now use this preprocessor `#define` to do something like: #endif ---- + [[getting-opencl-api-extension-function-pointers]] === Getting OpenCL API Extension Function Pointers diff --git a/ext/quick_reference.asciidoc b/ext/quick_reference.asciidoc index 00757cdb..3b47e039 100644 --- a/ext/quick_reference.asciidoc +++ b/ext/quick_reference.asciidoc @@ -2,286 +2,293 @@ // Creative Commons Attribution 4.0 International License; see // http://creativecommons.org/licenses/by/4.0/ +// The API and C specifications are published in the same directory as the +// extension specification, so only the relative URL is required. + +:APISpecURL: OpenCL_API.html + [appendix] +[[quick-reference]] == Quick Reference +Each extension in this table includes a link to the corresponding appendix +in the OpenCL 3.0 API Specification, which provides a fuller description and +references to the actual extension specification language in the API and C +Language Specifications. + // Editors note: Please keep this table in alphabetical order! 
[cols="5,4,2",options="header",] |==== -| *Extension Name* -| *Brief Description* -| *Status* +| Extension Name and Link +| Brief Description +| Status -| <> +| [[cl_khr_3d_image_writes]] link:{APISpecURL}#cl_khr_3d_image_writes[`cl_khr_3d_image_writes`] | Write to 3D images | Core Feature in OpenCL 2.0 -| <> +| [[cl_khr_async_work_group_copy_fence]] link:{APISpecURL}#cl_khr_async_work_group_copy_fence[`cl_khr_async_work_group_copy_fence`] | Asynchronous Copy Fences | Extension -| <> +| [[cl_khr_byte_addressable_store]] link:{APISpecURL}#cl_khr_byte_addressable_store[`cl_khr_byte_addressable_store`] | Read and write from 8-bit and 16-bit pointers | Core Feature in OpenCL 1.1 -| <> +| [[cl_khr_command_buffer]] link:{APISpecURL}#cl_khr_command_buffer[`cl_khr_command_buffer`] | Record and Replay Commands | Provisional Extension -| <> +| [[cl_khr_command_buffer_multi_device]] link:{APISpecURL}#cl_khr_command_buffer_multi_device[`cl_khr_command_buffer_multi_device`] | Allow a command-buffer to contain commands targeting different devices | Provisional Extension -| <> +| [[cl_khr_command_buffer_mutable_dispatch]] link:{APISpecURL}#cl_khr_command_buffer_mutable_dispatch[`cl_khr_command_buffer_mutable_dispatch`] | Modify kernel execution commands between enqueues of a command-buffer | Provisional Extension -| <> +| [[cl_khr_create_command_queue]] link:{APISpecURL}#cl_khr_create_command_queue[`cl_khr_create_command_queue`] | API to Create Command-Queues with Properties | Core Feature in OpenCL 2.0 -| <> +| [[cl_khr_d3d10_sharing]] link:{APISpecURL}#cl_khr_d3d10_sharing[`cl_khr_d3d10_sharing`] | Share Direct3D 10 Buffers and Textures with OpenCL | Extension -| <> +| [[cl_khr_d3d11_sharing]] link:{APISpecURL}#cl_khr_d3d11_sharing[`cl_khr_d3d11_sharing`] | Share Direct3D 11 Buffers and Textures with OpenCL | Extension -| <> +| [[cl_khr_depth_images]] link:{APISpecURL}#cl_khr_depth_images[`cl_khr_depth_images`] | Single Channel Depth Images | Core Feature in OpenCL 2.0 -| <> +| 
[[cl_khr_device_enqueue_local_arg_types]] link:{APISpecURL}#cl_khr_device_enqueue_local_arg_types[`cl_khr_device_enqueue_local_arg_types`] | Pass Non-Void Local Pointers to Child Kernels | Extension -| <> +| [[cl_khr_device_uuid]] link:{APISpecURL}#cl_khr_device_uuid[`cl_khr_device_uuid`] | Unique Device and Driver Identifier Queries | Extension -| <> +| [[cl_khr_dx9_media_sharing]] link:{APISpecURL}#cl_khr_dx9_media_sharing[`cl_khr_dx9_media_sharing`] | Share DirectX 9 Media Surfaces with OpenCL | Extension -| <> +| [[cl_khr_egl_event]] link:{APISpecURL}#cl_khr_egl_event[`cl_khr_egl_event`] | Share EGL Sync Objects with OpenCL | Extension -| <> +| [[cl_khr_egl_image]] link:{APISpecURL}#cl_khr_egl_image[`cl_khr_egl_image`] | Share EGL Images with OpenCL | Extension -| <> +| [[cl_khr_extended_async_copies]] link:{APISpecURL}#cl_khr_extended_async_copies[`cl_khr_extended_async_copies`] | 2D and 3D Async Copies | Extension -| <> +| [[cl_khr_extended_bit_ops]] link:{APISpecURL}#cl_khr_extended_bit_ops[`cl_khr_extended_bit_ops`] | Bit Insert, Extract, and Reverse Operations | Extension -| <> +| [[cl_khr_extended_versioning]] link:{APISpecURL}#cl_khr_extended_versioning[`cl_khr_extended_versioning`] | Extend versioning of platform, devices, extensions, etc. 
| Core Feature in OpenCL 3.0 (with minor changes) -| <> +| [[cl_khr_external_memory]] link:{APISpecURL}#cl_khr_external_memory[`cl_khr_external_memory`] | Common Functionality for External Memory Sharing | Provisional Extension -| <> +| [[cl_khr_external_memory_dma_buf]] link:{APISpecURL}#cl_khr_external_memory[`cl_khr_external_memory_dma_buf`] | dma_buf External Memory Handles | Provisional Extension -| <> +| [[cl_khr_external_memory_dx]] link:{APISpecURL}#cl_khr_external_memory[`cl_khr_external_memory_dx`] | Direct3D 11 and 12 External Memory Handles | Provisional Extension -| <> +| [[cl_khr_external_memory_opaque_fd]] link:{APISpecURL}#cl_khr_external_memory[`cl_khr_external_memory_opaque_fd`] | Opaque File Descriptor External Memory Handles | Provisional Extension -| <> +| [[cl_khr_external_memory_win32]] link:{APISpecURL}#cl_khr_external_memory[`cl_khr_external_memory_win32`] | NT Handle External Memory Handles | Provisional Extension -| <> +| [[cl_khr_expect_assume]] link:{APISpecURL}#cl_khr_expect_assume[`cl_khr_expect_assume`] | Kernel Optimization Hints | Extension -| <> +| [[cl_khr_external_semaphore]] link:{APISpecURL}#cl_khr_external_semaphore[`cl_khr_external_semaphore`] | Common Functionality for External Semaphore Sharing | Provisional Extension -| <> +| [[cl_khr_external_semaphore_dx_fence]] link:{APISpecURL}#cl_khr_external_semaphore[`cl_khr_external_semaphore_dx_fence`] | Direct3D 12 External Semaphore Handles | Provisional Extension -| <> +| [[cl_khr_external_semaphore_opaque_fd]] link:{APISpecURL}#cl_khr_external_semaphore[`cl_khr_external_semaphore_opaque_fd`] | Opaque File Descriptor External Semaphore Handles | Provisional Extension -| <> +| [[cl_khr_external_semaphore_sync_fd]] link:{APISpecURL}#cl_khr_external_semaphore[`cl_khr_external_semaphore_sync_fd`] | Sync FD External Semaphore Handles | Provisional Extension -| <> +| [[cl_khr_external_semaphore_win32]] link:{APISpecURL}#cl_khr_external_semaphore[`cl_khr_external_semaphore_win32`] | 
NT Handle External Semaphore Handles | Provisional Extension -| <> +| [[cl_khr_fp16]] link:{APISpecURL}#cl_khr_fp16[`cl_khr_fp16`] | Operations on 16-bit Floating-Point Values | Extension -| <> +| [[cl_khr_fp64]] link:{APISpecURL}#cl_khr_fp64[`cl_khr_fp64`] | Operations on 64-bit Floating-Point Values | Optional Core Feature in OpenCL 1.2 -| <> +| [[cl_khr_gl_depth_images]] link:{APISpecURL}#cl_khr_gl_depth_images[`cl_khr_gl_depth_images`] | Share OpenGL Depth Images with OpenCL | Extension -| <> +| [[cl_khr_gl_event]] link:{APISpecURL}#cl_khr_gl_event[`cl_khr_gl_event`] | Share OpenGL Fence Sync Objects with OpenCL | Extension -| <> +| [[cl_khr_gl_msaa_sharing]] link:{APISpecURL}#cl_khr_gl_msaa_sharing[`cl_khr_gl_msaa_sharing`] | Share OpenGL MSAA Textures with OpenCL | Extension -| <> +| [[cl_khr_gl_sharing]] link:{APISpecURL}#cl_khr_gl_sharing[`cl_khr_gl_sharing`] | Sharing OpenGL Buffers and Textures with OpenCL | Extension -| <> +| [[cl_khr_global_int32_base_atomics]] link:{APISpecURL}#cl_khr_int32_atomics[`cl_khr_global_int32_base_atomics`] | Basic Atomic Operations on 32-bit Integers in Global Memory | Core Feature in OpenCL 1.1 -| <> +| [[cl_khr_global_int32_extended_atomics]] link:{APISpecURL}#cl_khr_int32_atomics[`cl_khr_global_int32_extended_atomics`] | Extended Atomic Operations on 32-bit Integers in Global Memory | Core Feature in OpenCL 1.1 -| <> +| [[cl_khr_icd]] link:{APISpecURL}#cl_khr_icd[`cl_khr_icd`] | Installable Client Drivers | Extension -| <> +| [[cl_khr_il_program]] link:{APISpecURL}#cl_khr_il_program[`cl_khr_il_program`] | Support for Intermediate Language (IL) Programs (SPIR-V) | Core Feature in OpenCL 2.1 -| <> +| [[cl_khr_image2d_from_buffer]] link:{APISpecURL}#cl_khr_image2d_from_buffer[`cl_khr_image2d_from_buffer`] | Create 2D Images from Buffers | Core Feature in OpenCL 2.0 -| <> +| [[cl_khr_initialize_memory]] link:{APISpecURL}#cl_khr_initialize_memory[`cl_khr_initialize_memory`] | Initialize Local and Private Memory on Allocation | 
Extension -| <> +| [[cl_khr_int64_base_atomics]] link:{APISpecURL}#cl_khr_int64_atomics[`cl_khr_int64_base_atomics`] | Basic Atomic Operations on 64-bit Integers in Global and Local Memory | Extension -| <> +| [[cl_khr_int64_extended_atomics]] link:{APISpecURL}#cl_khr_int64_atomics[`cl_khr_int64_extended_atomics`] | Extended Atomic Operations on 64-bit Integers in Global and Local Memory | Extension -| <> +| [[cl_khr_local_int32_base_atomics]] link:{APISpecURL}#cl_khr_int32_atomics[`cl_khr_local_int32_base_atomics`] | Basic Atomic Operations on 32-bit Integers in Local Memory | Core Feature in OpenCL 1.1 -| <> +| [[cl_khr_local_int32_extended_atomics]] link:{APISpecURL}#cl_khr_int32_atomics[`cl_khr_local_int32_extended_atomics`] | Extended Atomic Operations on 32-bit Integers in Local Memory | Core Feature in OpenCL 1.1 -| <> +| [[cl_khr_integer_dot_product]] link:{APISpecURL}#cl_khr_integer_dot_product[`cl_khr_integer_dot_product`] | Integer dot product operations | Extension -| <> +| [[cl_khr_mipmap_image]] link:{APISpecURL}#cl_khr_mipmap_image[`cl_khr_mipmap_image`] | Create and Use Images with Mipmaps | Extension -| <> -| Write to Images with Mipmaps -| Extension - -| <> +| [[cl_khr_pci_bus_info]] link:{APISpecURL}#cl_khr_pci_bus_info[`cl_khr_pci_bus_info`] | Query PCI Bus Information for an OpenCL Device | Extension -| <> +| [[cl_khr_priority_hints]] link:{APISpecURL}#cl_khr_priority_hints[`cl_khr_priority_hints`] | Create Command-Queues with Different Priorities | Extension -| <> +| [[cl_khr_select_fprounding_mode]] link:{APISpecURL}#cl_khr_select_fprounding_mode[`cl_khr_select_fprounding_mode`] | Set the Current Kernel Rounding Mode | DEPRECATED -| <> +| [[cl_khr_semaphore]] link:{APISpecURL}#cl_khr_semaphore[`cl_khr_semaphore`] | Semaphore Synchronization Primitives | Provisional Extension -| <> +| [[cl_khr_spir]] link:{APISpecURL}#cl_khr_spir[`cl_khr_spir`] | Standard Portable Intermediate Representation Programs | Extension, Superseded by IL Programs / 
SPIR-V -| <> +| [[cl_khr_srgb_image_writes]] link:{APISpecURL}#cl_khr_srgb_image_writes[`cl_khr_srgb_image_writes`] | Write to sRGB Images | Extension -| <> +| [[cl_khr_subgroups]] link:{APISpecURL}#cl_khr_subgroups[`cl_khr_subgroups`] | Sub-Groupings of Work Items | Core Feature in OpenCL 2.1 (with minor changes) -| <> +| [[cl_khr_subgroup_ballot]] link:{APISpecURL}#cl_khr_subgroup_ballot[`cl_khr_subgroup_ballot`] | Exchange Ballots Among Sub-Groupings of Work Items | Extension -| <> +| [[cl_khr_subgroup_clustered_reduce]] link:{APISpecURL}#cl_khr_subgroup_clustered_reduce[`cl_khr_subgroup_clustered_reduce`] | Clustered Reductions for Sub-Groupings of Work Items | Extension -| <> +| [[cl_khr_subgroup_extended_types]] link:{APISpecURL}#cl_khr_subgroup_extended_types[`cl_khr_subgroup_extended_types`] | Additional Type Support for Sub-group Functions | Extension -| <> +| [[cl_khr_subgroup_named_barrier]] link:{APISpecURL}#cl_khr_subgroup_named_barrier[`cl_khr_subgroup_named_barrier`] | Barriers for Subsets of a Work-group | Extension -| <> +| [[cl_khr_subgroup_non_uniform_arithmetic]] link:{APISpecURL}#cl_khr_subgroup_non_uniform_arithmetic[`cl_khr_subgroup_non_uniform_arithmetic`] | Sub-group Arithmetic Functions in Non-Uniform Control Flow | Extension -| <> +| [[cl_khr_subgroup_non_uniform_vote]] link:{APISpecURL}#cl_khr_subgroup_non_uniform_vote[`cl_khr_subgroup_non_uniform_vote`] | Hold Votes Among Sub-Groupings of Work Items | Extension -| <> +| [[cl_khr_subgroup_rotate]] link:{APISpecURL}#cl_khr_subgroup_rotate[`cl_khr_subgroup_rotate`] | Rotation Among Sub-Groupings of Work Items | Extension -| <> +| [[cl_khr_subgroup_shuffle]] link:{APISpecURL}#cl_khr_subgroup_shuffle[`cl_khr_subgroup_shuffle`] | General-Purpose Shuffles Among Sub-Groupings of Work Items | Extension -| <> +| [[cl_khr_subgroup_shuffle_relative]] link:{APISpecURL}#cl_khr_subgroup_shuffle_relative[`cl_khr_subgroup_shuffle_relative`] | Relative Shuffles Among Sub-Groupings of Work Items | 
Extension -| <> +| [[cl_khr_suggested_local_work_size]] link:{APISpecURL}#cl_khr_suggested_local_work_size[`cl_khr_suggested_local_work_size`] | Query a Suggested Local Work Size | Extension -| <> +| [[cl_khr_terminate_context]] link:{APISpecURL}#cl_khr_terminate_context[`cl_khr_terminate_context`] | Terminate an OpenCL Context | Extension -| <> +| [[cl_khr_throttle_hints]] link:{APISpecURL}#cl_khr_throttle_hints[`cl_khr_throttle_hints`] | Create Command-Queues with Different Throttle Policies | Extension -| <> +| [[cl_khr_work_group_uniform_arithmetic]] link:{APISpecURL}#cl_khr_work_group_uniform_arithmetic[`cl_khr_work_group_uniform_arithmetic`] | Work-group Uniform Arithmetic | Extension diff --git a/extensions/cl_ext_image_requirements_info.asciidoc b/extensions/cl_ext_image_requirements_info.asciidoc index 200116a5..aedc71bc 100644 --- a/extensions/cl_ext_image_requirements_info.asciidoc +++ b/extensions/cl_ext_image_requirements_info.asciidoc @@ -1,6 +1,5 @@ -// Copyright 2018-2021 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ +// Copyright 2018-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 :data-uri: :icons: font @@ -108,7 +107,7 @@ is replaced with: -- For a 2D image created from a buffer, the pitch specified (or computed if -pitch specified is 0) must be a multiple of the +pitch specified is 0) must be a multiple of the {CL_IMAGE_REQUIREMENTS_ROW_PITCH_ALIGNMENT_EXT} value returned for parameters compatible with those used to create the image. -- @@ -214,7 +213,7 @@ include::{generated}/api/protos/clGetImageRequirementsInfoEXT.txt[] + Both _image_format_ and _image_desc_ must be non-`NULL`, otherwise {CL_INVALID_VALUE} is returned. 
- + | {CL_IMAGE_REQUIREMENTS_MAX_WIDTH_EXT} | `cl_uint` | Returns the max width supported for creating images with the parameters passed @@ -343,7 +342,7 @@ When `cl_khr_image2d_from_buffer` is supported: * For all image formats, image types and a selection of values for other members in _image_desc_ (that MUST include `0`) ** Check that the {CL_IMAGE_REQUIREMENTS_MAX_WIDTH_EXT} query can be performed successfully ** Check that the value is smaller than or equal to the value returned for {CL_DEVICE_IMAGE_MAX_BUFFER_SIZE} for images of {CL_MEM_OBJECT_IMAGE1D_BUFFER} type or smaller than or equal to the value returned for {CL_DEVICE_IMAGE3D_MAX_WIDTH} for images of {CL_MEM_OBJECT_IMAGE3D} type or smaller than or equal to the value returned for {CL_DEVICE_IMAGE2D_MAX_WIDTH} for all other image types. - + . Negative tests for {CL_IMAGE_REQUIREMENTS_MAX_HEIGHT_EXT} * Attempt to perform the {CL_IMAGE_REQUIREMENTS_MAX_HEIGHT_EXT} query on all image types for which it is not valid * Check that {CL_INVALID_IMAGE_DESCRIPTOR} is returned in all cases. diff --git a/makeSpec b/makeSpec new file mode 100755 index 00000000..4c3decf3 --- /dev/null +++ b/makeSpec @@ -0,0 +1,154 @@ +#!/usr/bin/env python3 +# +# Copyright 2020-2024 The Khronos Group Inc. +# +# SPDX-License-Identifier: Apache-2.0 + +# Build OpenCL specification with requested extension sets and options. 
+# +# Usage: makeSpec script-options make-options +# Script options are parsed by this script before invoking 'make': +# -genpath path - directory for generated files and outputs +# -spec core - make a spec with no extensions (default) +# -spec khr - make a spec with all KHR extensions +# -spec all - make a spec with all available registered extensions (currently treated as 'khr') +# -extension name - add specified extension and its dependencies +# -clean - clean generated files before building +# -registry path - API XML to use instead of default +# -apiname name - API name to use instead of default +# -test - Build the test spec instead +# -v - verbose, print actions before executing them +# -n - dry-run, print actions instead of executing them +# make-options - all other options are passed to 'make', including +# requested build targets + +import argparse, copy, io, os, re, string, subprocess, sys + +def execute(args, results): # Run 'args' as a command: echo it first under -v or -n, and skip running it under -n + if results.verbose or results.dryrun: + print("'" + "' '".join(args) + "'") + if not results.dryrun: + subprocess.check_call(args) + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + + parser.add_argument('-clean', action='store_true', + help='Clean generated files before building') + parser.add_argument('-extension', action='append', + default=[], + help='Specify a required extension or extensions to add to targets') + parser.add_argument('-genpath', action='store', + default='gen', + help='Path to directory containing generated files') + parser.add_argument('-spec', action='store', + choices=[ 'core', 'khr', 'all' ], + default='core', + help='Type of spec to generate') + parser.add_argument('-registry', action='store', + default=None, + help='Path to API XML registry file specifying version and extension dependencies') + parser.add_argument('-apiname', action='store', + default=None, + help='API name to generate') + parser.add_argument('-test', action='store_true', + help='Build the test spec instead of the OpenCL spec') + parser.add_argument('-n',
action='store_true', dest='dryrun', + help='Only prints actions, do not execute them') + parser.add_argument('-v', action='store_true', dest='verbose', + help='Print actions before executing them') + + (results, options) = parser.parse_known_args() + + # Ensure genpath is an absolute path, not relative + if not os.path.isabs(results.genpath): + results.genpath = os.path.join(os.getcwd(), results.genpath) + + # Look for scripts/extdependency.py + # This requires makeSpec to be invoked from the repository root, but we + # could derive that path. + sys.path.insert(0, 'scripts') + from extdependency import ApiDependencies + deps = ApiDependencies(results.registry, results.apiname) + + # List of extensions to build with from the requested -spec + # Also construct a spec title + # This should respect version dependencies as well + + # Temporary workaround, as the spec markup does not include non-khr + # extension appendices yet. + + if results.spec == 'all': + results.spec = 'khr' + print("WARNING: 'all' argument to -spec interpreted as 'khr' at present", file=sys.stderr) + + if results.spec == 'core': + title = '' + exts = set() + elif results.spec == 'khr': + title = 'with all KHR extensions' + exts = set(deps.khrExtensions()) + elif results.spec == 'all': + title = 'with all registered extensions' + exts = set(deps.allExtensions()) + + # List of explicitly requested extension and all its supported dependencies + extraexts = set() + for name in results.extension: + if name in deps.allExtensions(): + extraexts.add(name) + for dep in deps.children(name): + if dep in deps.allExtensions(): + extraexts.update({dep}) + else: + raise Exception(f'ERROR: unknown extension {name}') + + # See if any explicitly requested extensions are not implicitly requested + # Add any such extensions to the spec title + extraexts -= exts + if len(extraexts) > 0: + exts.update(extraexts) + if title != '': + title += ' and ' + ', '.join(sorted(extraexts)) + else: + title += 'with ' + ',
'.join(sorted(extraexts)) + + if title != '': + title = '(' + title + ')' + + # Finally, actually invoke make as needed for the targets + args = [ 'make', 'GENERATED=' + results.genpath ] + + if results.clean: + # If OUTDIR is set on the command line, pass it to the 'clean' + # target so it is cleaned as well. + cleanopts = ['clean'] + for opt in options: + if opt.startswith('OUTDIR='): + cleanopts.append(opt) + try: + execute(args + cleanopts, results) + except: # deliberately broad: any failure, including interruption, becomes exit status 1 + sys.exit(1) + + # Use the test spec if specified. This is used solely by self tests. + rootdir = os.path.dirname(os.path.abspath(__file__)) + if results.test: + # Set the spec source to the test spec + args.append(f'SPECSRC={rootdir}/build_tests/testspec.adoc') + args.append(f'SPECDIR={rootdir}/build_tests/') + # Make sure the build is invariant + args.append('SPECREVISION=1.2.3') + args.append('SPECDATE=\\"2100-11-22 00:33:44Z\\"') + args.append('SPECREMARK=\\"test build\\"') + + # The actual target + if len(exts) > 0: + args.append(f'EXTENSIONS={" ".join(sorted(exts))}') + args.append(f'APITITLE={title}') + args += options + + try: + execute(args, results) + except: # deliberately broad: any failure, including interruption, becomes exit status 1 + sys.exit(1) diff --git a/man/static/clCreateEventFromEGLSyncKHR.txt b/man/static/clCreateEventFromEGLSyncKHR.txt deleted file mode 100644 index f067fc3d..00000000 --- a/man/static/clCreateEventFromEGLSyncKHR.txt +++ /dev/null @@ -1,107 +0,0 @@ -// Copyright 2014-2024 The Khronos Group Inc. -// SPDX-License-Identifier: CC-BY-4.0 - -:data-uri: -:icons: font -include::{config}/attribs.txt[] - -= clCreateEventFromEGLSyncKHR(3) - -== Name - -clCreateEventFromEGLSyncKHR - Creates a linked event object. - -[source,c] ----- -cl_event clCreateEventFromEGLSyncKHR(cl_context context, - CLeglSyncKHR sync, - CLeglDisplayKHR display, - cl_int *errcode_ret) ----- - - -== Parameters - - * _context_ - A valid OpenCL context created from an OpenGL context or - share group, using the reflink:cl_khr_gl_sharing extension.
- * _sync_ - The name of a sync object of type `EGL_SYNC_FENCE_KHR` created - with respect to `EGLDisplay` _display_. - * _display_ - An `EGLDisplay` handle. - -== Description - -An event object may be created by linking to an EGL sync object. -Completion of such an event object is equivalent to waiting for completion -of the fence command associated with the linked EGL sync object. - - -== Notes - -The parameters of an event object linked to an EGL sync object will return -the following values when queried with flink:clGetEventInfo: - - * The `CL_EVENT_COMMAND_QUEUE` of a linked event is NULL, because the - event is not associated with any OpenCL command-queue. - * The `CL_EVENT_COMMAND_TYPE` of a linked event is - `CL_COMMAND_EGL_FENCE_SYNC_OBJECT_KHR`, indicating that the event is - associated with a EGL sync object, rather than an OpenCL command. - * The `CL_EVENT_COMMAND_EXECUTION_STATUS` of a linked event is either - `CL_SUBMITTED`, indicating that the fence command associated with the - sync object has not yet completed, or `CL_COMPLETE`, indicating that the - fence command has completed. - -`clCreateEventFromEGLSyncKHR` performs an implicit flink:clRetainEvent on -the returned event object. Creating a linked event object also places a -reference on the linked EGL sync object. When the event object is deleted, -the reference will be removed from the EGL sync object. - -Events returned from `clCreateEventFromEGLSyncKHR` may only be consumed by -`clEnqueueAcquire***` commands. Passing such events to any other OpenCL API -that enqueues commands will generate a `CL_INVALID_EVENT` error." - -Event objects can also be used to reflect the status of an EGL fence sync -object. The sync object in turn refers to a fence command executing in an -EGL client API command stream. This provides another method of coordinating -sharing of EGL / EGL client API objects with OpenCL. 
Completion of EGL / EGL -client API commands may be determined by placing an EGL fence command after -commands using `eglCreateSyncKHR`, creating an event from the resulting EGL -sync object using `clCreateEventFromEGLSyncKHR` and then specifying it in -the _event_wait_list_ of a `clEnqueueAcquire***` command. This method may be -considerably more efficient than calling operations like `glFinish`, and is -referred to as _explicit synchronization_. The application is responsible -for ensuring the command stream associated with the EGL fence is flushed to -ensure the CL queue is submitted to the device. Explicit synchronization is -most useful when an EGL client API context bound to another thread is -accessing the memory objects. - - -== Errors - -Returns a valid OpenCL event object and _errcode_ret_ is set to `CL_SUCCESS` -if the event object is created successfully. -Otherwise, it returns a NULL value with one of the following error values -returned in _errcode_ret_: - - * `CL_INVALID_CONTEXT` if _context_ is not a valid context or was not - created from a GL context. - * `CL_INVALID_EGL_OBJECT_KHR` if _sync_ is not a valid EGLSyncKHR handle - created with respect to `EGLDisplay` _display_. - * `CL_INVALID_EGL_OBJECT_KHR` if _sync_ is not a valid EGLSyncKHR object - of type `EGL_SYNC_FENCE_KHR` created with respect to `EGLDisplay` - _display_. - -== See Also - -reflink:cl_khr_egl_event, -flink:clEnqueueAcquireEGLObjectsKHR, -flink:clEnqueueAcquireGLObjects - -== Document Notes - -For more information, see the OpenCL Extension Specification at URL - -https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#cl_khr_egl_event - -== Copyright - -include::footer.txt[] diff --git a/man/static/clCreateEventFromGLsyncKHR.txt b/man/static/clCreateEventFromGLsyncKHR.txt deleted file mode 100644 index 1cbc618a..00000000 --- a/man/static/clCreateEventFromGLsyncKHR.txt +++ /dev/null @@ -1,79 +0,0 @@ -// Copyright 2014-2024 The Khronos Group Inc. 
-// SPDX-License-Identifier: CC-BY-4.0 - -:data-uri: -:icons: font -include::{config}/attribs.txt[] - -= clCreateEventFromGLsyncKHR(3) - -== Name - -clCreateEventFromGLsyncKHR - Creates an event object linked to an OpenGL sync object. - -== C Specification - -[source,c] ----- -cl_event clCreateEventFromGLsyncKHR(cl_context context, - GLsync sync, - cl_int *errcode_ret) ----- - -== Parameters - - * _context_ - A valid OpenCL context created from an OpenGL context or - share group, using the reflink:cl_khr_gl_sharing extension. - * _sync_ - The name of a sync object in the GL share group associated with - _context_. - * _errcode_ret_ - An appropriate error code. If _errcode_ret_ is NULL, no - error code is returned. - -== Description - -An event object may be created by linking to an OpenGL sync object. -Completion of such an event object is equivalent to waiting for completion of the fence command associated with the linked GL sync object. - -== Notes - -The parameters of an event object linked to a GL sync object will return the following values when queried with flink:clGetEventInfo: - - * The `CL_EVENT_COMMAND_QUEUE` of a linked event is NULL, because the event is not associated with any OpenCL command-queue. - * The `CL_EVENT_COMMAND_TYPE` of a linked event is `CL_COMMAND_GL_FENCE_SYNC_OBJECT_KHR`, indicating that the event is associated with a GL sync object, rather than an OpenCL command. - * The `CL_EVENT_COMMAND_EXECUTION_STATUS` of a linked event is either `CL_SUBMITTED`, indicating that the fence command associated with the sync object has not yet completed, or `CL_COMPLETE`, indicating that the fence command has completed. - -`clCreateEventFromGLsyncKHR` performs an implicit flink:clRetainEvent on the returned event object. -Creating a linked event object also places a reference on the linked GL sync object. -When the event object is deleted, the reference will be removed from the GL sync object. 
- -Events returned from `clCreateEventFromGLsyncKHR` can be used in the -_event_wait_list_ argument to flink:clEnqueueAcquireGLObjects and CL APIs -that take a `cl_event` as an argument but do not enqueue commands. Passing -such events to any other CL API that enqueues commands will generate a -`CL_INVALID_EVENT` error. - -== Errors - -Returns a valid OpenCL event object and _errcode_ret_ is set to `CL_SUCCESS` if the event object is created successfully. -Otherwise, it returns a NULL value with one of the following error values returned in _errcode_ret_: - - * `CL_INVALID_CONTEXT` if _context_ is not a valid context or was not created from a GL context. - * `CL_INVALID_GL_OBJECT` if _sync_ is not the name of a sync object in the GL share group associated with _context_. - -== See Also - -reflink:cl_khr_gl_sharing, -reflink:cl_khr_gl_event, -flink:clGetEventInfo, -flink:clEnqueueAcquireGLObjects, -flink:clRetainEvent - -== Document Notes - -For more information, see the OpenCL Extension Specification at URL - -https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#clCreateFromGLBuffer - -== Copyright - -include::footer.txt[] diff --git a/man/static/clCreateFromD3D10BufferKHR.txt b/man/static/clCreateFromD3D10BufferKHR.txt deleted file mode 100644 index 445c42aa..00000000 --- a/man/static/clCreateFromD3D10BufferKHR.txt +++ /dev/null @@ -1,64 +0,0 @@ -// Copyright 2014-2024 The Khronos Group Inc. -// SPDX-License-Identifier: CC-BY-4.0 - -:data-uri: -:icons: font -include::{config}/attribs.txt[] - -= clCreateFromD3D10BufferKHR(3) - -== Name - -clCreateFromD3D10BufferKHR - Creates an OpenCL buffer object from a Direct3D 10 buffer. - -== C Specification - -[source,c] ----- -cl_mem clCreateFromD3D10BufferKHR(cl_context context, - cl_mem_flags flags, - ID3D10Buffer *resource, - cl_int *errcode_ret) ----- - -== Parameters - - * _context_ - A valid OpenCL context created from a Direct3D 10 device. 
- * _flags_ - A bit-field that is used to specify usage information. May be - `CL_MEM_READ_ONLY`, `CL_MEM_WRITE_ONLY`, or `CL_MEM_READ_WRITE`. (See - the table for flink:clCreateBuffer for more information.) - * _resource_ - A pointer to the Direct3D 10 buffer to share. - * _errcode_ret_ - An appropriate error code. If _errcode_ret_ is NULL, no - error code is returned. - -== Notes - -The size of the returned OpenCL buffer object is the same as the size of _resource_. -This call will increment the internal Direct3D reference count on _resource_. -The internal Direct3D reference count on _resource_ will be decremented when the OpenCL reference count on the returned OpenCL memory object drops to zero. - -include::sharingD3D10Inc.txt[] - -== Errors - -Returns a valid non-zero OpenCL buffer object and _errcode_ret_ is set to `CL_SUCCESS` if the buffer object is created successfully. -Otherwise, it returns a NULL value with one of the following error values returned in _errcode_ret_: - - * `CL_INVALID_CONTEXT` if _context_ is not a valid context. - * `CL_INVALID_VALUE` if values specified in _flags_ are not valid. - * `CL_INVALID_D3D10_RESOURCE_KHR` if _resource_ is not a Direct3D 10 buffer resource, if _resource_ was created with the `D3D10_USAGE` flag `D3D10_USAGE_IMMUTABLE`, if a `cl_mem` from _resource_ has already been created using `clCreateFromD3D10BufferKHR`, or if _context_ was not created against the same Direct3D 10 device from which _resource_ was created. - * `CL_OUT_OF_HOST_MEMORY` if there is a failure to allocate resources required by the OpenCL implementation on the host. 
- -== See Also - -reflink:cl_khr_d3d10_sharing - -== Document Notes - -For more information, see the OpenCL Extension Specification at URL - -https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#clCreateFromD3D10BufferKHR - -== Copyright - -include::footer.txt[] diff --git a/man/static/clCreateFromD3D10Texture2DKHR.txt b/man/static/clCreateFromD3D10Texture2DKHR.txt deleted file mode 100644 index a8f420d8..00000000 --- a/man/static/clCreateFromD3D10Texture2DKHR.txt +++ /dev/null @@ -1,69 +0,0 @@ -// Copyright 2014-2024 The Khronos Group Inc. -// SPDX-License-Identifier: CC-BY-4.0 - -:data-uri: -:icons: font -include::{config}/attribs.txt[] - -= clCreateFromD3D10Texture2DKHR(3) - -== Name - -clCreateFromD3D10Texture2DKHR - Creates an OpenCL 2D image object from a subresource of a Direct3D 10 2D texture. - -== C Specification - -[source,c] ----- -cl_mem clCreateFromD3D10Texture2DKHR(cl_context context, - cl_mem_flags flags, - ID3D10Texture2D *resource, - UINT subresource, - cl_int *errcode_ret) ----- - -== Parameters - - * _context_ - A valid OpenCL context created from a Direct3D 10 device. - * _flags_ - A bit-field that is used to specify usage information. May be - `CL_MEM_READ_ONLY`, `CL_MEM_WRITE_ONLY`, or `CL_MEM_READ_WRITE`. (See - the table for flink:clCreateBuffer for more information.) - * _resource_ - A pointer to the Direct3D 10 2D texture to share. - * _subresource_ - The subresource of _resource_ to share. - * _errcode_ret_ - An appropriate error code. If _errcode_ret_ is NULL, no - error code is returned. - -== Notes - -The width, height and depth of the returned OpenCL 2D image object are determined by the width, height and depth of subresource _subresource_ of _resource_. -The channel type and order of the returned OpenCL 2D image object is determined by the format of _resource_ as shown in of Direct3D 10 and corresponding OpenCL image formats of _resource_ in flink:clCreateFromD3D10Texture3DKHR. 
- -This call will increment the internal Direct3D reference count on _resource_. -The internal Direct3D reference count on _resource_ will be decremented when the OpenCL reference count on the returned OpenCL memory object drops to zero. - -include::sharingD3D10Inc.txt[] - -== Errors - -Returns a valid non-zero OpenCL image object and _errcode_ret_ is set to `CL_SUCCESS` if the image object is created successfully. -Otherwise, it returns a NULL value with one of the following error values returned in _errcode_ret_: - - * `CL_INVALID_CONTEXT` if _context_ is not a valid context. - * `CL_INVALID_VALUE` if values specified in _flags_ are not valid or if _subresource_ is not a valid subresource index for _resource_. - * `CL_INVALID_D3D10_RESOURCE_KHR` if _resource_ is not a Direct3D 10 texture resource, if _resource_ was created with the `D3D10_USAGE` flag `D3D10_USAGE_IMMUTABLE`, if a _resource_ is a multisampled texture, if a `cl_mem` from subresource _subresource_ of _resource_ has already been created using `clCreateFromD3D10Texture2DKHR`, or if _context_ was not created against the same Direct3D 10 device from which _resource_ was created. - * `CL_INVALID_IMAGE_FORMAT_DESCRIPTOR` if the Direct3D 10 texture format of _resource_ is not a value listed in the table of Direct3D 10 and corresponding OpenCL image formats for flink:clCreateFromD3D10Texture3DKHR, or if the Direct3D 10 texture format of _resource_ does not map to a supported OpenCL image format. - * `CL_OUT_OF_HOST_MEMORY` if there is a failure to allocate resources required by the OpenCL implementation on the host. 
- -== See Also - -reflink:cl_khr_d3d10_sharing - -== Document Notes - -For more information, see the OpenCL Extension Specification at URL - -https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#clCreateFromD3D10Texture2DKHR - -== Copyright - -include::footer.txt[] diff --git a/man/static/clCreateFromD3D10Texture3DKHR.txt b/man/static/clCreateFromD3D10Texture3DKHR.txt deleted file mode 100644 index d8f0ab1c..00000000 --- a/man/static/clCreateFromD3D10Texture3DKHR.txt +++ /dev/null @@ -1,121 +0,0 @@ -// Copyright 2014-2024 The Khronos Group Inc. -// SPDX-License-Identifier: CC-BY-4.0 - -:data-uri: -:icons: font -include::{config}/attribs.txt[] - -= clCreateFromD3D10Texture3DKHR(3) - -== Name - -clCreateFromD3D10Texture3DKHR - Creates an OpenCL 3D image object from a subresource of a Direct3D 10 3D texture. - -== C Specification - -[source,c] ----- -cl_mem clCreateFromD3D10Texture3DKHR(cl_context context, - cl_mem_flags flags, - ID3D10Texture3D *resource, - UINT subresource, - cl_int *errcode_ret) ----- - -== Parameters - - * _context_ - A valid OpenCL context created from a Direct3D 10 device. - * _flags_ - A bit-field that is used to specify usage information. May be - `CL_MEM_READ_ONLY`, `CL_MEM_WRITE_ONLY`, or `CL_MEM_READ_WRITE`. (See - the table for flink:clCreateBuffer for more information.) - * _resource_ - A pointer to the Direct3D 10 3D texture to share. - * _subresource_ - The subresource of _resource_ to share. - * _errcode_ret_ - An appropriate error code. If _errcode_ret_ is NULL, no - error code is returned. - -== Notes - -The width, height and depth of the returned OpenCL 3D image object are determined by the width, height and depth of subresource _subresource_ of _resource_. -The channel type and order of the returned OpenCL 3D image object is determined by the format of _resource_ as shown below in the table of Direct3D 10 and corresponding OpenCL image formats. 
- -This call will increment the internal Direct3D reference count on _resource_. -The internal Direct3D reference count on _resource_ will be decremented when the OpenCL reference count on the returned OpenCL memory object drops to zero. - -Following is a list of Direct3D 10 and corresponding OpenCL image formats. - -[cols="1a,1a", options="header"] -|==== -| DXGI format | CL image format (channel order, channel data type) -| `DXGI_FORMAT_R32G32B32A32_FLOAT` | CL_RGBA, CL_FLOAT -| `DXGI_FORMAT_R32G32B32A32_UINT` | CL_RGBA, CL_UNSIGNED_INT32 -| `DXGI_FORMAT_R32G32B32A32_SINT` | CL_RGBA, CL_SIGNED_INT32 -| | -| `DXGI_FORMAT_R16G16B16A16_FLOAT` | CL_RGBA, CL_HALF_FLOAT -| `DXGI_FORMAT_R16G16B16A16_UNORM` | CL_RGBA, CL_UNORM_INT16 -| `DXGI_FORMAT_R16G16B16A16_UINT` | CL_RGBA, CL_UNSIGNED_INT16 -| `DXGI_FORMAT_R16G16B16A16_SNORM` | CL_RGBA, CL_SNORM_INT16 -| `DXGI_FORMAT_R16G16B16A16_SINT` | CL_RGBA, CL_SIGNED_INT16 -| | -| `DXGI_FORMAT_R8G8B8A8_UNORM` | CL_BGRA, CL_UNORM_INT8 -| `DXGI_FORMAT_R8G8B8A8_UNORM` | CL_RGBA, CL_UNORM_INT8 -| `DXGI_FORMAT_R8G8B8A8_UINT` | CL_RGBA, CL_UNSIGNED_INT8 -| `DXGI_FORMAT_R8G8B8A8_SNORM` | CL_RGBA, CL_SNORM_INT8 -| `DXGI_FORMAT_R8G8B8A8_SINT` | CL_RGBA, CL_SIGNED_INT8 -| | -| `DXGI_FORMAT_R32G32_FLOAT` | CL_RG, CL_FLOAT -| `DXGI_FORMAT_R32G32_UINT` | CL_RG, CL_UNSIGNED_INT32 -| `DXGI_FORMAT_R32G32_SINT` | CL_RG, CL_SIGNED_INT32 -| | -| `DXGI_FORMAT_R16G16_FLOAT` | CL_RG, CL_HALF_FLOAT -| `DXGI_FORMAT_R16G16_UNORM` | CL_RG, CL_UNORM_INT16 -| `DXGI_FORMAT_R16G16_UINT` | CL_RG, CL_UNSIGNED_INT16 -| `DXGI_FORMAT_R16G16_SNORM` | CL_RG, CL_SNORM_INT16 -| `DXGI_FORMAT_R16G16_SINT` | CL_RG, CL_SIGNED_INT16 -| | -| `DXGI_FORMAT_R8G8_UNORM` | CL_RG, CL_UNORM_INT8 -| `DXGI_FORMAT_R8G8_UINT` | CL_RG, CL_UNSIGNED_INT8 -| `DXGI_FORMAT_R8G8_SNORM` | CL_RG, CL_SNORM_INT8 -| `DXGI_FORMAT_R8G8_SINT` | CL_RG, CL_SIGNED_INT8 -| | -| `DXGI_FORMAT_R32_FLOAT` | CL_R, CL_FLOAT -| `DXGI_FORMAT_R32_UINT` | CL_R, CL_UNSIGNED_INT32 -| `DXGI_FORMAT_R32_SINT` | 
CL_R, CL_SIGNED_INT32 -| | -| `DXGI_FORMAT_R16_FLOAT` | CL_R, CL_HALF_FLOAT -| `DXGI_FORMAT_R16_UNORM` | CL_R, CL_UNORM_INT16 -| `DXGI_FORMAT_R16_UINT` | CL_R, CL_UNSIGNED_INT16 -| `DXGI_FORMAT_R16_SNORM` | CL_R, CL_SNORM_INT16 -| `DXGI_FORMAT_R16_SINT` | CL_R, CL_SIGNED_INT16 -| | -| `DXGI_FORMAT_R8_UNORM` | CL_R, CL_UNORM_INT8 -| `DXGI_FORMAT_R8_UINT` | CL_R, CL_UNSIGNED_INT8 -| `DXGI_FORMAT_R8_SNORM` | CL_R, CL_SNORM_INT8 -| `DXGI_FORMAT_R8_SINT` | CL_R, CL_SIGNED_INT8 -|==== - -include::sharingD3D10Inc.txt[] - -== Errors - -Returns a valid non-zero OpenCL buffer object and _errcode_ret_ is set to `CL_SUCCESS` if the buffer object is created successfully. -Otherwise, it returns a NULL value with one of the following error values returned in _errcode_ret_: - - * `CL_INVALID_CONTEXT` if _context_ is not a valid context. - * `CL_INVALID_VALUE` if values specified in _flags_ are not valid or if _subresource_ is not a valid subresource index for _resource_. - * `CL_INVALID_D3D10_RESOURCE_KHR` if _resource_ is not a Direct3D 10 texture resource, if _resource_ was created with the `D3D10_USAGE` flag `D3D10_USAGE_IMMUTABLE`, if a _resource_ is a multisampled texture, if a `cl_mem` from subresource _subresource_ of _resource_ has already been created using `clCreateFromD3D10Texture3DKHR`, or if _context_ was not created against the same Direct3D 10 device from which _resource_ was created. - * `CL_INVALID_IMAGE_FORMAT_DESCRIPTOR` if the Direct3D 10 texture format of _resource_ is not a value listed in the above table of Direct3D 10 and corresponding OpenCL image formats, or if the Direct3D 10 texture format of _resource_ does not map to a supported OpenCL image format. - * `CL_OUT_OF_HOST_MEMORY` if there is a failure to allocate resources required by the OpenCL implementation on the host. 
- -== See Also - -reflink:cl_khr_d3d10_sharing - -== Document Notes - -For more information, see the OpenCL Extension Specification at URL - -https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#clCreateFromD3D10Texture3DKHR - -== Copyright - -include::footer.txt[] diff --git a/man/static/clCreateFromD3D11BufferKHR.txt b/man/static/clCreateFromD3D11BufferKHR.txt deleted file mode 100644 index e3a007aa..00000000 --- a/man/static/clCreateFromD3D11BufferKHR.txt +++ /dev/null @@ -1,64 +0,0 @@ -// Copyright 2014-2024 The Khronos Group Inc. -// SPDX-License-Identifier: CC-BY-4.0 - -:data-uri: -:icons: font -include::{config}/attribs.txt[] - -= clCreateFromD3D11BufferKHR(3) - -== Name - -clCreateFromD3D11BufferKHR - Creates an OpenCL buffer object from a Direct3D 11 buffer. - -== C Specification - -[source,c] ----- -cl_mem clCreateFromD3D11BufferKHR(cl_context context, - cl_mem_flags flags, - ID3D11Buffer *resource, - cl_int *errcode_ret) ----- - -== Parameters - - * _context_ - A valid OpenCL context created from a Direct3D 11 device. - * _flags_ - A bit-field that is used to specify usage information. May be - `CL_MEM_READ_ONLY`, `CL_MEM_WRITE_ONLY`, or `CL_MEM_READ_WRITE`. (See - the table for flink:clCreateBuffer for more information.) - * _resource_ - A pointer to the Direct3D 11 buffer to share. - * _errcode_ret_ - An appropriate error code. If _errcode_ret_ is NULL, no - error code is returned. - -== Notes - -The size of the returned OpenCL buffer object is the same as the size of _resource_. -This call will increment the internal Direct3D reference count on _resource_. -The internal Direct3D reference count on _resource_ will be decremented when the OpenCL reference count on the returned OpenCL memory object drops to zero. - -include::sharingD3D11Inc.txt[] - -== Errors - -Returns a valid non-zero OpenCL buffer object and _errcode_ret_ is set to `CL_SUCCESS` if the buffer object is created successfully. 
-Otherwise, it returns a NULL value with one of the following error values returned in _errcode_ret_: - - * `CL_INVALID_CONTEXT` if _context_ is not a valid context. - * `CL_INVALID_VALUE` if values specified in _flags_ are not valid. - * `CL_INVALID_D3D11_RESOURCE_KHR` if _resource_ is not a Direct3D 11 buffer resource, if _resource_ was created with the `D3D11_USAGE` flag `D3D11_USAGE_IMMUTABLE`, if a `cl_mem` from _resource_ has already been created using `clCreateFromD3D11BufferKHR`, or if _context_ was not created against the same Direct3D 11 device from which _resource_ was created. - * `CL_OUT_OF_HOST_MEMORY` if there is a failure to allocate resources required by the OpenCL implementation on the host. - -== See Also - -reflink:cl_khr_d3d11_sharing - -== Document Notes - -For more information, see the OpenCL Extension Specification at URL - -https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#clCreateFromD3D11BufferKHR - -== Copyright - -include::footer.txt[] diff --git a/man/static/clCreateFromD3D11Texture2DKHR.txt b/man/static/clCreateFromD3D11Texture2DKHR.txt deleted file mode 100644 index 1ff783c0..00000000 --- a/man/static/clCreateFromD3D11Texture2DKHR.txt +++ /dev/null @@ -1,69 +0,0 @@ -// Copyright 2014-2024 The Khronos Group Inc. -// SPDX-License-Identifier: CC-BY-4.0 - -:data-uri: -:icons: font -include::{config}/attribs.txt[] - -= clCreateFromD3D11Texture2DKHR(3) - -== Name - -clCreateFromD3D11Texture2DKHR - Creates an OpenCL 2D image object from a subresource of a Direct3D 11 2D texture. - -== C Specification - -[source,c] ----- -cl_mem clCreateFromD3D11Texture2DKHR(cl_context context, - cl_mem_flags flags, - ID3D11Texture2D *resource, - UINT subresource, - cl_int *errcode_ret) ----- - -== Parameters - - * _context_ - A valid OpenCL context created from a Direct3D 11 device. - * _flags_ - A bit-field that is used to specify usage information. May be - `CL_MEM_READ_ONLY`, `CL_MEM_WRITE_ONLY`, or `CL_MEM_READ_WRITE`. 
(See - the table for flink:clCreateBuffer for more information.) - * _resource_ - A pointer to the Direct3D 11 2D texture to share. - * _subresource_ - The subresource of _resource_ to share. - * _errcode_ret_ - An appropriate error code. If _errcode_ret_ is NULL, no - error code is returned. - -== Notes - -The width, height and depth of the returned OpenCL 2D image object are determined by the width, height and depth of subresource _subresource_ of _resource_. -The channel type and order of the returned OpenCL 2D image object is determined by the format of _resource_ as shown in Direct3D 11 and corresponding OpenCL image formats of _resource_ in flink:clCreateFromD3D11Texture3DKHR. - -This call will increment the internal Direct3D reference count on _resource_. -The internal Direct3D reference count on _resource_ will be decremented when the OpenCL reference count on the returned OpenCL memory object drops to zero. - -include::sharingD3D11Inc.txt[] - -== Errors - -Returns a valid non-zero OpenCL image object and _errcode_ret_ is set to `CL_SUCCESS` if the image object is created successfully. -Otherwise, it returns a NULL value with one of the following error values returned in _errcode_ret_: - - * `CL_INVALID_CONTEXT` if _context_ is not a valid context. - * `CL_INVALID_VALUE` if values specified in _flags_ are not valid or if _subresource_ is not a valid subresource index for _resource_. - * `CL_INVALID_D3D11_RESOURCE_KHR` if _resource_ is not a Direct3D 11 texture resource, if _resource_ was created with the `D3D11_USAGE` flag `D3D11_USAGE_IMMUTABLE`, if a _resource_ is a multisampled texture, if a `cl_mem` from subresource _subresource_ of _resource_ has already been created using `clCreateFromD3D11Texture2DKHR`, or if _context_ was not created against the same Direct3D 11 device from which _resource_ was created. 
- * `CL_INVALID_IMAGE_FORMAT_DESCRIPTOR` if the Direct3D 11 texture format of _resource_ is not a value listed in the table of Direct3D 11 and corresponding OpenCL image formats for flink:clCreateFromD3D11Texture3DKHR, or if the Direct3D 11 texture format of _resource_ does not map to a supported OpenCL image format. - * `CL_OUT_OF_HOST_MEMORY` if there is a failure to allocate resources required by the OpenCL implementation on the host. - -== See Also - -reflink:cl_khr_d3d11_sharing - -== Document Notes - -For more information, see the OpenCL Extension Specification at URL - -https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#clCreateFromD3D11Texture2DKHR - -== Copyright - -include::footer.txt[] diff --git a/man/static/clCreateFromD3D11Texture3DKHR.txt b/man/static/clCreateFromD3D11Texture3DKHR.txt deleted file mode 100644 index ccbec766..00000000 --- a/man/static/clCreateFromD3D11Texture3DKHR.txt +++ /dev/null @@ -1,111 +0,0 @@ -// Copyright 2014-2024 The Khronos Group Inc. -// SPDX-License-Identifier: CC-BY-4.0 - -:data-uri: -:icons: font -include::{config}/attribs.txt[] - -= clCreateFromD3D11Texture3DKHR(3) - -== Name - -clCreateFromD3D11Texture3DKHR - Creates an OpenCL 3D image object from a subresource of a Direct3D 11 3D texture. - -== C Specification - -[source,c] ----- -cl_mem clCreateFromD3D11Texture3DKHR(cl_context context, - cl_mem_flags flags, - ID3D11Texture3D *resource, - UINT subresource, - cl_int *errcode_ret) ----- - -== Parameters - - * _context_ - A valid OpenCL context created from a Direct3D 11 device. - * _flags_ - A bit-field that is used to specify usage information. May be - `CL_MEM_READ_ONLY`, `CL_MEM_WRITE_ONLY`, or `CL_MEM_READ_WRITE`. (See - the table for flink:clCreateBuffer for more information.) - * _resource_ - A pointer to the Direct3D 11 texture to share. - * _subresource_ - The subresource of _resource_ to share. - * _errcode_ret_ - An appropriate error code. 
If _errcode_ret_ is NULL, no - error code is returned. - -== Notes - -The width, height and depth of the returned OpenCL 3D image object are determined by the width, height and depth of subresource _subresource_ of _resource_. -The channel type and order of the returned OpenCL 3D image object is determined by the format of _resource_ as shown in the table below (Table 9.9.3). - -This call will increment the internal Direct3D reference count on _resource_. -The internal Direct3D reference count on _resource_ will be decremented when the OpenCL reference count on the returned OpenCL memory object drops to zero. - -[cols="1a,1a", options="header"] -|==== -| DXGI format | CL image format (channel order, channel data type) -| `DXGI_FORMAT_R32G32B32A32_FLOAT` | `CL_RGBA, CL_FLOAT` -| `DXGI_FORMAT_R32G32B32A32_UINT` | `CL_RGBA, CL_UNSIGNED_INT32` -| `DXGI_FORMAT_R32G32B32A32_SINT` | `CL_RGBA, CL_SIGNED_INT32` -| `DXGI_FORMAT_R16G16B16A16_FLOAT` | `CL_RGBA, CL_HALF_FLOAT` -| `DXGI_FORMAT_R16G16B16A16_UNORM` | `CL_RGBA, CL_UNORM_INT16` -| `DXGI_FORMAT_R16G16B16A16_UINT` | `CL_RGBA, CL_UNSIGNED_INT16` -| `DXGI_FORMAT_R16G16B16A16_SNORM` | `CL_RGBA, CL_SNORM_INT16` -| `DXGI_FORMAT_R16G16B16A16_SINT` | `CL_RGBA, CL_SIGNED_INT16` -| `DXGI_FORMAT_B8G8R8A8_UNORM` | `CL_BGRA, CL_UNORM_INT8` -| `DXGI_FORMAT_R8G8B8A8_UNORM` | `CL_RGBA, CL_UNORM_INT8` -| `DXGI_FORMAT_R8G8B8A8_UINT` | `CL_RGBA, CL_UNSIGNED_INT8` -| `DXGI_FORMAT_R8G8B8A8_SNORM` | `CL_RGBA, CL_SNORM_INT8` -| `DXGI_FORMAT_R8G8B8A8_SINT` | `CL_RGBA, CL_SIGNED_INT8` -| `DXGI_FORMAT_R32G32_FLOAT` | `CL_RG, CL_FLOAT` -| `DXGI_FORMAT_R32G32_UINT` | `CL_RG, CL_UNSIGNED_INT32` -| `DXGI_FORMAT_R32G32_SINT` | `CL_RG, CL_SIGNED_INT32` -| `DXGI_FORMAT_R16G16_FLOAT` | `CL_RG, CL_HALF_FLOAT` -| `DXGI_FORMAT_R16G16_UNORM` | `CL_RG, CL_UNORM_INT16` -| `DXGI_FORMAT_R16G16_UINT` | `CL_RG, CL_UNSIGNED_INT16` -| `DXGI_FORMAT_R16G16_SNORM` | `CL_RG, CL_SNORM_INT16` -| `DXGI_FORMAT_R16G16_SINT` | `CL_RG, CL_SIGNED_INT16` -| 
`DXGI_FORMAT_R8G8_UNORM` | `CL_RG, CL_UNORM_INT8` -| `DXGI_FORMAT_R8G8_UINT` | `CL_RG, CL_UNSIGNED_INT8` -| `DXGI_FORMAT_R8G8_SNORM` | `CL_RG, CL_SNORM_INT8` -| `DXGI_FORMAT_R8G8_SINT` | `CL_RG, CL_SIGNED_INT8` -| `DXGI_FORMAT_R32_FLOAT` | `CL_R, CL_FLOAT` -| `DXGI_FORMAT_R32_UINT` | `CL_R, CL_UNSIGNED_INT32` -| `DXGI_FORMAT_R32_SINT` | `CL_R, CL_SIGNED_INT32` -| `DXGI_FORMAT_R16_FLOAT` | `CL_R, CL_HALF_FLOAT` -| `DXGI_FORMAT_R16_UNORM` | `CL_R, CL_UNORM_INT16` -| `DXGI_FORMAT_R16_UINT` | `CL_R, CL_UNSIGNED_INT16` -| `DXGI_FORMAT_R16_SNORM` | `CL_R, CL_SNORM_INT16` -| `DXGI_FORMAT_R16_SINT` | `CL_R, CL_SIGNED_INT16` -| `DXGI_FORMAT_R8_UNORM` | `CL_R, CL_UNORM_INT8` -| `DXGI_FORMAT_R8_UINT` | `CL_R, CL_UNSIGNED_INT8` -| `DXGI_FORMAT_R8_SNORM` | `CL_R, CL_SNORM_INT8` -| `DXGI_FORMAT_R8_SINT` | `CL_R, CL_SIGNED_INT8` -|==== - -include::sharingD3D11Inc.txt[] - -== Errors - -Returns a valid non-zero OpenCL image object and _errcode_ret_ is set to `CL_SUCCESS` if the image object is created successfully. -Otherwise, it returns a NULL value with one of the following error values returned in _errcode_ret_: - - * `CL_INVALID_CONTEXT` if _context_ is not a valid context. - * `CL_INVALID_VALUE` if values specified in _flags_ are not valid or if _subresource_ is not a valid subresource index for _resource_. - * `CL_INVALID_D3D11_RESOURCE_KHR` if _resource_ is not a Direct3D 11 texture resource, if _resource_ was created with the `D3D11_USAGE` flag `D3D11_USAGE_IMMUTABLE`, if a _resource_ is a multisampled texture, if a `cl_mem` from subresource _subresource_ of _resource_ has already been created using `clCreateFromD3D11Texture3DKHR`, or if _context_ was not created against the same Direct3D 11 device from which _resource_ was created. 
- * `CL_INVALID_IMAGE_FORMAT_DESCRIPTOR` if the Direct3D 11 texture format of _resource_ is not a value listed in the table above of Direct3D 11 and corresponding OpenCL image formats, or if the Direct3D 11 texture format of _resource_ does not map to a supported OpenCL image format. - * `CL_OUT_OF_HOST_MEMORY` if there is a failure to allocate resources required by the OpenCL implementation on the host. - -== See Also - -reflink:cl_khr_d3d11_sharing - -== Document Notes - -For more information, see the OpenCL Extension Specification at URL - -https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#clCreateFromD3D11Texture3DKHR - -== Copyright - -include::footer.txt[] diff --git a/man/static/clCreateFromDX9MediaSurfaceKHR.txt b/man/static/clCreateFromDX9MediaSurfaceKHR.txt deleted file mode 100644 index af3a7a4b..00000000 --- a/man/static/clCreateFromDX9MediaSurfaceKHR.txt +++ /dev/null @@ -1,107 +0,0 @@ -// Copyright 2014-2024 The Khronos Group Inc. -// SPDX-License-Identifier: CC-BY-4.0 - -:data-uri: -:icons: font -include::{config}/attribs.txt[] - -= clCreateFromDX9MediaSurfaceKHR(3) - -== Name - -clCreateFromDX9MediaSurfaceKHR - Creates an OpenCL image object from a media surface. - -== C Specification - -[source,c] ----- -cl_mem clCreateFromDX9MediaSurfaceKHR(cl_context context, - cl_mem_flags flags, - cl_dx9_media_adapter_type_khr adapter_type, - void *surface_info, - cl_uint plane, - cl_int *errcode_ret) ----- - -== Parameters - - * _context_ - A valid OpenCL context created from a media adapter. - * _flags_ - A bit-field that is used to specify usage information. Refer - to the table of allowed values for _flags_ for flink:clCreateBuffer. - Only `CL_MEM_READ_ONLY`, `CL_MEM_WRITE_ONLY`, and `CL_MEM_READ_WRITE` - values specified in the table of allowed values for _flags_ for - flink:clCreateBuffer can be used. 
- * _adapter_type_ -+ --- -A value from enumeration of supported adapters described in the table of -`cl_dx9_media_adapter_type_khr` values for -flink:clGetDeviceIDsFromDX9MediaAdapterKHR. -The type of _surface_info_ is determined by the adapter type. -The implementation does not need to support all adapter types. -This approach provides flexibility to support additional adapter types in -the future. -Supported adapter types are `CL_ADAPTER_D3D9_KHR`, `CL_ADAPTER_D3D9EX_KHR`, -and `CL_ADAPTER_DXVA_KHR`. - -If _adapter_type_ is `CL_ADAPTER_D3D9_KHR`, `CL_ADAPTER_D3D9EX_KHR`, or -`CL_ADAPTER_DXVA_KHR`, the _surface_info_ points to the following structure: - -`typedef struct _cl_dx9_surface_info_khr { IDirect3DSurface9 *resource; HANDLE shared_handle; } cl_dx9_surface_info_khr;` - -For D3D9 surfaces, we need both the handle to the resource and the resource -itself to have a sufficient amount of information to eliminate a copy of the -surface for sharing in cases where this is possible. Elimination of the copy -is driver dependent. `shared_handle` may be NULL and this may result in -sub-optimal performance. --- - * _surface_info_ - A pointer to one of the structures defined in the - _adapter_type_ description above passed in as a `void` *. - * _plane_ - The plane of resource to share for planar surface formats. For - planar formats, we use the plane parameter to obtain a handle to the - specific plane (Y, U or V for example). For nonplanar formats used by - media, _plane_ must be 0. - * _errcode_ret_ - An appropriate error code. If _errcode_ret_ is NULL, no - error code is returned. - -== Notes - -The width and height of the returned OpenCL 2D image object are determined by the width and height of the plane of resource. -The channel type and order of the returned image object is determined by the format and plane of resource and are described in tables 9.10.3 and 9.10.4. - -This call will increment the internal media surface count on _resource_.
-The internal media surface reference count on _resource_ will be decremented when the OpenCL reference count on the returned OpenCL memory object drops to zero. - -include::sharingDX9Inc.txt[] - -== Errors - -Returns a valid non-zero 2D image object and _errcode_ret_ is set to `CL_SUCCESS` if the 2D image object is created successfully. -Otherwise, it returns a NULL value with one of the following error values returned in _errcode_ret_: - - * `CL_INVALID_CONTEXT` if _context_ is not a valid context. - * `CL_INVALID_VALUE` if values specified in _flags_ are not valid or if _plane_ is not a valid plane of _resource_ specified in _surface_info_. - * `CL_INVALID_DX9_MEDIA_SURFACE_KHR` if _resource_ specified in _surface_info_ is not a valid resource or is not associated with _adapter_type_ (e.g., _adapter_type_ is set to `CL_ADAPTER_D3D9_KHR` and _resource_ is not a Direct3D 9 surface created in `D3DPOOL_DEFAULT`). - * `CL_INVALID_DX9_MEDIA_SURFACE_KHR` if `shared_handle` specified in _surface_info_ is not NULL or a valid handle value. - * `CL_INVALID_DX9_MEDIA_SURFACE_KHR` if _adapter_type_ is set to a media adapter and the _surface_info_ does not reference a media surface of the required type, or if _adapter_type_ is set to a media adapter type and _surface_info_ does not contain a valid reference to a media surface on that adapter, by - flink:clGetMemObjectInfo when _param_name_ is a surface or handle when the image was not created from an appropriate media surface, and from - flink:clGetImageInfo when _param_name_ is `CL_IMAGE_DX9_MEDIA_PLANE_KHR` and image was not created from an appropriate media surface. - * `CL_INVALID_IMAGE_FORMAT_DESCRIPTOR` if the texture format of _resource_ is not listed in tables 9.10.3 and 9.10.4. - * `CL_INVALID_OPERATION` if there are no devices in _context_ that support _adapter_type_. - * `CL_OUT_OF_RESOURCES` if there is a failure to allocate resources required by the OpenCL implementation on the device. 
- * `CL_OUT_OF_HOST_MEMORY` if there is a failure to allocate resources required by the OpenCL implementation on the host. - -== See Also - -reflink:cl_khr_dx9_media_sharing, -flink:clCreateBuffer, - -== Document Notes - -For more information, see the OpenCL Extension Specification at URL - -https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#clCreateFromDX9MediaSurfaceKHR - -== Copyright - -include::footer.txt[] diff --git a/man/static/clCreateFromEGLImageKHR.txt b/man/static/clCreateFromEGLImageKHR.txt deleted file mode 100644 index 99e85f0d..00000000 --- a/man/static/clCreateFromEGLImageKHR.txt +++ /dev/null @@ -1,141 +0,0 @@ -// Copyright 2014-2024 The Khronos Group Inc. -// SPDX-License-Identifier: CC-BY-4.0 - -:data-uri: -:icons: font -include::{config}/attribs.txt[] - -= clCreateFromEGLImageKHR(3) - -== Name - -clCreateFromEGLImageKHR - Creates an EGLImage target from an EGLImage source. - -[source,c] ----- -cl_mem clCreateFromEGLImageKHR(cl_context context, - CLeglDisplayKHR display, - CLeglImageKHR image, - cl_mem_flags flags, - const cl_egl_image_properties_khr * properties, - cl_int * errcode_ret) ----- - - -== Parameters - - * _context_ - A valid OpenCL context. - * _display_ - Should be of type `EGLDisplay`, cast into the type - `CLeglDisplayKHR`. - * _image_ - Should be of type `EGLImageKHR`, cast into the type - `CLeglImageKHR`. Assuming no errors are generated in this function, the - resulting image object will be an EGLImage target of the specified - EGLImage _image_. The resulting `cl_mem` is an image object which may be - used normally by all OpenCL operations. This maps to an `image2d_t` type - in OpenCL kernel code. - * _flags_ - -+ --- -A bit-field that is used to specify usage information about the memory -object being created. The possible values for _flags_ are: -`CL_MEM_READ_ONLY`, `CL_MEM_WRITE_ONLY` and `CL_MEM_READ_WRITE`. 
- -For OpenCL 1.2 _flags_ also accepts: `CL_MEM_HOST_WRITE_ONLY`, -`CL_MEM_HOST_READ_ONLY` or `CL_MEM_HOST_NO_ACCESS`. - -This extension only requires support for `CL_MEM_READ_ONLY`, and for OpenCL -1.2 `CL_MEM_HOST_NO_ACCESS`. For OpenCL 1.1, a `CL_INVALID_OPERATION` will -be returned for images which do not support host mapping. - -If the value passed in _flags_ is not supported by the OpenCL implementation -it will return `CL_INVALID_VALUE`. The accepted _flags_ may be dependent -upon the texture format used. --- - * _properties_ - Specifies a list of property names and their - corresponding values. Each property name is immediately followed by the - corresponding desired value. The list is terminated with 0. No - properties are currently supported with this version of the extension. - _properties_ can be NULL. - * _errcode_ret_ - An appropriate error code. If _errcode_ret_ is NULL, no - error code is returned. - -== Description - -`clCreateFromEGLImageKHR` creates an EGLImage target of type `cl_mem` from -the EGLImage source provided as _image_. - -.Lifetime of Shared Objects - -An OpenCL memory object created from an EGL image remains valid according to -the lifetime behaviour as described in EGL_KHR_image_base. - -"Any EGLImage siblings exist in any client API context" - -For OpenCL this means that while the application retains a reference on the -`cl_mem` (EGL sibling), the image remains valid. - -.Synchronizing OpenCL and EGL Access to Shared Objects - -In order to ensure data integrity, the application is responsible for -synchronizing access to shared CL/EGL objects by their respective APIs. -Failure to provide such synchronization may result in race conditions and -other undefined behavior including non-portability between implementations. - -Prior to calling flink:clEnqueueAcquireEGLObjectsKHR, the application must -ensure that any pending operations which access the objects specified in -_mem_objects_ have completed. 
This may be accomplished in a portable way by -ceasing all client operations on the resource, and issuing and waiting for -completion of a `glFinish` command on all GL contexts with pending -references to these objects. Implementations may offer more efficient -synchronization methods, such as synchronisation primitives or fence -operations. - -Similarly, after calling `clEnqueueReleaseEGLImageObjects`, the application -is responsible for ensuring that any pending OpenCL operations which access -the objects specified in _mem_objects_ have completed prior to executing -subsequent commands in other APIs which reference these objects. This may be -accomplished in a portable way by calling flink:clWaitForEvents with -the event object returned by flink:clEnqueueReleaseGLObjects, or by -calling flink:clFinish. As above, some implementations may offer more -efficient methods. - -Attempting to access the data store of an EGLImage object after it has been -acquired by OpenCL and before it has been released will result in undefined -behavior. Similarly, attempting to access a shared EGLImage object from -OpenCL before it has been acquired by the OpenCL command-queue or after it -has been released, will result in undefined behavior. - -== Errors - - * `CL_INVALID_CONTEXT` if _context_ is not a valid OpenCL context. - * `CL_INVALID_VALUE` if _properties_ contains invalid values, if _display_ - is not a valid display object or if _flags_ are not in the set defined - above. - * `CL_INVALID_EGL_OBJECT_KHR` if _image_ is not a valid EGLImage object. - * `CL_IMAGE_FORMAT_NOT_SUPPORTED` if the OpenCL implementation is not able - to create a `cl_mem` compatible with the provided `CLeglImageKHR` for an - implementation-dependent reason (this could be caused by, but not limited - to, reasons such as unsupported texture formats, etc). - * `CL_INVALID_OPERATION` if there are no devices in _context_ that support - images (i.e. 
`CL_DEVICE_IMAGE_SUPPORT` specified in table 4.3 (see - flink:clGetDeviceInfo) is `CL_FALSE`) or if the flags passed are not - supported for that image type. - * `CL_OUT_OF_RESOURCES` if there is a failure to allocate resources - required by the OpenCL implementation on the device. - * `CL_OUT_OF_HOST_MEMORY` if there is a failure to allocate resources - required by the OpenCL implementation on the host. - -== See Also - -reflink:cl_khr_egl_image, -reflink:cl_khr_egl_event - -== Document Notes - -For more information, see the OpenCL Extension Specification at URL - -https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#cl_khr_egl_image - -== Copyright - -include::footer.txt[] diff --git a/man/static/clCreateFromGLBuffer.txt b/man/static/clCreateFromGLBuffer.txt deleted file mode 100644 index 8518310a..00000000 --- a/man/static/clCreateFromGLBuffer.txt +++ /dev/null @@ -1,86 +0,0 @@ -// Copyright 2014-2024 The Khronos Group Inc. -// SPDX-License-Identifier: CC-BY-4.0 - -:data-uri: -:icons: font -include::{config}/attribs.txt[] - -= clCreateFromGLBuffer(3) - -== Name - -clCreateFromGLBuffer - Creates an OpenCL buffer object from an OpenGL buffer object. - -== C Specification - -[source,c] ----- -cl_mem clCreateFromGLBuffer(cl_context context, - cl_mem_flags flags, - GLuint bufobj, - cl_int * errcode_ret) ----- - -== Parameters - - * _context_ - A valid OpenCL context created from an OpenGL context. - * _flags_ - A bit-field that is used to specify usage information. Refer - to the table for flink:clCreateBuffer for a description of _flags_. Only - `CL_MEM_READ_ONLY`, `CL_MEM_WRITE_ONLY` and `CL_MEM_READ_WRITE` values - specified in the table at flink:clCreateBuffer can be used. - * _bufobj_ - The name of a GL buffer object. The data store of the GL - buffer object must have been previously created by calling OpenGL - function `glBufferData`, although its contents need not be initialized.
- The size of the data store will be used to determine the size of the CL - buffer object. - * _errcode_ret_ - Returns an appropriate error code as described below. If - _errcode_ret_ is NULL, no error code is returned. - -== Description - -The size of the GL buffer object data store at the time `clCreateFromGLBuffer` is called will be used as the size of buffer object returned by `clCreateFromGLBuffer`. -If the state of a GL buffer object is modified through the GL API (e.g. -`glBufferData`) while there exists a corresponding CL buffer object, subsequent use of the CL buffer object will result in undefined behavior. - -The flink:clRetainMemObject and -flink:clReleaseMemObject functions can be used to retain and release the buffer object. - -The CL buffer object created using `clCreateFromGLBuffer` can also be used to create a CL 1D image buffer object. - -== Notes - -include::gl_sharingInc.txt[] - -include::gl_formatsInc.txt[] - -include::gl_lifetimeInc.txt[] - -include::gl_syncInc.txt[] - -== Errors - -Returns a valid non-zero OpenCL buffer object and _errcode_ret_ is set to `CL_SUCCESS` if the buffer object is created successfully. -Otherwise, it returns a NULL value with one of the following error values returned in _errcode_ret_: - - * `CL_INVALID_CONTEXT` if _context_ is not a valid context or was not created from a GL context. - * `CL_INVALID_VALUE` if values specified in _flags_ are not valid. - * `CL_INVALID_GL_OBJECT` if _bufobj_ is not a GL buffer object or is a GL buffer object but does not have an existing data store or the size of the buffer is 0. - * `CL_OUT_OF_RESOURCES` if there is a failure to allocate resources required by the OpenCL implementation on the device. - * `CL_OUT_OF_HOST_MEMORY` if there is a failure to allocate resources required by the OpenCL implementation on the host. 
- -== See Also - -reflink:cl_khr_gl_sharing, -reflink:cl_khr_gl_event, -flink:clCreateBuffer, -flink:clCreateFromGLTexture - -== Document Notes - -For more information, see the OpenCL Extension Specification at URL - -https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#clCreateFromGLBuffer - -== Copyright - -include::footer.txt[] diff --git a/man/static/clCreateFromGLRenderbuffer.txt b/man/static/clCreateFromGLRenderbuffer.txt deleted file mode 100644 index a79e8e4d..00000000 --- a/man/static/clCreateFromGLRenderbuffer.txt +++ /dev/null @@ -1,86 +0,0 @@ -// Copyright 2014-2024 The Khronos Group Inc. -// SPDX-License-Identifier: CC-BY-4.0 - -:data-uri: -:icons: font -include::{config}/attribs.txt[] - -= clCreateFromGLRenderbuffer(3) - -== Name - -clCreateFromGLRenderbuffer - Creates an OpenCL 2D image object from an OpenGL renderbuffer object. - -== C Specification - -[source,c] ----- -cl_mem clCreateFromGLRenderbuffer(cl_context context, - cl_mem_flags flags, - GLuint renderbuffer, - cl_int * errcode_ret) ----- - -== Parameters - - * _context_ - A valid OpenCL context created from an OpenGL context. - * _flags_ - A bit-field that is used to specify usage information. Refer - to the table at flink:clCreateBuffer for a description of _flags_. Only - `CL_MEM_READ_ONLY`, `CL_MEM_WRITE_ONLY`, and `CL_MEM_READ_WRITE` values - specified in the table at flink:clCreateBuffer can be used. - * _renderbuffer_ - The name of a GL renderbuffer object. The renderbuffer - storage must be specified before the image object can be created. The - _renderbuffer_ format and dimensions defined by OpenGL will be used to - create the 2D image object. Only GL renderbuffers with internal formats - that map to appropriate image channel order and data type specified in - tables 5.5 and 5.6 (see reflink:cl_image_format) can be used to create - the 2D image object. - * _errcode_ret_ - Returns an appropriate error code as described below. 
If - _errcode_ret_ is NULL, no error code is returned. - -== Description - -If the state of a GL renderbuffer object is modified through the GL API (i.e. -changes to the dimensions or format used to represent pixels of the GL renderbuffer using appropriate GL API calls such as `glRenderbufferStorage`) while there exists a corresponding CL image object, subsequent use of the CL image object will result in undefined behavior. - -The flink:clRetainMemObject and -flink:clReleaseMemObject functions can be used to retain and release the image objects. - -include::gl_sharingInc.txt[] - -include::gl_formatsInc.txt[] - -include::gl_lifetimeInc.txt[] - -include::gl_syncInc.txt[] - -== Errors - -Returns a valid non-zero OpenCL image object and _errcode_ret_ is set to `CL_SUCCESS` if the image object is created successfully. -Otherwise, it returns a NULL value with one of the following error values returned in _errcode_ret_: - - * `CL_INVALID_CONTEXT` if _context_ is not a valid context or was not created from a GL context. - * `CL_INVALID_VALUE` if values specified in _flags_ are not valid. - * `CL_INVALID_GL_OBJECT` if _renderbuffer_ is not a GL renderbuffer object or if the width or height of _renderbuffer_ is zero. - * `CL_INVALID_IMAGE_FORMAT_DESCRIPTOR` if the OpenGL renderbuffer internal format does not map to a supported OpenCL image format. - * `CL_INVALID_OPERATION` if _renderbuffer_ is a multi-sample GL renderbuffer object. - * `CL_OUT_OF_RESOURCES` if there is a failure to allocate resources required by the OpenCL implementation on the device. - * `CL_OUT_OF_HOST_MEMORY` if there is a failure to allocate resources required by the OpenCL implementation on the host. 
- -== See Also - -reflink:cl_khr_gl_sharing, -flink:clCreateBuffer, -reflink:cl_image_format, -flink:clRetainMemObject, -flink:clReleaseMemObject - -== Document Notes - -For more information, see the OpenCL Extension Specification at URL - -https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#clCreateFromGLRenderbuffer - -== Copyright - -include::footer.txt[] diff --git a/man/static/clCreateFromGLTexture.txt b/man/static/clCreateFromGLTexture.txt deleted file mode 100644 index 6cffe940..00000000 --- a/man/static/clCreateFromGLTexture.txt +++ /dev/null @@ -1,120 +0,0 @@ -// Copyright 2014-2024 The Khronos Group Inc. -// SPDX-License-Identifier: CC-BY-4.0 - -:data-uri: -:icons: font -include::{config}/attribs.txt[] - -= clCreateFromGLTexture(3) - -== Name - -clCreateFromGLTexture - Creates an OpenCL image object, image array object, or image buffer object from an OpenGL texture object, texture array object, texture buffer object, or a single face of an OpenGL cubemap texture object. - -== C Specification - -[source,c] ----- -cl_mem clCreateFromGLTexture(cl_context context, - cl_mem_flags flags, - GLenum texture_target, - GLint miplevel, - GLuint texture, - cl_int * errcode_ret) ----- - -== Parameters - - * _context_ - A valid OpenCL context created from an OpenGL context. - * _flags_ - A bit-field that is used to specify usage information. Refer - to the table for flink:clCreateBuffer for a description of _flags_. Only - the values `CL_MEM_READ_ONLY`, `CL_MEM_WRITE_ONLY` and - `CL_MEM_READ_WRITE` can be used. - * _texture_target_ - -+ --- -This value must be one of `GL_TEXTURE_1D`, `GL_TEXTURE_1D_ARRAY`, -`GL_TEXTURE_BUFFER`, `GL_TEXTURE_2D`, `GL_TEXTURE_2D_ARRAY`, -`GL_TEXTURE_3D`, `GL_TEXTURE_CUBE_MAP_POSITIVE_X`, -`GL_TEXTURE_CUBE_MAP_POSITIVE_Y`, `GL_TEXTURE_CUBE_MAP_POSITIVE_Z`, -`GL_TEXTURE_CUBE_MAP_NEGATIVE_X`, `GL_TEXTURE_CUBE_MAP_NEGATIVE_Y`, -`GL_TEXTURE_CUBE_MAP_NEGATIVE_Z`, or `GL_TEXTURE_RECTANGLE`. 
-(`GL_TEXTURE_RECTANGLE` requires OpenGL 3.1. Alternatively, -`GL_TEXTURE_RECTANGLE_ARB` may be specified if the OpenGL extension -`GL_ARB_texture_rectangle` is supported.) _texture_target_ is used only to -define the image type of _texture_. No reference to a bound GL texture -object is made or implied by this parameter. - -If the reflink:cl_khr_gl_msaa_sharing extension is enabled, _texture_target_ -may be `GL_TEXTURE_2D_MULTISAMPLE` or `GL_TEXTURE_2D_MULTISAMPLE_ARRAY`. - -If _texture_target_ is `GL_TEXTURE_2D_MULTISAMPLE`, `clCreateFromGLTexture` -creates an OpenCL 2D multi-sample image object from an OpenGL 2D -multi-sample texture - -If _texture_target_ is `GL_TEXTURE_2D_MULTISAMPLE_ARRAY`, -`clCreateFromGLTexture` creates an OpenCL 2D multi-sample array image object -from an OpenGL 2D multi-sample texture. --- - * _miplevel_ - The mipmap level to be used. If _texture_target_ is - `GL_TEXTURE_BUFFER`, miplevel must be 0. Implementations may return - `CL_INVALID_OPERATION` for miplevel values > 0 - * _texture_ - The name of a GL 1D, 2D, 3D, 1D array, 2D array, cubemap, - rectangle or buffer texture object. The texture object must be a - complete texture as per OpenGL rules on texture completeness. The - _texture_ format and dimensions defined by OpenGL for the specified - _miplevel_ of the texture will be used to create the OpenCL image memory - object. Only GL texture objects with an internal format that maps to - appropriate image channel order and data type specified in tables 5.5 - and 5.6 (see reflink:cl_image_format) may be used to create the OpenCL - image memory object. - * _errcode_ret_ - Returns an appropriate error code as described below. If - _errcode_ret_ is NULL, no error code is returned. - -== Notes - -If the state of a GL texture object is modified through the GL API (e.g. 
-`glTexImage2D`, `glTexImage3D` or the values of the texture parameters `GL_TEXTURE_BASE_LEVEL` or `GL_TEXTURE_MAX_LEVEL` are modified) while there exists a corresponding CL image object, subsequent use of the CL image object will result in undefined behavior. - -The flink:clRetainMemObject and -flink:clReleaseMemObject functions can be used to retain and release the image objects. - -include::gl_sharingInc.txt[] - -include::gl_formatsInc.txt[] - -include::gl_lifetimeInc.txt[] - -include::gl_syncInc.txt[] - -== Errors - -Returns a valid non-zero OpenCL image object and _errcode_ret_ is set to `CL_SUCCESS` if the image object is created successfully. -Otherwise, it returns a NULL value with one of the following error values returned in _errcode_ret_: - - * `CL_INVALID_CONTEXT` if _context_ is not a valid context or was not created from a GL context. - * `CL_INVALID_VALUE` if values specified in _flags_ are not valid or if value specified in _texture_target_ is not one of the values specified in the description of _texture_target_. - * `CL_INVALID_MIP_LEVEL` if _miplevel_ is less than the value of _level~base~_ (for OpenGL implementations) or zero (for OpenGL ES implementations); or greater than the value of _q_ (for both OpenGL and OpenGL ES). - _level~base~_ and _q_ are defined for the texture in section 3.8.10 (Texture Completeness) of the OpenGL 2.1 specification and section 3.7.10 of the OpenGL ES 2.0. - * `CL_INVALID_MIP_LEVEL` if _miplevel_ is greater than zero and the OpenGL implementation does not support creating from non-zero mipmap levels. - * `CL_INVALID_GL_OBJECT` if _texture_ is not a GL texture object whose type matches _texture_target_, if the specified _miplevel_ of _texture_ is not defined, or if the width or height of the specified _miplevel_ is zero or if the GL texture object is incomplete. - * `CL_INVALID_IMAGE_FORMAT_DESCRIPTOR` if the OpenGL texture internal format does not map to a supported OpenCL image format. 
- * `CL_INVALID_OPERATION` if _texture_ is a GL texture object created with a border width value greater than zero. - * `CL_OUT_OF_RESOURCES` if there is a failure to allocate resources required by the OpenCL implementation on the device. - * `CL_OUT_OF_HOST_MEMORY` if there is a failure to allocate resources required by the OpenCL implementation on the host. - -== See Also - -reflink:cl_khr_gl_sharing, -flink:clCreateBuffer, -flink:clCreateFromGLBuffer - -== Document Notes - -For more information, see the OpenCL Extension Specification at URL - -https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#clCreateFromGLTexture - -== Copyright - -include::footer.txt[] diff --git a/man/static/clEnqueueAcquireD3D10ObjectsKHR.txt b/man/static/clEnqueueAcquireD3D10ObjectsKHR.txt deleted file mode 100644 index 94632277..00000000 --- a/man/static/clEnqueueAcquireD3D10ObjectsKHR.txt +++ /dev/null @@ -1,88 +0,0 @@ -// Copyright 2014-2024 The Khronos Group Inc. -// SPDX-License-Identifier: CC-BY-4.0 - -:data-uri: -:icons: font -include::{config}/attribs.txt[] - -= clEnqueueAcquireD3D10ObjectsKHR(3) - -== Name - -clEnqueueAcquireD3D10ObjectsKHR - Acquire OpenCL memory objects that have been created from Direct3D 10 resources - -== C Specification - -[source,c] ----- -cl_int clEnqueueAcquireD3D10ObjectsKHR(cl_command_queue command_queue, - cl_uint num_objects, - const cl_mem *mem_objects, - cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, - cl_event *event) ----- - -== Parameters - - * _command_queue_ - A valid command-queue. - * _num_objects_ - The number of memory objects to be acquired in - _mem_objects_. - * _mem_objects_ - A pointer to a list of OpenCL memory objects that were - created from Direct3D 10 resources. - * _num_events_in_wait_list_ - Number of events in _event_wait_list_. - * _event_wait_list_ - Specify events that need to complete before this - particular command can be executed. 
If _event_wait_list_ is NULL, then - this particular command does not wait on any event to complete. If - _event_wait_list_ is NULL, _num_events_in_wait_list_ must be 0. If - _event_wait_list_ is not NULL, the list of events pointed to by - _event_wait_list_ must be valid and _num_events_in_wait_list_ must be - greater than 0. The events specified in _event_wait_list_ act as - synchronization points. - * _event_ - Returns an event object that identifies this particular - command and can be used to query or queue a wait for this particular - command to complete. _event_ can be NULL, in which case it will not be - possible for the application to query the status of this command or - queue a wait for this command to complete. If _event_wait_list_ and - _event_ are not NULL, _event_ should not refer to an element of the - _event_wait_list_ array. - -== Notes - -The Direct3D 10 objects are acquired by the OpenCL context associated with `command-queue` and can therefore be used by all command-queues associated with the OpenCL context. - -OpenCL memory objects created from Direct3D 10 resources must be acquired before they can be used by any OpenCL commands queued to a command-queue. -If an OpenCL memory object created from a Direct3D 10 resource is used while it is not currently acquired by OpenCL, the call attempting to use that OpenCL memory object will return `CL_D3D10_RESOURCE_NOT_ACQUIRED_KHR`. - -If `CL_CONTEXT_INTEROP_USER_SYNC` is not specified as `CL_TRUE` during context creation, `clEnqueueAcquireD3D10ObjectsKHR` provides the synchronization guarantee that any Direct3D 10 calls involving the interop device(s) used in the OpenCL context made before `clEnqueueAcquireD3D10ObjectsKHR` is called will complete executing before _event_ reports completion and before the execution of any subsequent OpenCL work issued in _command_queue_ begins. 
-If the context was created with properties specifying `CL_CONTEXT_INTEROP_USER_SYNC` as `CL_TRUE`, the user is responsible for guaranteeing that any Direct3D 10 calls involving the interop device(s) used in the OpenCL context made before `clEnqueueAcquireD3D10ObjectsKHR` is called have completed before calling `clEnqueueAcquireD3D10ObjectsKHR`. - -include::sharingD3D10Inc.txt[] - -== Errors - -Returns `CL_SUCCESS` if the function is executed successfully. -If _num_objects_ is 0 and _mem_objects_ is NULL then the function does nothing and returns `CL_SUCCESS`. -Otherwise it returns one of the following errors: - - * `CL_INVALID_VALUE` if _num_objects_ is zero and _mem_objects_ is not a NULL value or if _num_objects_ > 0 and _mem_objects_ is NULL. - * `CL_INVALID_MEM_OBJECT` if memory objects in _mem_objects_ are not valid OpenCL memory objects or if memory objects in _mem_objects_ have not been created from Direct3D 10 resources. - * `CL_INVALID_COMMAND_QUEUE` if _command_queue_ is not a valid command-queue. - * `CL_INVALID_CONTEXT` if context associated with _command_queue_ was not created from a Direct3D 10 context. - * `CL_D3D10_RESOURCE_ALREADY_ACQUIRED_KHR` when any of _mem_objects_ are currently acquired using `clEnqueueAcquireD3D10ObjectsKHR` but have not been released using flink:clEnqueueReleaseD3D10ObjectsKHR. - * `CL_INVALID_EVENT_WAIT_LIST` if _event_wait_list_ is NULL and `num_event_in_wait_list` > 0, or _event_wait_list_ is not NULL and `num_event_in_wait_list` is 0, or if event objects in _event_wait_list_ are not valid events. - * `CL_OUT_OF_HOST_MEMORY` if there is a failure to allocate resources required by the OpenCL implementation on the host.
- -== See Also - -reflink:cl_khr_d3d10_sharing - -== Document Notes - -For more information, see the OpenCL Extension Specification at URL - -https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#clEnqueueAcquireD3D10ObjectsKHR - -== Copyright - -include::footer.txt[] diff --git a/man/static/clEnqueueAcquireD3D11ObjectsKHR.txt b/man/static/clEnqueueAcquireD3D11ObjectsKHR.txt deleted file mode 100644 index 34822411..00000000 --- a/man/static/clEnqueueAcquireD3D11ObjectsKHR.txt +++ /dev/null @@ -1,89 +0,0 @@ -// Copyright 2014-2024 The Khronos Group Inc. -// SPDX-License-Identifier: CC-BY-4.0 - -:data-uri: -:icons: font -include::{config}/attribs.txt[] - -= clEnqueueAcquireD3D11ObjectsKHR(3) - -== Name - -clEnqueueAcquireD3D11ObjectsKHR - Acquire OpenCL memory objects that have been created from Direct3D 11 resources - -== C Specification - -[source,c] ----- -cl_int clEnqueueAcquireD3D11ObjectsKHR(cl_command_queue command_queue, - cl_uint num_objects, - const cl_mem *mem_objects, - cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, - cl_event *event) ----- - -== Parameters - - * _command_queue_ - A valid command-queue. - * _num_objects_ - The number of memory objects to be acquired in - _mem_objects_. - * _mem_objects_ - A pointer to a list of OpenCL memory objects that were - created from Direct3D 11 resources. - * _num_events_in_wait_list_ - Number of events in _event_wait_list_. - * _event_wait_list_ - Specify events that need to complete before this - particular command can be executed. If _event_wait_list_ is NULL, then - this particular command does not wait on any event to complete. If - _event_wait_list_ is NULL, _num_events_in_wait_list_ must be 0. If - _event_wait_list_ is not NULL, the list of events pointed to by - _event_wait_list_ must be valid and _num_events_in_wait_list_ must be - greater than 0. The events specified in _event_wait_list_ act as - synchronization points. 
- * _event_ - Returns an event object that identifies this particular - command and can be used to query or queue a wait for this particular - command to complete. _event_ can be NULL in which case it will not be - possible for the application to query the status of this command or - queue a wait for this command to complete. If _event_wait_list_ and - _event_ are not NULL, _event_ should not refer to an element of the - _event_wait_list_ array. - -== Notes - -The Direct3D 11 objects are acquired by the OpenCL context associated with `command-queue` and can therefore be used by all command-queues associated with the OpenCL context. - -OpenCL memory objects created from Direct3D 11 resources must be acquired before they can be used by any OpenCL commands queued to a command-queue. -If an OpenCL memory object created from a Direct3D 11 resource is used while it is not currently acquired by OpenCL, the call attempting to use that OpenCL memory object will return `CL_D3D11_RESOURCE_NOT_ACQUIRED_KHR`. - -If `CL_CONTEXT_INTEROP_USER_SYNC` is not specified as `CL_TRUE` during context creation, `clEnqueueAcquireD3D11ObjectsKHR` provides the synchronization guarantee that any Direct3D 11 calls involving the interop device(s) used in the OpenCL context made before `clEnqueueAcquireD3D11ObjectsKHR` is called will complete executing before _event_ reports completion and before the execution of any subsequent OpenCL work issued in _command_queue_ begins. -If the context was created with properties specifying `CL_CONTEXT_INTEROP_USER_SYNC` as `CL_TRUE`, the user is responsible for guaranteeing that any Direct3D 11 calls involving the interop device(s) used in the OpenCL context made before `clEnqueueAcquireD3D11ObjectsKHR` is called have completed before calling `clEnqueueAcquireD3D11ObjectsKHR`. - -include::sharingD3D11Inc.txt[] - -== Errors - -Returns `CL_SUCCESS` if the function is executed successfully. 
-If _num_objects_ is 0 and _mem_objects_ is NULL then the function does nothing and returns `CL_SUCCESS`. -Otherwise it returns one of the following errors: - - * `CL_INVALID_VALUE` if _num_objects_ is zero and _mem_objects_ is not a NULL value or if _num_objects_ > 0 and _mem_objects_ is NULL. - * `CL_INVALID_MEM_OBJECT` if memory objects in _mem_objects_ are not valid OpenCL memory objects or if memory objects in _mem_objects_ have not been created from Direct3D 11 resources. - * `CL_INVALID_COMMAND_QUEUE` if _command_queue_ is not a valid command-queue. - * `CL_INVALID_CONTEXT` if context associated with _command_queue_ was not created from a Direct3D 11 context. - * `CL_D3D11_RESOURCE_ALREADY_ACQUIRED_KHR` if memory objects in _mem_objects_ have previously been acquired using `clEnqueueAcquireD3D11ObjectsKHR` but have not been released using flink:clEnqueueReleaseD3D11ObjectsKHR. - * `CL_D3D11_RESOURCE_ALREADY_ACQUIRED_KHR` when any of _mem_objects_ are currently acquired by OpenCL. - * `CL_INVALID_EVENT_WAIT_LIST` if _event_wait_list_ is NULL and `num_event_in_wait_list` > 0, or _event_wait_list_ is not NULL and `num_event_in_wait_list` is 0, or if event objects in _event_wait_list_ are not valid events. - * `CL_OUT_OF_HOST_MEMORY` if there is a failure to allocate resources required by the OpenCL implementation on the host. - -== See Also - -reflink:cl_khr_d3d11_sharing - -== Document Notes - -For more information, see the OpenCL Extension Specification at URL - -https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#clEnqueueAcquireD3D11ObjectsKHR - -== Copyright - -include::footer.txt[] diff --git a/man/static/clEnqueueAcquireDX9MediaSurfacesKHR.txt b/man/static/clEnqueueAcquireDX9MediaSurfacesKHR.txt deleted file mode 100644 index e2750b64..00000000 --- a/man/static/clEnqueueAcquireDX9MediaSurfacesKHR.txt +++ /dev/null @@ -1,89 +0,0 @@ -// Copyright 2014-2024 The Khronos Group Inc. 
-// SPDX-License-Identifier: CC-BY-4.0 - -:data-uri: -:icons: font -include::{config}/attribs.txt[] - -= clEnqueueAcquireDX9MediaSurfacesKHR(3) - -== Name - -clEnqueueAcquireDX9MediaSurfacesKHR - Acquire OpenCL memory objects that have been created from a media surface. - -== C Specification - -[source,c] ----- -cl_int clEnqueueAcquireDX9MediaSurfacesKHR(cl_command_queue command_queue, - cl_uint num_objects, - const cl_mem *mem_objects, - cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, - cl_event *event) ----- - -== Parameters - - * _command_queue_ - A valid command-queue. - * _num_objects_ - The number of memory objects to be acquired in - _mem_objects_. - * _mem_objects_ - A pointer to a list of OpenCL memory objects that were - created from media surfaces. - * _num_events_in_wait_list_ - Number of events in _event_wait_list_. - * _event_wait_list_ - Specify events that need to complete before this - particular command can be executed. If _event_wait_list_ is NULL, then - this particular command does not wait on any event to complete. If - _event_wait_list_ is NULL, _num_events_in_wait_list_ must be 0. If - _event_wait_list_ is not NULL, the list of events pointed to by - _event_wait_list_ must be valid and _num_events_in_wait_list_ must be - greater than 0. The events specified in _event_wait_list_ act as - synchronization points. - * _event_ - Returns an event object that identifies this particular - command and can be used to query or queue a wait for this particular - command to complete. _event_ can be NULL in which case it will not be - possible for the application to query the status of this command or - queue a wait for this command to complete. If _event_wait_list_ and - _event_ are not NULL, _event_ should not refer to an element of the - _event_wait_list_ array. - -== Notes - -Used to acquire OpenCL memory objects that have been created from a media surface. 
-The media surfaces are acquired by the OpenCL context associated with _command_queue_ and can therefore be used by all command-queues associated with the OpenCL context. - -OpenCL memory objects created from media surfaces must be acquired before they can be used by any OpenCL commands queued to a command-queue. -If an OpenCL memory object created from a media surface is used while it is not currently acquired by OpenCL, the call attempting to use that OpenCL memory object will return `CL_DX9_MEDIA_SURFACE_NOT_ACQUIRED_KHR`. - -If `CL_CONTEXT_INTEROP_USER_SYNC` is not specified as `CL_TRUE` during context creation, `clEnqueueAcquireDX9MediaSurfacesKHR` provides the synchronization guarantee that any media adapter API calls involving the interop device(s) used in the OpenCL context made before `clEnqueueAcquireDX9MediaSurfacesKHR` is called will complete executing before _event_ reports completion and before the execution of any subsequent OpenCL work issued in _command_queue_ begins. -If the context was created with properties specifying `CL_CONTEXT_INTEROP_USER_SYNC` as `CL_TRUE`, the user is responsible for guaranteeing that any media adapter API calls involving the interop device(s) used in the OpenCL context made before `clEnqueueAcquireDX9MediaSurfacesKHR` is called have completed before calling `clEnqueueAcquireDX9MediaSurfacesKHR`. - -include::sharingDX9Inc.txt[] - -== Errors - -Returns `CL_SUCCESS` returns `CL_SUCCESS` if the function is executed successfully. -If _num_objects_ is 0 and _mem_objects_ is NULL then the function does nothing and returns `CL_SUCCESS`. -Otherwise it returns one of the following errors: - - * `CL_INVALID_VALUE` if _num_objects_ is zero and _mem_objects_ is not a NULL value or if _num_objects_ > 0 and _mem_objects_ is NULL. - * `CL_INVALID_MEM_OBJECT` if memory objects in _mem_objects_ are not valid OpenCL memory objects or if memory objects in _mem_objects_ have not been created from media surfaces. 
- * `CL_INVALID_COMMAND_QUEUE` if _command_queue_ is not a valid command-queue. - * `CL_INVALID_CONTEXT` if context associated with _command_queue_ was not created from a device that can share the media surface referenced by _mem_objects_. - * `CL_DX9_MEDIA_SURFACE_ALREADY_ACQUIRED_KHR` if memory objects in _mem_objects_ have previously been acquired using `clEnqueueAcquireDX9MediaSurfacesKHR` but have not been released using flink:clEnqueueReleaseDX9MediaSurfacesKHR. - * `CL_INVALID_EVENT_WAIT_LIST` if _event_wait_list_ is NULL and `num_event_in_wait_list` > 0, or _event_wait_list_ is not NULL and `num_event_in_wait_list` is 0, or if event objects in _event_wait_list_ are not valid events. - * `CL_OUT_OF_HOST_MEMORY` if there is a failure to allocate resources required by the OpenCL implementation on the host. - -== See Also - -reflink:cl_khr_dx9_media_sharing - -== Document Notes - -For more information, see the OpenCL Extension Specification at URL - -https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#clEnqueueAcquireDX9MediaSurfacesKHR - -== Copyright - -include::footer.txt[] diff --git a/man/static/clEnqueueAcquireEGLObjectsKHR.txt b/man/static/clEnqueueAcquireEGLObjectsKHR.txt deleted file mode 100644 index a2f84a60..00000000 --- a/man/static/clEnqueueAcquireEGLObjectsKHR.txt +++ /dev/null @@ -1,84 +0,0 @@ -// Copyright 2014-2024 The Khronos Group Inc. -// SPDX-License-Identifier: CC-BY-4.0 - -:data-uri: -:icons: font -include::{config}/attribs.txt[] - -= clEnqueueAcquireEGLObjectsKHR(3) - -== Name - -clEnqueueAcquireEGLObjectsKHR - Acquire OpenCL memory objects that have been created from EGL resources. - -[source,c] ----- -cl_int clEnqueueAcquireEGLObjectsKHR(cl_command_queue command_queue, - cl_uint num_objects, - const cl_mem *mem_objects, - cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, - cl_event *event) ----- - -== Parameters - - * _command_queue_ - A valid command-queue. 
- * _num_objects_ - The number of memory objects to be acquired in - _mem_objects_. - * _mem_objects_ - A pointer to a list of OpenCL memory objects that were - created from EGL resources, within the context associate with - _command_queue_. - * _num_events_in_wait_list_ - Number of events in _event_wait_list_. - * _event_wait_list_ - Specify events that need to complete before this - particular command can be executed. If _event_wait_list_ is NULL, then - this particular command does not wait on any event to complete. If - _event_wait_list_ is NULL, _num_events_in_wait_list_ must be 0. If - _event_wait_list_ is not NULL, the list of events pointed to by - _event_wait_list_ must be valid and _num_events_in_wait_list_ must be - greater than 0. The events specified in _event_wait_list_ act as - synchronization points. - * _event_ - Returns an event object that identifies this command and can - be used to query or queue a wait for the command to complete. _event_ - can be NULL in which case it will not be possible for the application to - query the status of this command or queue a wait for this command to - complete. - -== Description - -This function is used to acquire OpenCL memory objects that have been created from EGL resources. -The EGL objects are acquired by the OpenCL context associated with _command_queue_ and can therefore be used by all command-queues associated with the OpenCL context. - -OpenCL memory objects created from EGL resources must be acquired before they can be used by any OpenCL commands queued to a command-queue. -If an OpenCL memory object created from a EGL resource is used while it is not currently acquired by OpenCL, the call attempting to use that OpenCL memory object will return `CL_EGL_RESOURCE_NOT_ACQUIRED_KHR`. - -== Errors - -Returns `CL_SUCCESS` if the function is executed successfully. -If _num_objects_ is 0 and _mem_objects_ is NULL the function does nothing and returns `CL_SUCCESS`. 
-Otherwise, it returns one of the following errors: - - * `CL_INVALID_VALUE` if _num_objects_ is zero and _mem_objects_ is not a NULL value or if _num_objects_ > 0 and _mem_objects_ is NULL. - * `CL_INVALID_MEM_OBJECT` if memory objects in _mem_objects_ are not valid OpenCL memory objects in the context associated with _command_queue_. - * `CL_INVALID_EGL_OBJECT_KHR` if memory objects in _mem_objects_ have not been created from EGL resources. - * `CL_INVALID_COMMAND_QUEUE` if _command_queue_ is not a valid command-queue. - * `CL_INVALID_EVENT_WAIT_LIST` if _event_wait_list_ is NULL and _num_events_in_wait_list_ > 0, or _event_wait_list_ is not NULL and _num_events_in_wait_list_ is 0, or if event objects in _event_wait_list_ are not valid events. - * `CL_OUT_OF_RESOURCES` if there is a failure to allocate resources required by the OpenCL implementation on the device. - * `CL_OUT_OF_HOST_MEMORY` if there is a failure to allocate resources required by the OpenCL implementation on the host. - -== See Also - -reflink:cl_khr_egl_image, -reflink:cl_khr_egl_event, -flink:clEnqueueReleaseEGLObjectsKHR, -flink:clCreateFromEGLImageKHR - -== Document Notes - -For more information, see the OpenCL Extension Specification at URL - -https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#cl_khr_egl_image - -== Copyright - -include::footer.txt[] diff --git a/man/static/clEnqueueAcquireGLObjects.txt b/man/static/clEnqueueAcquireGLObjects.txt deleted file mode 100644 index d989fd3d..00000000 --- a/man/static/clEnqueueAcquireGLObjects.txt +++ /dev/null @@ -1,117 +0,0 @@ -// Copyright 2014-2024 The Khronos Group Inc. -// SPDX-License-Identifier: CC-BY-4.0 - -:data-uri: -:icons: font -include::{config}/attribs.txt[] - -= clEnqueueAcquireGLObjects(3) - -== Name - -clEnqueueAcquireGLObjects - Acquire OpenCL memory objects that have been created from OpenGL objects. 
- -== C Specification - -[source,c] ----- -cl_int clEnqueueAcquireGLObjects(cl_command_queue command_queue, - cl_uint num_objects, - const cl_mem *mem_objects, - cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, - cl_event *event) ----- - -== Parameters - - * _command_queue_ - A valid command-queue. All devices used to create the - OpenCL context associated with _command_queue_ must support acquiring - shared CL/GL objects. This constraint is enforced at context creation - time. - * _num_objects_ - The number of memory objects to be acquired in - _mem_objects_. - * _mem_objects_ - A pointer to a list of CL memory objects that correspond - to GL objects. - * _num_events_in_wait_list_ - Number of events in _event_wait_list_. - * _event_wait_list_ - Specify events that need to complete before this - particular command can be executed. If _event_wait_list_ is NULL, then - this particular command does not wait on any event to complete. If - _event_wait_list_ is NULL, _num_events_in_wait_list_ must be 0. If - _event_wait_list_ is not NULL, the list of events pointed to by - _event_wait_list_ must be valid and _num_events_in_wait_list_ must be - greater than 0. The events specified in _event_wait_list_ act as - synchronization points. - * _event_ - Returns an event object that identifies this command and can - be used to query or queue a wait for the command to complete. _event_ - can be NULL in which case it will not be possible for the application to - query the status of this command or queue a wait for this command to - complete. If _event_wait_list_ and _event_ are not NULL, _event_ should - not refer to an element of the _event_wait_list_ array. 
-+ --- -If the reflink:cl_khr_gl_event extension is supported, if an OpenGL context -is bound to the current thread, then any OpenGL commands which - - * affect or access the contents of a memory object listed in the - _mem_objects_ list, and - * were issued on that OpenGL context prior to the call to - `clEnqueueAcquireGLObjects` - -will complete before execution of any OpenCL commands following the -`clEnqueueAcquireGLObjects` which affect or access any of those memory -objects. If a non-NULL event object is returned, it will report completion -only after completion of such OpenGL commands. - -If the reflink:cl_khr_egl_event extension is supported, prior to calling -`clEnqueueAcquireGLObjects`, the application must ensure that any pending -EGL or EGL client API operations which access the objects specified in -_mem_objects_ have completed. --- - -== Description - -These objects need to be acquired before they can be used by any OpenCL commands queued to a command-queue. -The OpenGL objects are acquired by the OpenCL context associated with _command_queue_ and can therefore be used by all command-queues associated with the OpenCL context. - -== Notes - -include::gl_sharingInc.txt[] - -include::gl_formatsInc.txt[] - -include::gl_lifetimeInc.txt[] - -include::gl_syncInc.txt[] - -== Errors - -Returns `CL_SUCCESS` if the function is executed successfully. -If _num_objects_ is 0 and _mem_objects_ is NULL the function does nothing and returns `CL_SUCCESS`. -Otherwise, it returns one of the following errors: - - * `CL_INVALID_VALUE` if _num_objects_ is zero and _mem_objects_ is not a NULL value or if _num_objects_ > 0 and _mem_objects_ is NULL. - * `CL_INVALID_MEM_OBJECT` if memory objects in _mem_objects_ are not valid OpenCL memory objects. - * `CL_INVALID_COMMAND_QUEUE` if _command_queue_ is not a valid command-queue. - * `CL_INVALID_CONTEXT` if context associated with _command_queue_ was not created from an OpenGL context. 
- * `CL_INVALID_GL_OBJECT` if memory objects in _mem_objects_ have not been created from a GL object(s). - * `CL_INVALID_EVENT_WAIT_LIST` if _event_wait_list_ is NULL and _num_events_in_wait_list_ > 0, or _event_wait_list_ is not NULL and _num_events_in_wait_list_ is 0, or if event objects in _event_wait_list_ are not valid events. - * `CL_OUT_OF_RESOURCES` if there is a failure to allocate resources required by the OpenCL implementation on the device. - * `CL_OUT_OF_HOST_MEMORY` if there is a failure to allocate resources required by the OpenCL implementation on the host. - -== See Also - -reflink:cl_khr_gl_sharing, -reflink:cl_khr_gl_event, -flink:clEnqueueReleaseGLObjects, -flink:clCreateBuffer - -== Document Notes - -For more information, see the OpenCL Extension Specification at URL - -https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#clEnqueueAcquireGLObjects - -== Copyright - -include::footer.txt[] diff --git a/man/static/clEnqueueReleaseD3D10ObjectsKHR.txt b/man/static/clEnqueueReleaseD3D10ObjectsKHR.txt deleted file mode 100644 index 7be974af..00000000 --- a/man/static/clEnqueueReleaseD3D10ObjectsKHR.txt +++ /dev/null @@ -1,86 +0,0 @@ -// Copyright 2014-2024 The Khronos Group Inc. -// SPDX-License-Identifier: CC-BY-4.0 - -:data-uri: -:icons: font -include::{config}/attribs.txt[] - -= clEnqueueReleaseD3D10ObjectsKHR(3) - -== Name - -clEnqueueReleaseD3D10ObjectsKHR - Release OpenCL memory objects that have been created from Direct3D 10 resources. - -== C Specification - -[source,c] ----- -cl_int clEnqueueReleaseD3D10ObjectsKHR(cl_command_queue command_queue, - cl_uint num_objects, - const cl_mem *mem_objects, - cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, - cl_event *event) ----- - -== Parameters - - * _num_objects_ - The number of memory objects to be released in - _mem_objects_. - * _mem_objects_ - A pointer to a list of OpenCL memory objects that were - created from Direct3D 10 resources. 
- * _num_events_in_wait_list_ - Number of events in _event_wait_list_. - * _event_wait_list_ - Specify events that need to complete before this - particular command can be executed. If _event_wait_list_ is NULL, then - this particular command does not wait on any event to complete. If - _event_wait_list_ is NULL, _num_events_in_wait_list_ must be 0. If - _event_wait_list_ is not NULL, the list of events pointed to by - _event_wait_list_ must be valid and _num_events_in_wait_list_ must be - greater than 0. - * _event_ returns an event object that identifies this command and - can be used to query or wait for this command to complete. - If _event_ is `NULL` or the enqueue is unsuccessful, no event will be - created and therefore it will not be possible to query the status of this - command or to wait for this command to complete. - If _event_wait_list_ and _event_ are not `NULL`, _event_ must not refer - to an element of the _event_wait_list_ array. - -== Notes - -The Direct3D 10 objects are released by the OpenCL context associated with _command_queue_. - -OpenCL memory objects created from Direct3D 10 resources which have been acquired by OpenCL must be released by OpenCL before they may be accessed by Direct3D 10. -Accessing a Direct3D 10 resource while its corresponding OpenCL memory object is acquired is in error and will result in undefined behavior, including but not limited to possible OpenCL errors, data corruption, and program termination. - -If `CL_CONTEXT_INTEROP_USER_SYNC` is not specified as `CL_TRUE` during context creation, `clEnqueueReleaseD3D10ObjectsKHR` provides the synchronization guarantee that any calls to Direct3D 10 calls involving the interop device(s) used in the OpenCL context made after the call to `clEnqueueReleaseD3D10ObjectsKHR` will not start executing until after all events in _event_wait_list_ are complete and all work already submitted to _command_queue_ completes execution. 
-If the context was created with properties specifying `CL_CONTEXT_INTEROP_USER_SYNC` as `CL_TRUE`, the user is responsible for guaranteeing that any Direct3D 10 calls involving the interop device(s) used in the OpenCL context made after `clEnqueueReleaseD3D10ObjectsKHR` will not start executing until after event returned by `clEnqueueReleaseD3D10ObjectsKHR` reports completion. - -include::sharingD3D10Inc.txt[] - -== Errors - -Returns `CL_SUCCESS` if the function is executed successfully. -If _num_objects_ is 0 and _mem_objects_ is NULL the function does nothing and returns `CL_SUCCESS`. -Otherwise it returns one of the following errors: - - * `CL_INVALID_VALUE` if _num_objects_ is zero and _mem_objects_ is not a NULL value or if _num_objects_ > 0 and _mem_objects_ is NULL. - * `CL_INVALID_MEM_OBJECT` if memory objects in _mem_objects_ are not valid OpenCL memory objects or if memory objects in _mem_objects_ have not been created from Direct3D 10 resources. - * `CL_INVALID_COMMAND_QUEUE` if _command_queue_ is not a valid command-queue. - * `CL_INVALID_CONTEXT` if context associated with _command_queue_ was not created from an Direct3D 10 device. - * `CL_D3D10_RESOURCE_NOT_ACQUIRED_KHR` if memory objects in _mem_objects_ have not previously been acquired using `clEnqueueAcquireD3D10ObjectsKHR`, or have been released using `clEnqueueReleaseD3D10ObjectsKHR` since the last time that they were acquired. - * `CL_INVALID_EVENT_WAIT_LIST` if _event_wait_list_ is NULL and `num_event_in_wait_list` > 0, or _event_wait_list_ is not NULL and `num_event_in_wait_list` is 0, or if event objects in _event_wait_list_ are not valid events. - * `CL_OUT_OF_HOST_MEMORY` if there is a failure to allocate resources required by the OpenCL implementation on the host. 
- -== See Also - -reflink:cl_khr_d3d10_sharing - -== Document Notes - -For more information, see the OpenCL Extension Specification at URL - -https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#clEnqueueReleaseD3D10ObjectsKHR - -== Copyright - -include::footer.txt[] diff --git a/man/static/clEnqueueReleaseD3D11ObjectsKHR.txt b/man/static/clEnqueueReleaseD3D11ObjectsKHR.txt deleted file mode 100644 index fedb562e..00000000 --- a/man/static/clEnqueueReleaseD3D11ObjectsKHR.txt +++ /dev/null @@ -1,86 +0,0 @@ -// Copyright 2014-2024 The Khronos Group Inc. -// SPDX-License-Identifier: CC-BY-4.0 - -:data-uri: -:icons: font -include::{config}/attribs.txt[] - -= clEnqueueReleaseD3D11ObjectsKHR(3) - -== Name - -clEnqueueReleaseD3D11ObjectsKHR - Release OpenCL memory objects that have been created from Direct3D 11 resources. - -== C Specification - -[source,c] ----- -cl_int clEnqueueReleaseD3D11ObjectsKHR(cl_command_queue command_queue, - cl_uint num_objects, - const cl_mem *mem_objects, - cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, - cl_event *event) ----- - -== Parameters - - * _num_objects_ - The number of memory objects to be released in - _mem_objects_. - * _mem_objects_ - A pointer to a list of OpenCL memory objects that were - created from Direct3D 11 resources. - * _num_events_in_wait_list_ - Number of events in _event_wait_list_. - * _event_wait_list_ - Specify events that need to complete before this - particular command can be executed. If _event_wait_list_ is NULL, then - this particular command does not wait on any event to complete. If - _event_wait_list_ is NULL, _num_events_in_wait_list_ must be 0. If - _event_wait_list_ is not NULL, the list of events pointed to by - _event_wait_list_ must be valid and _num_events_in_wait_list_ must be - greater than 0. - * _event_ returns an event object that identifies this command and - can be used to query or wait for this command to complete. 
- If _event_ is `NULL` or the enqueue is unsuccessful, no event will be - created and therefore it will not be possible to query the status of this - command or to wait for this command to complete. - If _event_wait_list_ and _event_ are not `NULL`, _event_ must not refer - to an element of the _event_wait_list_ array. - -== Notes - -The Direct3D 11 objects are released by the OpenCL context associated with _command_queue_. - -OpenCL memory objects created from Direct3D 11 resources which have been acquired by OpenCL must be released by OpenCL before they may be accessed by Direct3D 11. -Accessing a Direct3D 11 resource while its corresponding OpenCL memory object is acquired is in error and will result in undefined behavior, including but not limited to possible OpenCL errors, data corruption, and program termination. - -If `CL_CONTEXT_INTEROP_USER_SYNC` is not specified as `CL_TRUE` during context creation, `clEnqueueReleaseD3D11ObjectsKHR` provides the synchronization guarantee that any calls to Direct3D 11 calls involving the interop device(s) used in the OpenCL context made after the call to `clEnqueueReleaseD3D11ObjectsKHR` will not start executing until after all events in _event_wait_list_ are complete and all work already submitted to _command_queue_ completes execution. -If the context was created with properties specifying `CL_CONTEXT_INTEROP_USER_SYNC` as `CL_TRUE`, the user is responsible for guaranteeing that any Direct3D 11 calls involving the interop device(s) used in the OpenCL context made after `clEnqueueReleaseD3D11ObjectsKHR` will not start executing until after event returned by `clEnqueueReleaseD3D11ObjectsKHR` reports completion. - -include::sharingD3D11Inc.txt[] - -== Errors - -Returns `CL_SUCCESS` if the function is executed successfully. -If _num_objects_ is 0 and _mem_objects_ is NULL the function does nothing and returns `CL_SUCCESS`. 
-Otherwise it returns one of the following errors: - - * `CL_INVALID_VALUE` if _num_objects_ is zero and _mem_objects_ is not a NULL value or if _num_objects_ > 0 and _mem_objects_ is NULL. - * `CL_INVALID_MEM_OBJECT` if memory objects in _mem_objects_ are not valid OpenCL memory objects or if memory objects in _mem_objects_ have not been created from Direct3D 11 resources. - * `CL_INVALID_COMMAND_QUEUE` if _command_queue_ is not a valid command-queue. - * `CL_INVALID_CONTEXT` if context associated with _command_queue_ was not created from a Direct3D 11 device. - * `CL_D3D11_RESOURCE_NOT_ACQUIRED_KHR` if memory objects in _mem_objects_ have not previously been acquired using flink:clEnqueueAcquireD3D11ObjectsKHR, or have been released using `clEnqueueReleaseD3D11ObjectsKHR` since the last time that they were acquired. - * `CL_INVALID_EVENT_WAIT_LIST` if _event_wait_list_ is NULL and `num_event_in_wait_list` > 0, or _event_wait_list_ is not NULL and `num_event_in_wait_list` is 0, or if event objects in _event_wait_list_ are not valid events. - * `CL_OUT_OF_HOST_MEMORY` if there is a failure to allocate resources required by the OpenCL implementation on the host. - -== See Also - -reflink:cl_khr_d3d11_sharing - -== Document Notes - -For more information, see the OpenCL Extension Specification at URL - -https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#clEnqueueReleaseD3D11ObjectsKHR - -== Copyright - -include::footer.txt[] diff --git a/man/static/clEnqueueReleaseDX9MediaSurfacesKHR.txt b/man/static/clEnqueueReleaseDX9MediaSurfacesKHR.txt deleted file mode 100644 index c5648dad..00000000 --- a/man/static/clEnqueueReleaseDX9MediaSurfacesKHR.txt +++ /dev/null @@ -1,88 +0,0 @@ -// Copyright 2014-2024 The Khronos Group Inc. 
-// SPDX-License-Identifier: CC-BY-4.0 - -:data-uri: -:icons: font -include::{config}/attribs.txt[] - -= clEnqueueReleaseDX9MediaSurfacesKHR(3) - -== Name - -clEnqueueReleaseDX9MediaSurfacesKHR - Release OpenCL memory objects that have been created from media surfaces. - -== C Specification - -[source,c] ----- -cl_int clEnqueueReleaseDX9MediaSurfacesKHR(cl_command_queue command_queue, - cl_uint num_objects, - const cl_mem *mem_objects, - cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, - cl_event *event) ----- - -== Parameters - - * _command_queue_ - A valid command-queue. - * _num_objects_ - The number of memory objects to be released in - _mem_objects_. - * _mem_objects_ - A pointer to a list of OpenCL memory objects that were - created from media surfaces. - * _num_events_in_wait_list_ - Number of events in _event_wait_list_. - * _event_wait_list_ - Specify events that need to complete before this - particular command can be executed. If _event_wait_list_ is NULL, then - this particular command does not wait on any event to complete. If - _event_wait_list_ is NULL, _num_events_in_wait_list_ must be 0. If - _event_wait_list_ is not NULL, the list of events pointed to by - _event_wait_list_ must be valid and _num_events_in_wait_list_ must be - greater than 0. - * _event_ returns an event object that identifies this command and - can be used to query or wait for this command to complete. - If _event_ is `NULL` or the enqueue is unsuccessful, no event will be - created and therefore it will not be possible to query the status of this - command or to wait for this command to complete. - If _event_wait_list_ and _event_ are not `NULL`, _event_ must not refer - to an element of the _event_wait_list_ array. - -== Notes - -Used to release OpenCL memory objects that have been created from media surfaces. -The media surfaces are released by the OpenCL context associated with _command_queue_. 
- -OpenCL memory objects created from media surfaces which have been acquired by OpenCL must be released by OpenCL before they may be accessed by the media adapter API. -Accessing a media surface while its corresponding OpenCL memory object is acquired is in error and will result in undefined behavior, including but not limited to possible OpenCL errors, data corruption, and program termination. - -If `CL_CONTEXT_INTEROP_USER_SYNC` is not specified as `CL_TRUE` during context creation, `clEnqueueReleaseDX9MediaSurfacesKHR` provides the synchronization guarantee that any calls to media adapter APIs involving the interop device(s) used in the OpenCL context made after the call to `clEnqueueReleaseDX9MediaSurfacesKHR` will not start executing until after all events in _event_wait_list_ are complete and all work already submitted to _command_queue_ completes execution. -If the context was created with properties specifying `CL_CONTEXT_INTEROP_USER_SYNC` as `CL_TRUE`, the user is responsible for guaranteeing that any media adapter API calls involving the interop device(s) used in the OpenCL context made after `clEnqueueReleaseDX9MediaSurfacesKHR` will not start executing until after event returned by `clEnqueueReleaseDX9MediaSurfacesKHR` reports completion. - -include::sharingDX9Inc.txt[] - -== Errors - -Returns `CL_SUCCESS` returns `CL_SUCCESS` if the function is executed successfully. -If _num_objects_ is 0 and <_mem_objects_> is NULL then the function does nothing and returns `CL_SUCCESS`. -Otherwise it returns one of the following errors: - - * `CL_INVALID_VALUE` if _num_objects_ is zero and _mem_objects_ is not a NULL value or if _num_objects_ > 0 and _mem_objects_ is NULL. - * `CL_INVALID_MEM_OBJECT` if memory objects in _mem_objects_ are not valid OpenCL memory objects or if memory objects in _mem_objects_ have not been created from valid media surfaces. - * `CL_INVALID_COMMAND_QUEUE` if _command_queue_ is not a valid command-queue. 
- * `CL_INVALID_CONTEXT` if context associated with _command_queue_ was not created from a media object. - * `CL_DX9_MEDIA_SURFACE_NOT_ACQUIRED_KHR` if memory objects in _mem_objects_ have not been previously been acquired using flink:clEnqueueAcquireDX9MediaSurfacesKHR or have been released using `clEnqueueReleaseDX9MediaSurfacesKHR` since the last time that they were acquired. - * `CL_INVALID_EVENT_WAIT_LIST` if _event_wait_list_ is NULL and `num_event_in_wait_list` > 0, or _event_wait_list_ is not NULL and `num_event_in_wait_list` is 0, or if event objects in _event_wait_list_ are not valid events. - * `CL_OUT_OF_HOST_MEMORY` if there is a failure to allocate resources required by the OpenCL implementation on the host. - -== See Also - -reflink:cl_khr_dx9_media_sharing - -== Document Notes - -For more information, see the OpenCL Extension Specification at URL - -https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#clEnqueueReleaseDX9MediaSurfacesKHR - -== Copyright - -include::footer.txt[] diff --git a/man/static/clEnqueueReleaseEGLObjectsKHR.txt b/man/static/clEnqueueReleaseEGLObjectsKHR.txt deleted file mode 100644 index cddd6dcf..00000000 --- a/man/static/clEnqueueReleaseEGLObjectsKHR.txt +++ /dev/null @@ -1,91 +0,0 @@ -// Copyright 2014-2024 The Khronos Group Inc. -// SPDX-License-Identifier: CC-BY-4.0 - -:data-uri: -:icons: font -include::{config}/attribs.txt[] - -= clEnqueueReleaseEGLObjectsKHR(3) - -== Name - -clEnqueueReleaseEGLObjectsKHR - Release OpenCL memory objects that have been created from EGL resources. - -[source,c] ----- -cl_int clEnqueueReleaseEGLObjectsKHR(cl_command_queue command_queue, - cl_uint num_objects, - const cl_mem *mem_objects, - cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, - cl_event *event) ----- - -== Parameters - - * _command_queue_ - A valid command-queue. - * _num_objects_ - The number of memory objects to be acquired in - _mem_objects_. 
- * _mem_objects_ - A pointer to a list of OpenCL memory objects that were - created from EGL resources, within the context associated with - _command_queue_. - * _num_events_in_wait_list_ - Number of events in _event_wait_list_. - * _event_wait_list_ - Specify events that need to complete before this - particular command can be executed. If _event_wait_list_ is NULL, then - this particular command does not wait on any event to complete. If - _event_wait_list_ is NULL, _num_events_in_wait_list_ must be 0. If - _event_wait_list_ is not NULL, the list of events pointed to by - _event_wait_list_ must be valid and _num_events_in_wait_list_ must be - greater than 0. The events specified in _event_wait_list_ act as - synchronization points. - * _event_ - Returns an event object that identifies this command and can - be used to query or queue a wait for the command to complete. _event_ - can be NULL in which case it will not be possible for the application to - query the status of this command or queue a wait for this command to - complete. - -== Description - -This function is used to release OpenCL memory objects that have been -created from EGL resources. The EGL objects are released by the OpenCL -context associated with _command_queue_. - -OpenCL memory objects created from EGL resources which have been acquired by -OpenCL must be released by OpenCL before they may be accessed by EGL or by -EGL client APIs. - -Accessing a EGL resource while its corresponding OpenCL memory object is -acquired is in error and will result in undefined behavior, including but -not limited to possible OpenCL errors, data corruption, and program -termination. - -== Errors - -Returns `CL_SUCCESS` if the function is executed successfully. If -_num_objects_ is 0 and _mem_objects_ is NULL the function does nothing and -returns `CL_SUCCESS`. 
Otherwise, it returns one of the following errors: - - * `CL_INVALID_VALUE` if _num_objects_ is zero and _mem_objects_ is not a NULL value or if _num_objects_ > 0 and _mem_objects_ is NULL. - * `CL_INVALID_MEM_OBJECT` if memory objects in _mem_objects_ are not valid OpenCL memory objects in the context associated with _command_queue_. - * `CL_INVALID_EGL_OBJECT_KHR` if memory objects in _mem_objects_ have not been created from EGL resources. - * `CL_INVALID_COMMAND_QUEUE` if _command_queue_ is not a valid command-queue. - * `CL_INVALID_EVENT_WAIT_LIST` if _event_wait_list_ is NULL and _num_events_in_wait_list_ > 0, or _event_wait_list_ is not NULL and _num_events_in_wait_list_ is 0, or if event objects in _event_wait_list_ are not valid events. - * `CL_OUT_OF_RESOURCES` if there is a failure to allocate resources required by the OpenCL implementation on the device. - * `CL_OUT_OF_HOST_MEMORY` if there is a failure to allocate resources required by the OpenCL implementation on the host. - -== See Also - -reflink:cl_khr_egl_image, -reflink:cl_khr_egl_event, -flink:clEnqueueAcquireEGLObjectsKHR, -flink:clCreateFromEGLImageKHR - -== Document Notes - -For more information, see the OpenCL Extension Specification at URL - -https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#cl_khr_egl_image - -== Copyright - -include::footer.txt[] diff --git a/man/static/clEnqueueReleaseGLObjects.txt b/man/static/clEnqueueReleaseGLObjects.txt deleted file mode 100644 index 8f6ee5db..00000000 --- a/man/static/clEnqueueReleaseGLObjects.txt +++ /dev/null @@ -1,117 +0,0 @@ -// Copyright 2014-2024 The Khronos Group Inc. -// SPDX-License-Identifier: CC-BY-4.0 - -:data-uri: -:icons: font -include::{config}/attribs.txt[] - -= clEnqueueReleaseGLObjects(3) - -== Name - -clEnqueueReleaseGLObjects - Release OpenCL memory objects that have been created from OpenGL objects. 
- -== C Specification - -[source,c] ----- -cl_int clEnqueueReleaseGLObjects(cl_command_queue command_queue, - cl_uint num_objects, - const cl_mem *mem_objects, - cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, - cl_event *event) ----- - -== Parameters - - * _command_queue_ - A valid command-queue. - * _num_objects_ - The number of memory objects to be released in - _mem_objects_. - * _mem_objects_ - A pointer to a list of CL memory objects that correspond - to GL objects. - * _num_events_in_wait_list_ - Number of events in _event_wait_list_. - * _event_wait_list_ - These parameters specify events that need to - complete before this command can be executed. If _event_wait_list_ is - NULL, then this particular command does not wait on any event to - complete. If _event_wait_list_ is NULL, _num_events_in_wait_list_ must - be 0. If _event_wait_list_ is not NULL, the list of events pointed to by - _event_wait_list_ must be valid and _num_events_in_wait_list_ must be - greater than 0. The events specified in _event_wait_list_ act as - synchronization points. - * _event_ - Returns an event object that identifies this particular - read/write command and can be used to query or queue a wait for the - command to complete. _event_ can be NULL in which case it will not be - possible for the application to query the status of this command or - queue a wait for this command to complete. If _event_wait_list_ and - _event_ are not NULL, _event_ should not refer to an element of the - _event_wait_list_ array. 
-+ --- -If the reflink:cl_khr_gl_event extension is supported, if an OpenGL context -is bound to the current thread, then then any OpenGL commands which - - * affect or access the contents of the memory objects listed in the - _mem_objects_ list, and - * are issued on that context after the call to `clEnqueueReleaseGLObjects` - -will not execute until after execution of any OpenCL commands preceding the -`clEnqueueReleaseGLObjects` which affect or access any of those memory -objects. If a non-NULL event object is returned, it will report completion -before execution of such OpenGL commands. --- - -== Description - -Release OpenCL memory objects that have been created from OpenGL objects. -These objects need to be released before they can be used by OpenGL. -The OpenGL objects are released by the OpenCL context associated with _command_queue_. - -== Notes - -If the reflink:cl_khr_gl_sharing extension is supported and if an OpenGL context is bound to the current thread, then any OpenGL commands which does: - - * affect or access the contents of a memory object listed in the _mem_objects_ list, and - * are issued on that context after the call to `clEnqueueReleaseGLObjects` - -will not execute until after execution of any OpenCL commands preceding the `clEnqueueReleaseGLObjects` which affect or access any of those memory objects. -If a non-NULL _event_ object is returned, it will report completion before execution of such OpenGL commands. - -include::gl_sharingInc.txt[] - -include::gl_formatsInc.txt[] - -include::gl_lifetimeInc.txt[] - -include::gl_syncInc.txt[] - -== Errors - -`clEnqueueReleaseGLObjects` returns `CL_SUCCESS` if the function is executed successfully. -If _num_objects_ is 0 and _mem_objects_ is NULL the function does nothing and returns `CL_SUCCESS`. -Otherwise, it returns one of the following errors: - - * `CL_INVALID_VALUE` if _num_objects_ is zero and _mem_objects_ is not a NULL value or if _num_objects_ > 0 and _mem_objects_ is NULL. 
- * `CL_INVALID_MEM_OBJECT` if memory objects in _mem_objects_ are not valid OpenCL memory objects. - * `CL_INVALID_COMMAND_QUEUE` if _command_queue_ is not a valid command-queue. - * `CL_INVALID_CONTEXT` if context associated with _command_queue_ was not created from an OpenGL context. - * `CL_INVALID_GL_OBJECT` if memory objects in _mem_objects_ have not been created from a GL object(s). - * `CL_INVALID_EVENT_WAIT_LIST` if _event_wait_list_ is NULL and _num_events_in_wait_list_ > 0, or _event_wait_list_ is not NULL and _num_events_in_wait_list_ is 0, or if event objects in _event_wait_list_ are not valid events. - * `CL_OUT_OF_RESOURCES` if there is a failure to allocate resources required by the OpenCL implementation on the device. - * `CL_OUT_OF_HOST_MEMORY` if there is a failure to allocate resources required by the OpenCL implementation on the host. - -== See Also - -reflink:cl_khr_gl_sharing, -flink:clEnqueueAcquireGLObjects, -reflink:cl_khr_gl_event - -== Document Notes - -For more information, see the OpenCL Extension Specification at URL - -https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#clEnqueueReleaseGLObjects - -== Copyright - -include::footer.txt[] diff --git a/man/static/clGetDeviceIDsFromD3D10KHR.txt b/man/static/clGetDeviceIDsFromD3D10KHR.txt deleted file mode 100644 index 80941348..00000000 --- a/man/static/clGetDeviceIDsFromD3D10KHR.txt +++ /dev/null @@ -1,108 +0,0 @@ -// Copyright 2014-2024 The Khronos Group Inc. -// SPDX-License-Identifier: CC-BY-4.0 - -:data-uri: -:icons: font -include::{config}/attribs.txt[] - -= clGetDeviceIDsFromD3D10KHR(3) - -== Name - -clGetDeviceIDsFromD3D10KHR - Querying OpenCL Devices Corresponding to Direct3D 10 Devices. 
- -== C Specification - -[source,c] ----- -cl_int clGetDeviceIDsFromD3D10KHR(cl_platform_id platform, - cl_d3d10_device_source_khr d3d_device_source, - void *d3d_object, - cl_d3d10_device_set_khr d3d_device_set, - cl_uint num_entries, - cl_device_id *devices, - cl_uint *num_devices) ----- - -== Parameters - - * _platform_ - Refers to the platform ID returned by - flink:clGetPlatformIDs. - * _d3d_device_source_ - Specifies the type of _d3d_object_ and may be one - of the following:. -+ --- -[cols="1a,1a", options="header"] -|==== -| cl_d3d_device_source_khr | Type of _d3d_object_ -| `CL_D3D10_DEVICE_KHR` | `ID3D10Device` * -| `CL_D3D10_DXGI_ADAPTER_KHR` | `IDXGIAdapter` * -|==== --- - * _d3d_object_ - Specifies the object whose corresponding OpenCL devices - are being queried. The type of _d3d_object_ must be as specified in the - table above. - * d3d_device_set* - Specifies the set of devices to return, and must be - one of the following: -+ --- -[cols="1a,1a", options="header"] -|==== -| cl_d3d_device_set_khr | Devices returned in _devices_ -| `CL_PREFERRED_DEVICES_FOR_D3D10_KHR` - | The OpenCL devices associated with the specified Direct3D object. -| `CL_ALL_DEVICES_FOR_D3D10_KHR` - | All OpenCL devices which may interoperate with the specified Direct3D object. - Performance of sharing data on these devices may be considerably less than on the preferred devices. -|==== --- - * _num_entries_ - The number of `cl_device_id` entries that can be added - to _devices_. If _devices_ is not NULL, the _num_entries_ must be - greater than zero. - * _devices_ - Returns a list of OpenCL devices found. The `cl_device_id` - values returned in _devices_ can be used to identify a specific OpenCL - device. If _devices_ is NULL, this argument is ignored. The number of - OpenCL devices returned is the mininum of the value specified by - _num_entries_ and the number of OpenCL devices corresponding to - _d3d_object_. 
- * _num_devices_ - Returns the number of OpenCL devices available that - correspond to _d3d_object_. If _num_devices_ is NULL, this argument is - ignored. - -== Description - -The OpenCL devices corresponding to a Direct3D 10 device may be queried. -The OpenCL devices corresponding to a DXGI adapter may also be queried. -The OpenCL devices corresponding to a Direct3D 10 device will be a subset of the OpenCL devices corresponding to the DXGI adapter against which the Direct3D 10 device was created. - -== Notes - -include::sharingD3D10Inc.txt[] - -== Errors - -Returns `CL_SUCCESS` if the function is executed successfully. -Otherwise it may return: - - * `CL_INVALID_PLATFORM` if _platform_ is not a valid platform. - * `CL_INVALID_VALUE` if _d3d_device_source_ is not a valid value, _d3d_device_set_ is not a valid value, _num_entries_ is equal to zero and _devices_ is not NULL, or if both _num_devices_ and _devices_ are NULL. - * `CL_DEVICE_NOT_FOUND` if no OpenCL devices that correspond to _d3d_object_ were found. - -== See Also - -reflink:cl_khr_d3d10_sharing, -flink:clCreateFromD3D10BufferKHR, -flink:clCreateFromD3D10Texture2DKHR, -flink:clCreateFromD3D10Texture3DKHR, -flink:clEnqueueAcquireD3D10ObjectsKHR, -flink:clEnqueueReleaseD3D10ObjectsKHR - -== Document Notes - -For more information, see the OpenCL Extension Specification at URL - -https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#clGetDeviceIDsFromD3D10KHR - -== Copyright - -include::footer.txt[] diff --git a/man/static/clGetDeviceIDsFromD3D11KHR.txt b/man/static/clGetDeviceIDsFromD3D11KHR.txt deleted file mode 100644 index cd6e0f60..00000000 --- a/man/static/clGetDeviceIDsFromD3D11KHR.txt +++ /dev/null @@ -1,104 +0,0 @@ -// Copyright 2014-2024 The Khronos Group Inc. 
-// SPDX-License-Identifier: CC-BY-4.0 - -:data-uri: -:icons: font -include::{config}/attribs.txt[] - -= clGetDeviceIDsFromD3D11KHR(3) - -== Name - -clGetDeviceIDsFromD3D11KHR - Querying OpenCL Devices Corresponding to Direct3D 11 Devices. - -== C Specification - -[source,c] ----- -cl_int clGetDeviceIDsFromD3D11KHR(cl_platform_id platform, - cl_d3d11_device_source_khr d3d_device_source, - void *d3d_object, - cl_d3d11_device_set_khr d3d_device_set, - cl_uint num_entries, - cl_device_id *devices, - cl_uint *num_devices) ----- - -== Parameters - - * _platform_ - Refers to the platform ID returned by - flink:clGetPlatformIDs. - * _d3d_device_source_ - Specifies the type of _d3d_object_ and may be one - of the following (Table 9.11.1): -+ --- -[cols="1a,1a", options="header"] -|==== -| cl_d3d_device_source_khr | Type of _d3d_object_ -| `CL_D3D11_DEVICE_KHR` | ID3D11Device * -| `CL_D3D11_DXGI_ADAPTER_KHR` | IDXGIAdapter * -|==== --- - * _d3d_object_ - Specifies the object whose corresponding OpenCL devices - are being queried. The type of _d3d_object_ must be as specified in the - table above. - * _d3d_device_set_ - Specifies the set of devices to return, and must be - one of the following (Table 9.11.2): -+ --- -[cols="1a,1a", options="header"] -|==== -| cl_d3d_device_set_khr | Devices returned in _devices_ -| `CL_PREFERRED_DEVICES_FOR_D3D11_KHR` - | The OpenCL devices associated with the specified Direct3D object. -| `CL_ALL_DEVICES_FOR_D3D11_KHR` - | All OpenCL devices which may interoperate with the specified Direct3D - object. Performance of sharing data on these devices may be - considerably less than on the preferred devices. -|==== --- - * _num_entries_ - The number of `cl_device_id` entries that can be added - to _devices_. If _devices_ is not NULL, the _num_entries_ must be - greater than zero. - * _devices_ - Returns a list of OpenCL devices found. The `cl_device_id` - values returned in _devices_ can be used to identify a specific OpenCL - device. 
If _devices_ is NULL, this argument is ignored. The number of - OpenCL devices returned is the mininum of the value specified by - _num_entries_ and the number of OpenCL devices corresponding to - _d3d_object_. - * _num_devices_ - Returns the number of OpenCL devices available that - correspond to _d3d_object_. If _num_devices_ is NULL, this argument is - ignored. - -== Description - -The OpenCL devices corresponding to a Direct3D 11 device may be queried. -The OpenCL devices corresponding to a DXGI adapter may also be queried. -The OpenCL devices corresponding to a Direct3D 11 device will be a subset of the OpenCL devices corresponding to the DXGI adapter against which the Direct3D 11 device was created. - -== Notes - -include::sharingD3D11Inc.txt[] - -== Errors - -Returns `CL_SUCCESS` if the function is executed successfully. -Otherwise it may return: - - * `CL_INVALID_PLATFORM` if _platform_ is not a valid platform. - * `CL_INVALID_VALUE` if _d3d_device_source_ is not a valid value, _d3d_device_set_ is not a valid value, _num_entries_ is equal to zero and _devices_ is not NULL, or if both _num_devices_ and _devices_ are NULL. - * `CL_DEVICE_NOT_FOUND` if no OpenCL devices that correspond to _d3d_object_ were found. - -== See Also - -reflink:cl_khr_d3d11_sharing - -== Document Notes - -For more information, see the OpenCL Extension Specification at URL - -https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#clGetDeviceIDsFromD3D10KHR - -== Copyright - -include::footer.txt[] diff --git a/man/static/clGetDeviceIDsFromDX9MediaAdapterKHR.txt b/man/static/clGetDeviceIDsFromDX9MediaAdapterKHR.txt deleted file mode 100644 index 58b265d3..00000000 --- a/man/static/clGetDeviceIDsFromDX9MediaAdapterKHR.txt +++ /dev/null @@ -1,111 +0,0 @@ -// Copyright 2014-2024 The Khronos Group Inc. 
-// SPDX-License-Identifier: CC-BY-4.0 - -:data-uri: -:icons: font -include::{config}/attribs.txt[] - -= clGetDeviceIDsFromDX9MediaAdapterKHR(3) - -== Name - -clGetDeviceIDsFromDX9MediaAdapterKHR - Query a media adapter for any associated OpenCL devices. - -== C Specification - -[source,c] ----- -cl_int clGetDeviceIDsFromDX9MediaAdapterKHR(cl_platform_id platform, - cl_uint num_media_adapters, - cl_dx9_media_adapter_type_khr *media_adapters_type, - void *media_adapters, - cl_dx9_media_adapter_set_khr media_adapter_set, - cl_uint num_entries, - cl_device_id *devices, - cl_int *num_devices) ----- - -== Parameters - - * _platform_ - Refers to the platform ID returned by - flink:clGetPlatformIDs. - * _num_media_adapters_ - Specifies the number of media adapters. - * _media_adapters_type_ - An array of _num_media_adapters_ entries. Each - entry specifies the type of media adapter and must be one of the values - described in the table (Table 9.10.1) below. -+ --- -[cols="1a,1a", options="header"] -|==== -| cl_dx9_media_adapter_type_khr | Type of media adapters -| `CL_ADAPTER_D3D9_KHR` | IDirect3DDevice9 * -| `CL_ADAPTER_D3D9EX_KHR` | IDirect3DDevice9Ex * -| `CL_ADAPTER_DXVA_KHR` | IDXVAHD_Device * -|==== --- - * _media_adapters_ - An array of _num_media_adapters_ entries. Each entry - specifies the actual adapter whose type is specified by - `media_adapter_type`. The _media_adapters_ must be one of the types - describes in the table above. - * _media_adapter_set_ - Specifies the set of adapters to return and must - be one of the values described in the table (Table 9.10.2) below. -+ --- -[cols="1a,1a", options="header"] -|==== -| cl_dx9_media_adapter_set_khr | Description -| `CL_PREFERRED_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR` - | The preferred OpenCL devices associated with the media adapter. -| `CL_ALL_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR` | -| `CL_ALL_DEVICES_FOR_MEDIA_DX9_ADAPTER_KHR` - | All OpenCL devices that may interoperate with the media adapter. 
-|==== --- - * _num_entries_ - The number of cl_device_id entries that can be added to - _devices_. If _devices_ is not NULL, the _num_entries_ must be greater - than zero. - * _devices_ - Returns a list of OpenCL devices found that support the list - of media adapters specified. The `cl_device_id` values returned in - _devices_ can be used to identify a specific OpenCL device. If _devices_ - is NULL, this argument is ignored. The number of OpenCL devices returned - is the minimum of the value specified by _num_entries_ or the number of - OpenCL devices whose type matches `device_type`. - * _num_devices_ - Returns the number of OpenCL devices. If _num_devices_ - is NULL, this argument is ignored. - -== Description - -Queries a media adapter for any associated OpenCL devices. -Adapters with associated OpenCL devices can enable media surface sharing between the two. - -== Notes - -include::sharingDX9Inc.txt[] - -== Errors - -Returns `CL_SUCCESS` if the function is executed successfully. -Otherwise, it returns one of the following errors: - - * `CL_INVALID_PLATFORM` if _platform_ is not a valid platform. - * `CL_INVALID_VALUE` if _num_media_adapters_ is zero or if _media_adapters_type_ is NULL or if _media_adapters_ is NULL. - * `CL_INVALID_VALUE` if any of the entries in _media_adapters_type_ or _media_adapters_ is not a valid value. - * `CL_INVALID_VALUE` if _media_adapter_set_ is not a valid value. - * `CL_INVALID_VALUE` if _num_entries_ is equal to zero and _devices_ is not NULL or if both _num_devices_ and _devices_ are NULL. - * `CL_DEVICE_NOT_FOUND` if no OpenCL devices that correspond to adapters specified in _media_adapters_ and _media_adapters_type_ were found. - * `CL_OUT_OF_RESOURCES` if there is a failure to allocate resources required by the OpenCL implementation on the device. - * `CL_OUT_OF_HOST_MEMORY` if there is a failure to allocate resources required by the OpenCL implementation on the host. 
- -== See Also - -reflink:cl_khr_dx9_media_sharing - -== Document Notes - -For more information, see the OpenCL Extension Specification at URL - -https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#clGetDeviceIDsFromDX9MediaAdapterKHR - -== Copyright - -include::footer.txt[] diff --git a/man/static/clGetGLContextInfoKHR.txt b/man/static/clGetGLContextInfoKHR.txt deleted file mode 100644 index df2534e1..00000000 --- a/man/static/clGetGLContextInfoKHR.txt +++ /dev/null @@ -1,130 +0,0 @@ -// Copyright 2014-2024 The Khronos Group Inc. -// SPDX-License-Identifier: CC-BY-4.0 - -:data-uri: -:icons: font -include::{config}/attribs.txt[] - -= clGetGLContextInfoKHR(3) - -== Name - -clGetGLContextInfoKHR - Get OpenGL context information. - -[source,c] ----- -cl_int clGetGLContextInfoKHR(const cl_context_properties *properties, - cl_gl_context_info param_name, - size_t param_value_size, - void *param_value, - size_t *param_value_size_ret) ----- - - -== Parameters - - * _properties_ - Points to an attribute list whose format and valid - contents are identical to the _properties_ argument of - flink:clCreateContext. _properties_ must identify a single valid GL - context or GL share group object. - * _param_name_ - A constant that specifies the GL context information to - query, and must be one of the values shown in the table below. - * _param_value_size_ - Specifies the size in bytes of memory pointed to by - _param_value_. This size must be greater than or equal to the size of - the return type described in the table below. - * _param_value_ - A pointer to memory where the result of the query is - returned as described in the table below. If _param_value_ is NULL, it - is ignored. - * _param_value_size_ret_ - Returns the actual size in bytes of data being - queried by _param_value_. If _param_value_size_ret_ is NULL, it is - ignored. - -== Description - -Get OpenGL context information. 
- -[cols="1a,1a,1a", options="header"] -|=== -| _param_name_ | Return Type | Information returned in param_value -| `CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR` | `cl_device_id` - | Return the CL device currently associated with the specified OpenGL context. -| `CL_DEVICES_FOR_GL_CONTEXT_KHR` | `cl_device_id[]` - | List of all CL devices which may be associated with the specified OpenGL context. -|=== - -== Notes - -include::gl_sharingInc.txt[] - -include::gl_formatsInc.txt[] - -include::gl_lifetimeInc.txt[] - -include::gl_syncInc.txt[] - - -== Errors - -Returns `CL_SUCCESS` if the function is executed successfully. If no -device(s) exist corresponding to _param_name_, the call will not fail, but -the value of _param_value_size_ret_ will be zero. Otherwise returns one of -the following: - - * {blank} -+ --- -`CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR` if a context was specified by any -of the following means: - - * A context was specified for an EGL-based OpenGL ES or OpenGL implementation by setting the attributes `CL_GL_CONTEXT_KHR` and `CL_EGL_DISPLAY_KHR` - * A context was specified for a GLX-based OpenGL implementation by setting the attributes `CL_GL_CONTEXT_KHR` and `CL_GLX_DISPLAY_KHR` - * A context was specified for a WGL-based OpenGL implementation by setting the attributes `CL_GL_CONTEXT_KHR` and `CL_WGL_HDC_KHR` - -and any of the following conditions hold: - - * The specified display and context attributes do not identify a valid OpenGL or OpenGL ES context. - * The specified context does not support buffer and renderbuffer objects. - * The specified context is not compatible with the OpenCL context being created (for example, it exists in a physically distinct address space, such as another hardware device; or it does not support sharing data with OpenCL due to implementation restrictions). 
--- - * `CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR` if a share group was specified - for a CGL-based OpenGL implementation by setting the attribute - `CL_CGL_SHAREGROUP_KHR`, and the specified share group does not identify a - valid CGL share group object. - - * {blank} -+ --- -`CL_INVALID_OPERATION` if a context was specified as described above and any of the following conditions hold: - - * A context or share group object was specified for one of CGL, EGL, GLX, or WGL and the OpenGL implementation does not support that window-system binding API. - * More than one of the attributes `CL_CGL_SHAREGROUP_KHR`, `CL_EGL_DISPLAY_KHR`, `CL_GLX_DISPLAY_KHR`, and `CL_WGL_HDC_KHR` is set to a non-default value. - * Both of the attributes `CL_CGL_SHAREGROUP_KHR` and `CL_GL_CONTEXT_KHR` are set to non-default values. - * Any of the devices specified in _devices_ cannot support OpenCL objects which share the data store of an OpenGL object. --- - * `CL_INVALID_VALUE` if an attribute name other than those specified in - the table of supported _properties_ for flink:clCreateContext, - * `CL_INVALID_VALUE` if _param_name_ is not one of the values listed in - the table above; or if the size in bytes specified by _param_value_size_ - is less than the size of the return type shown in the table above, and - _param_value_ is not a NULL value. - * `CL_OUT_OF_RESOURCES` if there is a failure to allocate resources - required by the OpenCL implementation on the device - * `CL_OUT_OF_HOST_MEMORY` if there is a failure to allocate resources - required by the OpenCL implementation on the host. 
- - -== See Also - -flink:clCreateContext, -reflink:cl_khr_gl_sharing, -reflink:cl_khr_gl_event - -== Document Notes - -For more information, see the OpenCL Extension Specification at URL - -https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#cl_khr_gl_sharing - -== Copyright - -include::footer.txt[] diff --git a/man/static/clGetGLObjectInfo.txt b/man/static/clGetGLObjectInfo.txt deleted file mode 100644 index 1d7b7bbc..00000000 --- a/man/static/clGetGLObjectInfo.txt +++ /dev/null @@ -1,63 +0,0 @@ -// Copyright 2014-2024 The Khronos Group Inc. -// SPDX-License-Identifier: CC-BY-4.0 - -:data-uri: -:icons: font -include::{config}/attribs.txt[] - -= clGetGLObjectInfo(3) - -== Name - -clGetGLObjectInfo - Query an OpenGL object used to create an OpenCL memory object. - -== C Specification - -[source,c] ----- -cl_int clGetGLObjectInfo(cl_mem memobj, - cl_gl_object_type *gl_object_type, - GLuint *gl_object_name) ----- - -== Parameters - - * _memobj_ - An OpenCL memory object handle. - * _gl_object_type_ - Returns the type of GL object attached to _memobj_ - and can be `CL_GL_OBJECT_BUFFER`, `CL_GL_OBJECT_TEXTURE2D`, - `CL_GL_OBJECT_TEXTURE3D`, `CL_GL_OBJECT_TEXTURE2D_ARRAY`, - `CL_GL_OBJECT_TEXTURE1D`, `CL_GL_OBJECT_TEXTURE1D_ARRAY`, - `CL_GL_OBJECT_TEXTURE_BUFFER`, or `CL_GL_OBJECT_RENDERBUFFER`. If - _gl_object_type_ is NULL, it is ignored. - * _gl_object_name_ - Returns the GL object name used to create _memobj_. - If _gl_object_name_ is NULL, it is ignored. - -== Description - -The OpenGL object used to create the OpenCL memory object and information about the object type i.e. -whether it is a texture, renderbuffer, or buffer object can be queried using this function. - -== Errors - -Returns `CL_SUCCESS` if the call was executed successfully. -Otherwise, it returns one of the following errors: - - * `CL_INVALID_MEM_OBJECT` if _memobj_ is not a valid OpenCL memory object. 
- * `CL_INVALID_GL_OBJECT` if there is no GL object associated with _memobj_. - * `CL_OUT_OF_RESOURCES` if there is a failure to allocate resources required by the OpenCL implementation on the device. - * `CL_OUT_OF_HOST_MEMORY` if there is a failure to allocate resources required by the OpenCL implementation on the host. - -== See Also - -reflink:cl_khr_gl_sharing, -flink:clGetGLTextureInfo - -== Document Notes - -For more information, see the OpenCL Extension Specification at URL - -https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#clGetGLObjectInfo - -== Copyright - -include::footer.txt[] diff --git a/man/static/clGetGLTextureInfo.txt b/man/static/clGetGLTextureInfo.txt deleted file mode 100644 index dffef2e6..00000000 --- a/man/static/clGetGLTextureInfo.txt +++ /dev/null @@ -1,85 +0,0 @@ -// Copyright 2014-2024 The Khronos Group Inc. -// SPDX-License-Identifier: CC-BY-4.0 - -:data-uri: -:icons: font -include::{config}/attribs.txt[] - -= clGetGLTextureInfo(3) - -== Name - -clGetGLTextureInfo - Returns additional information about the GL texture object associated with a memory object. - -== C Specification - -[source,c] ----- -cl_int clGetGLTextureInfo(cl_mem memobj, - cl_gl_texture_info param_name, - size_t param_value_size, - void *param_value, - size_t *param_value_size_ret) ----- - -== Parameters - - * _memobj_ - An OpenCL memory object handle. - * _param_name_ - Specifies what additional information about the GL - texture object associated with _memobj_ to query. The list of supported - _param_name_ types and the information returned in _param_value_ by - `clGetGLTextureInfo` is described in the table below (Table 9.5). - * _param_value_ - A pointer to memory where the result being queried is - returned. If _param_value_ is NULL, it is ignored. - * _param_value_size_ - Specifies the size in bytes of memory pointed to by - _param_value_. This size must be {geq} size of return type as described - in the table below. 
- * _param_value_size_ret_ - Returns the actual size in bytes of data copied - to _param_value_. If _param_value_size_ret_ is NULL, it is ignored. -+ --- -Table 9.5: - -[cols="1a,1a,1a", options="header"] -|==== -| cl_gl_texture_info | Return Type | Information returned in _param_value_ -| `CL_GL_TEXTURE_TARGET` | GLenum - | The _texture_target_ argument specified in - flink:clCreateFromGLTexture. -| `CL_GL_MIPMAP_LEVEL` | GLint - | The _miplevel_ argument specified in flink:clCreateFromGLTexture. -| `CL_GL_NUM_SAMPLES` | GLsizei - | If the reflink:cl_khr_gl_msaa_sharing extension is supported, the - _samples_ argument passed to `glTexImage2DMultisample` or - `glTexImage3DMultisample`. If _image_ is not a MSAA texture, 1 is - returned. -|==== --- - -== Errors - -Returns `CL_SUCCESS` if the function is executed successfully. -Otherwise, it returns one of the following errors: - - * `CL_INVALID_MEM_OBJECT` if _memobj_ is not a valid OpenCL memory object - * `CL_INVALID_GL_OBJECT` if there is no GL texture object associated with _memobj_. - * `CL_INVALID_VALUE` if _param_name_ is not valid, or if size in bytes specified by _param_value_size_ is < size of return type as described in the table above and _param_value_ is not NULL, or if _param_value_ and _param_value_size_ret_ are NULL. - * `CL_OUT_OF_RESOURCES` if there is a failure to allocate resources required by the OpenCL implementation on the device. - * `CL_OUT_OF_HOST_MEMORY` if there is a failure to allocate resources required by the OpenCL implementation on the host. 
- -== See Also - -reflink:cl_khr_gl_sharing, -flink:clGetGLObjectInfo, -flink:clCreateFromGLTexture, -flink:clCreateFromGLTexture - -== Document Notes - -For more information, see the OpenCL Extension Specification at URL - -https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#clGetGLTextureInfo - -== Copyright - -include::footer.txt[] diff --git a/man/static/clIcdGetPlatformIDsKHR.txt b/man/static/clIcdGetPlatformIDsKHR.txt deleted file mode 100644 index 7c937659..00000000 --- a/man/static/clIcdGetPlatformIDsKHR.txt +++ /dev/null @@ -1,63 +0,0 @@ -// Copyright 2014-2024 The Khronos Group Inc. -// SPDX-License-Identifier: CC-BY-4.0 - -:data-uri: -:icons: font -include::{config}/attribs.txt[] - -= clIcdGetPlatformIDsKHR(3) - -== Name - -clIcdGetPlatformIDsKHR - Obtain the list of platforms accessible through the Khronos ICD Loader. - -[source,c] ----- -cl_int clIcdGetPlatformIDsKHR(cl_uint num_entries, - cl_platform_id *platforms, - cl_uint *num_platforms) ----- - - -== Parameters - - * _num_entries_ - The number of `cl_platform_id` entries that can be added - to _platforms_. If _platforms_ is not NULL, then _num_entries_ must be - greater than zero. - * _platforms_ - Returns a list of OpenCL platforms available for access - through the Khronos ICD Loader. The `cl_platform_id` values returned in - _platforms_ are ICD compatible and can be used to identify a specific - OpenCL platform. If _platforms_ is NULL, then this argument is ignored. - The number of OpenCL platforms returned is the minimum of the value - specified by _num_entries_ or the number of OpenCL platforms available. - * _num_platforms_ - Returns the number of OpenCL platforms available. If - _num_platforms_ is NULL, then this argument is ignored. - -== Notes - -This function is enabled by the -reflink:cl_khr_icd extension. 
- -== Errors - -Returns `CL_SUCCESS` if the function is executed successfully and there are -a non zero number of platforms available, else it returns one of the errors -below: - - * `CL_PLATFORM_NOT_FOUND_KHR` if zero platforms are available. - * `CL_INVALID_VALUE` if _num_entries_ is equal to zero and _platforms_ is not NULL or if both _num_platforms_ and _platforms_ are NULL. - -== See Also - -flink:clGetPlatformIDs, -reflink:cl_khr_icd - -== Document Notes - -For more information, see the OpenCL Extension Specification at URL - -https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#cl_khr_icd - -== Copyright - -include::footer.txt[] diff --git a/man/static/clTerminateContextKHR.txt b/man/static/clTerminateContextKHR.txt deleted file mode 100644 index 8b0abcbc..00000000 --- a/man/static/clTerminateContextKHR.txt +++ /dev/null @@ -1,93 +0,0 @@ -// Copyright 2014-2024 The Khronos Group Inc. -// SPDX-License-Identifier: CC-BY-4.0 - -:data-uri: -:icons: font -include::{config}/attribs.txt[] - -= clTerminateContextKHR(3) - -== Name - -clTerminateContextKHR - Terminates all pending work associated with the context and renders all data owned by the context invalid. - -[source,c] ----- -cl_int clTerminateContextKHR(cl_context context) ----- - -== Parameters - -== Notes - -It is the responsibility of the application to release all objects -associated with the context being terminated. - -When a context is terminated: - - * The execution status of enqueued commands will be `CL_TERMINATED_KHR`. - Event objects can be queried using - flink:clGetEventInfo. Event callbacks can be - registered and registered event callbacks will be called with - `event_command_status` set to `CL_TERMINATED_KHR`. - flink:clWaitForEvents will return immediately - for commands associated with event objects specified in `event_list`. - The status of user events can be set. Event objects can be retained and - released.
flink:clGetEventProfilingInfo - returns `CL_PROFILING_INFO_NOT_AVAILABLE`. - * The context is considered to be terminated. A callback function - registered when the context was created will be called. Only queries, - retain and release operations can be performed on the context. All other - APIs that use a context as an argument will return - `CL_CONTEXT_TERMINATED_KHR`. - * The contents of the memory regions of the memory objects is undefined. - Queries, registering a destructor callback, retain and release - operations can be performed on the memory objects. - * Once a context has been terminated, all OpenCL API calls that create - objects or enqueue commands will return `CL_CONTEXT_TERMINATED_KHR`. - APIs that release OpenCL objects will continue to operate as though - `clTerminateContextKHR` was not called. - * The behavior of callbacks will remain unchanged, and will report - appropriate error, if executing after termination of context. This - behavior is similar to enqueued commands, after the command-queue has - become invalid. - -An implementation that supports this extension must be able to terminate -commands currently executing on devices or queued across all command-queues -associated with the context that is being terminated. The implementation -cannot implement this extension by waiting for currently executing (or -queued) commands to finish execution on devices associated with this context -(i.e. doing a flink:clFinish). - -In Table 4.5 (see flink:clCreateContext), `CL_CONTEXT_TERMINATE_KHR` -can be specified in the context properties only if all devices associated -with the context support the ability to support context termination (i.e. -`CL_DEVICE_TERMINATE_CAPABILITY_CONTEXT_KHR` is set for -`CL_DEVICE_TERMINATE_CAPABILITY_KHR`). Otherwise, context creation fails -with error code of `CL_INVALID_PROPERTY`. - -== Errors - -`clTerminateContextKHR` returns `CL_SUCCESS` if the function is executed -successfully.
Otherwise, it returns one of the following errors: - - * `CL_INVALID_CONTEXT` if _context_ is not a valid OpenCL context. - * `CL_CONTEXT_TERMINATED_KHR` if _context_ has already been terminated. - * `CL_INVALID_OPERATION` if _context_ was not created with - `CL_CONTEXT_TERMINATE_KHR` set to `CL_TRUE`. - * `CL_OUT_OF_RESOURCES` if there is a failure to allocate resources - required by the OpenCL implementation on the device. - * `CL_OUT_OF_HOST_MEMORY` if there is a failure to allocate resources - required by the OpenCL implementation on the host. - -// == See Also - -== Document Notes - -For more information, see the OpenCL Extension Specification at URL - -https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#cl_khr_terminate_context - -== Copyright - -include::footer.txt[] diff --git a/man/static/cl_khr_3d_image_writes.txt b/man/static/cl_khr_3d_image_writes.txt deleted file mode 100644 index 474126f4..00000000 --- a/man/static/cl_khr_3d_image_writes.txt +++ /dev/null @@ -1,35 +0,0 @@ -// Copyright 2014-2024 The Khronos Group Inc. -// SPDX-License-Identifier: CC-BY-4.0 - -:data-uri: -:icons: font -include::{config}/attribs.txt[] - -= cl_khr_3d_image_writes(3) - -== Name - -cl_khr_3d_image_writes - Extension to enable writes to 3D image memory objects. 
- -== C Specification - -[source,c] ----- -#pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable ----- - -== See Also - -reflink:EXTENSION, -reflink:cl_image_format, -reflink:imageWriteFunctions - -== Document Notes - -For more information, see the OpenCL Extension Specification at URL - -https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#cl_khr_3d_image_writes - -== Copyright - -include::footer.txt[] diff --git a/man/static/cl_khr_byte_addressable_store.txt b/man/static/cl_khr_byte_addressable_store.txt deleted file mode 100644 index 9265f1aa..00000000 --- a/man/static/cl_khr_byte_addressable_store.txt +++ /dev/null @@ -1,35 +0,0 @@ -// Copyright 2014-2024 The Khronos Group Inc. -// SPDX-License-Identifier: CC-BY-4.0 - -:data-uri: -:icons: font -include::{config}/attribs.txt[] - -= cl_khr_byte_addressable_store(3) - -== Name - -cl_khr_byte_addressable_store - deprecated extension - -== C Specification - -[source,c] ----- -#pragma OPENCL EXTENSION cl_khr_byte_addressable_store : enable ----- - -== Description - -This extension was promoted to OpenCL 1.1 core. - -// == See Also - -== Document Notes - -For more information, see the OpenCL API Specification at URL - -https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_API.html#changes_to_opencl - -== Copyright - -include::footer.txt[] diff --git a/man/static/cl_khr_d3d10_sharing.txt b/man/static/cl_khr_d3d10_sharing.txt deleted file mode 100644 index 11ee4ced..00000000 --- a/man/static/cl_khr_d3d10_sharing.txt +++ /dev/null @@ -1,48 +0,0 @@ -// Copyright 2014-2024 The Khronos Group Inc. -// SPDX-License-Identifier: CC-BY-4.0 - -:data-uri: -:icons: font -include::{config}/attribs.txt[] - -= cl_khr_d3d10_sharing(3) - -== Name - -cl_khr_d3d10_sharing - Provide interoperability between OpenCL and Direct3D 10. 
- -== C Specification - -[source,c] ----- -#pragma OPENCL EXTENSION cl_khr_d3d10_sharing : enable ----- - -== Description - -If the `cl_khr_d3d10_sharing` extension is supported, then the following functions are enabled: - - * flink:clGetDeviceIDsFromD3D10KHR - * flink:clCreateFromD3D10BufferKHR - * flink:clCreateFromD3D10Texture2DKHR - * flink:clCreateFromD3D10Texture3DKHR - * flink:clEnqueueAcquireD3D10ObjectsKHR - * flink:clEnqueueReleaseD3D10ObjectsKHR - -include::sharingD3D10Inc.txt[] - -== See Also - -reflink:EXTENSION, -flink:clGetPlatformInfo, -flink:clGetDeviceInfo - -== Document Notes - -For more information, see the OpenCL Extension Specification at URL - -https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#cl_khr_d3d10_sharing - -== Copyright - -include::footer.txt[] diff --git a/man/static/cl_khr_d3d11_sharing.txt b/man/static/cl_khr_d3d11_sharing.txt deleted file mode 100644 index edf6faed..00000000 --- a/man/static/cl_khr_d3d11_sharing.txt +++ /dev/null @@ -1,75 +0,0 @@ -// Copyright 2014-2024 The Khronos Group Inc. -// SPDX-License-Identifier: CC-BY-4.0 - -:data-uri: -:icons: font -include::{config}/attribs.txt[] - -= cl_khr_d3d11_sharing(3) - -== Name - -cl_khr_d3d11_sharing - Provide interoperability between OpenCL and Direct3D 11. - -== C Specification - -[source,c] ----- -#pragma OPENCL EXTENSION cl_khr_d3d11_sharing : enable ----- - -== Description - -The goal of this extension is to provide interoperability between OpenCL and Direct3D 11. -This is designed to function analogously to the reflink:cl_khr_gl_sharing as defined in sections 9.7 and 9.8. -If this extension is supported by an implementation, the string "cl_khr_d3d11_sharing" will be present in the `CL_PLATFORM_EXTENSIONS` or `CL_DEVICE_EXTENSIONS` string described in the table of allowed values for _param_name_ for flink:clGetDeviceInfo or -flink:clGetPlatformInfo. 
- -As currently proposed, the interfaces for this extension are provided in the header file `cl_d3d11.h`. - -If the `cl_khr_d3d11_sharing` extension is supported, then the following functions are enabled: - - * flink:clGetDeviceIDsFromD3D11KHR - * flink:clCreateFromD3D11BufferKHR - * flink:clCreateFromD3D11Texture2DKHR - * flink:clCreateFromD3D11Texture3DKHR - * flink:clEnqueueAcquireD3D11ObjectsKHR - * flink:clEnqueueReleaseD3D11ObjectsKHR - -The OpenCL functions enabled by the `cl_khr_d3d11_sharing` extension allow applications to use Direct3D 11 resources as OpenCL memory objects. -This allows efficient sharing of data between OpenCL and Direct3D 11. -The OpenCL API may be used to execute kernels that read and/or write memory objects that are also Direct3D 11 resources. -An OpenCL image object may be created from a Direct3D 11 texture resource. -An OpenCL buffer object may be created from a Direct3D 11 buffer resource. -OpenCL memory objects may be created from Direct3D 11 objects if and only if the OpenCL context has been created from a Direct3D 11 device. - -.Lifetime of Shared [D3D11] Objects - -An OpenCL memory object created from a Direct3D 11 resource remains valid as long as the corresponding Direct3D 11 resource has not been deleted. -If the Direct3D 11 resource is deleted through the Direct3D 11 API, subsequent use of the OpenCL memory object will result in undefined behavior, including but not limited to possible OpenCL errors, data corruption, and program termination. - -The successful creation of a cl_context against a Direct3D 11 device specified via the context create parameter `CL_CONTEXT_D3D11_DEVICE_KHR` will increment the internal Direct3D reference count on the specified Direct3D 11 device. -The internal Direct3D reference count on that Direct3D 11 device will be decremented when the OpenCL reference count on the returned OpenCL context drops to zero. 
- -The OpenCL context and corresponding command-queues are dependent on the existence of the Direct3D 11 device from which the OpenCL context was created. -If the Direct3D 11 device is deleted through the Direct3D 11 API, subsequent use of the OpenCL context will result in undefined behavior, including but not limited to possible OpenCL errors, data corruption, and program termination. - -== See Also - -reflink:EXTENSION, -flink:clGetDeviceIDsFromD3D11KHR, -flink:clCreateFromD3D11BufferKHR, -flink:clCreateFromD3D11Texture2DKHR, -flink:clCreateFromD3D11Texture3DKHR, -flink:clEnqueueAcquireD3D11ObjectsKHR, -flink:clEnqueueReleaseD3D11ObjectsKHR - -== Document Notes - -For more information, see the OpenCL Extension Specification at URL - -https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#cl_khr_d3d11_sharing - -== Copyright - -include::footer.txt[] diff --git a/man/static/cl_khr_depth_images.txt b/man/static/cl_khr_depth_images.txt deleted file mode 100644 index e333c960..00000000 --- a/man/static/cl_khr_depth_images.txt +++ /dev/null @@ -1,35 +0,0 @@ -// Copyright 2014-2024 The Khronos Group Inc. -// SPDX-License-Identifier: CC-BY-4.0 - -:data-uri: -:icons: font -include::{config}/attribs.txt[] - -= cl_khr_depth_images(3) - -== Name - -cl_khr_depth_images - deprecated extension - -== C Specification - -[source,c] ----- -#pragma OPENCL EXTENSION cl_khr_depth_images : enable ----- - -== Description - -This extension was promoted to OpenCL 2.0 core.
- -// == See Also - -== Document Notes - -For more information, see the OpenCL API Specification at URL - -https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_API.html#changes_to_opencl - -== Copyright - -include::footer.txt[] diff --git a/man/static/cl_khr_device_enqueue_local_arg_types.txt b/man/static/cl_khr_device_enqueue_local_arg_types.txt deleted file mode 100644 index 88c465fd..00000000 --- a/man/static/cl_khr_device_enqueue_local_arg_types.txt +++ /dev/null @@ -1,41 +0,0 @@ -// Copyright 2014-2024 The Khronos Group Inc. -// SPDX-License-Identifier: CC-BY-4.0 - -:data-uri: -:icons: font -include::{config}/attribs.txt[] - -= cl_khr_device_enqueue_local_arg_types(3) - -== Name - -cl_khr_device_enqueue_local_arg_types - Allows arguments to blocks passed to enqueue_kernel functions to be declared as a pointer to any type in local memory. - -== C Specification - -[source,c] ----- -#pragma OPENCL EXTENSION cl_khr_device_enqueue_local_arg_types : enable ----- - -== Description - -This extension allows arguments to blocks passed to enqueue_kernel functions to be declared as a pointer to any type (built-in or user-defined) in local memory instead of just `local void *`. - -If this extension is supported by an implementation, the string cl_khr_device_enqueue_local_arg_types will be present in the `CL_DEVICE_EXTENSIONS` string described in table 4.3 (see flink:clGetDeviceInfo). 
- -== See Also - -flink:clGetDeviceInfo, -reflink:enqueue_kernel, -reflink:get_kernel_work_group_size - -== Document Notes - -For more information, see the OpenCL Extension Specification at URL - -https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#cl_khr_device_enqueue_local_arg_types - -== Copyright - -include::footer.txt[] diff --git a/man/static/cl_khr_dx9_media_sharing.txt b/man/static/cl_khr_dx9_media_sharing.txt deleted file mode 100644 index 84f89236..00000000 --- a/man/static/cl_khr_dx9_media_sharing.txt +++ /dev/null @@ -1,46 +0,0 @@ -// Copyright 2014-2024 The Khronos Group Inc. -// SPDX-License-Identifier: CC-BY-4.0 - -:data-uri: -:icons: font -include::{config}/attribs.txt[] - -= cl_khr_dx9_media_sharing(3) - -== Name - -cl_khr_dx9_media_sharing - Provide sharing of data between OpenCL and DX9. - -== C Specification - -[source,c] ----- -#pragma OPENCL EXTENSION cl_khr_dx9_media_sharing : enable ----- - -== Description - -If the `cl_khr_dx9_media_sharing` extension is supported, then the following functions are enabled: - - * flink:clGetDeviceIDsFromDX9MediaAdapterKHR - * flink:clCreateFromDX9MediaSurfaceKHR - * flink:clEnqueueAcquireDX9MediaSurfacesKHR - * flink:clEnqueueReleaseDX9MediaSurfacesKHR - -include::sharingDX9Inc.txt[] - -== See Also - -reflink:EXTENSION, -flink:clGetPlatformInfo, -flink:clGetDeviceInfo - -== Document Notes - -For more information, see the OpenCL Extension Specification at URL - -https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#cl_khr_dx9_media_sharing - -== Copyright - -include::footer.txt[] diff --git a/man/static/cl_khr_egl_event.txt b/man/static/cl_khr_egl_event.txt deleted file mode 100644 index e182c857..00000000 --- a/man/static/cl_khr_egl_event.txt +++ /dev/null @@ -1,53 +0,0 @@ -// Copyright 2014-2024 The Khronos Group Inc. 
-// SPDX-License-Identifier: CC-BY-4.0 - -:data-uri: -:icons: font -include::{config}/attribs.txt[] - -= cl_khr_egl_event(3) - -== Name - -cl_khr_egl_event - Create CL event objects linked to EGL fence sync objects. - -== C Specification - -[source,c] ----- -#pragma OPENCL EXTENSION cl_khr_egl_event : enable ----- - -== Description - -This extension allows creating OpenCL event objects linked to EGL fence sync objects, potentially improving efficiency of sharing images and buffers between the two APIs. -The companion EGL_KHR_cl_event extension provides the complementary functionality of creating an EGL sync object from an OpenCL event object. - -If this extension is supported by an implementation, the string `cl_khr_egl_event` will be present in the `CL_PLATFORM_EXTENSIONS` string described in table 4.1 (see flink:clGetPlatformInfo) or `CL_DEVICE_EXTENSIONS` string described in table 4.3 (see -flink:clGetDeviceInfo). - -If the `cl_khr_egl_event` extension is supported, then the following function is enabled: - - * flink:clCreateEventFromEGLSyncKHR - -The `cl_khr_egl_event` extension enables changes to the following: - - * flink:clGetEventInfo - * flink:clWaitForEvents - * flink:clGetEventInfo - * flink:clEnqueueAcquireGLObjects - -== See Also - -reflink:EXTENSION, -reflink:cl_khr_egl_image - -== Document Notes - -For more information, see the OpenCL Extension Specification at URL - -https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#cl_khr_egl_event - -== Copyright - -include::footer.txt[] diff --git a/man/static/cl_khr_egl_image.txt b/man/static/cl_khr_egl_image.txt deleted file mode 100644 index d6609c90..00000000 --- a/man/static/cl_khr_egl_image.txt +++ /dev/null @@ -1,45 +0,0 @@ -// Copyright 2014-2024 The Khronos Group Inc. 
-// SPDX-License-Identifier: CC-BY-4.0 - -:data-uri: -:icons: font -include::{config}/attribs.txt[] - -= cl_khr_egl_image(3) - -== Name - -cl_khr_egl_image - Create derived resources, such as OpenCL image objects, from EGLImages. - -== C Specification - -[source,c] ----- -#pragma OPENCL EXTENSION cl_khr_egl_image : enable ----- - -== Description - -If this extension is supported by an implementation, the string `cl_khr_egl_image` will be present in the `CL_PLATFORM_EXTENSIONS` string described in table 4.1 (see flink:clGetPlatformInfo) or `CL_DEVICE_EXTENSIONS` string described in table 4.3 (see -flink:clGetDeviceInfo). - -If the `cl_khr_egl_image` extension is supported, then the following functions are enabled: - - * flink:clCreateFromEGLImageKHR - * flink:clEnqueueAcquireEGLObjectsKHR - * flink:clEnqueueReleaseEGLObjectsKHR - -== See Also - -reflink:EXTENSION, -reflink:cl_khr_egl_event - -== Document Notes - -For more information, see the OpenCL Extension Specification at URL - -https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#cl_khr_egl_image - -== Copyright - -include::footer.txt[] diff --git a/man/static/cl_khr_fp16.txt b/man/static/cl_khr_fp16.txt deleted file mode 100644 index fbc273ce..00000000 --- a/man/static/cl_khr_fp16.txt +++ /dev/null @@ -1,121 +0,0 @@ -// Copyright 2014-2024 The Khronos Group Inc. -// SPDX-License-Identifier: CC-BY-4.0 - -:data-uri: -:icons: font -include::{config}/attribs.txt[] - -= cl_khr_fp16(3) - -== Name - -cl_khr_fp16 - Optional half floating-point support. - -== C Specification - -[source,c] ----- -#pragma OPENCL EXTENSION cl_khr_fp16 : enable ----- - -== Description - -This extension adds support for half scalar and vector types as built-in types that can be used for arithmetic operations, conversions, etc. -An application that wants to use `half` and `halfn` types will need to include the directive shown above. 
- -The list of built-in scalar and vector data types are extended to include the types in the table below. - -The built-in vector data types for `halfn` are also declared as appropriate types in the OpenCL API (and header files) that can be used by an application. -The following table describes the built-in vector data types for `halfn` as defined in the OpenCL C programming language and the corresponding data type available to the application: - -[cols="1a,1a,1a", options="header"] -|==== -| Type in OpenCL Language | Description | API type for application -| half2 | 2-component half-precision floating-point vector | `cl_half2` -| half3 | 3-component half-precision floating-point vector | `cl_half3` -| half4 | 4-component half-precision floating-point vector | `cl_half4` -| half8 | 8-component half-precision floating-point vector | `cl_half8` -| half16 | 16-component half-precision floating-point vector | `cl_half16` -|==== - -The relational, equality, logical and logical unary reflink:operators can be used with `half` scalar and `halfn` vector types and shall produce a scalar `int` and vector `shortn` result respectively. - -The OpenCL compiler accepts an `h` and `H` suffix on floating-point literals, indicating the literal is typed as a `half`. - -The macro names given in the following list must use the values specified. -These constant expressions are suitable for use in `#if` preprocessing directives. - -The following table also describes the corresponding macro names available to the application. 
- -[cols="1a,1a,1a", options="header"] -|==== -| Macro in OpenCL Language | value | Macro for application -| `#define HALF_DIG` | `3` | `HALF_DIG` -| `#define HALF_MANT_DIG` | `11` | `HALF_MANT_DIG` -| `#define HALF_MAX_10_EXP` | `+4` | `HALF_MAX_10_EXP` -| `#define HALF_MAX_EXP` | `+16` | `HALF_MAX_EXP` -| `#define HALF_MIN_10_EXP` | `-4` | `HALF_MIN_10_EXP` -| `#define HALF_MIN_EXP` | `-13` | `HALF_MIN_EXP` -| `#define HALF_RADIX` | `2` | `HALF_RADIX` -| `#define HALF_MAX` | `0x1.ffcp15h` | `HALF_MAX` -| `#define HALF_MIN` | `0x1.0p-14h` | `HALF_MIN` -| `#define HALF_EPSILON` | `0x1.0p-10h` | `HALF_EPSILON` -|==== - -The following constants are also available. -They are of type `half` and are accurate within the precision of the `half` type. - -[cols="1a,1a", options="header"] -|==== -| Constant | Description -| `M_E_H` | Value of e -| `M_LOG2E_H` | Value of log~2~ e -| `M_LOG10E_H` | Value of log~10~ e -| `M_LN2_H` | Value of ln 2 -| `M_LN10_H` | Value of ln 10 -| `M_PI_H` | Value of {pi} -| `M_PI_2_H` | Value of {pi} / 2 -| `M_PI_4_H` | Value of {pi} / 4 -| `M_1_PI_H` | Value of 1 / {pi} -| `M_2_PI_H` | Value of 2 / {pi} -| `M_2_SQRTPI_H` | Value of 2 / {sqrt}{pi} -| `M_SQRT2_H` | Value of {sqrt}2 -| `M_SQRT1_2_H` | Value of 1 / {sqrt}2 -|==== - -An application may query the configuration information using the op-code `CL_DEVICE_HALF_FP_CONFIG` with flink:clGetDeviceInfo for an OpenCL device that supports half precision floating-point. - -.Conversions - -The implicit conversion rules specified in section 6.2.1 now include the `half` scalar and `halfn` vector data types. - -The explicit casts described in section 6.2.2 are extended to take a `half` scalar data type and a `halfn` vector data type. - -The explicit conversion functions described in section 6.2.3 are extended to take a `half` scalar data type and a `halfn` vector data type.
- -The *as_typen*() function for re-interpreting types as described in section -6.2.4.2 is extended to allow conversion-free casts between `shortn`, -`ushortn` and `halfn` scalar and vector data types. - -== See Also - -reflink:EXTENSION, -flink:clGetDeviceInfo, -reflink:mathFunctions.txt, -reflink:commonFunctions.txt, -reflink:geometricFunctions.txt, -reflink:relationalFunctions.txt, -reflink:vectorDataLoadandStoreFunctions.txt, -reflink:asyncCopyFunctions.txt, -reflink:imageReadFunctions.txt, -reflink:imageWriteFunctions.txt - -== Document Notes - -For more information, see the OpenCL Extension Specification at URL - -https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#cl_khr_fp16 - -== Copyright - -include::footer.txt[] diff --git a/man/static/cl_khr_fp64.txt b/man/static/cl_khr_fp64.txt deleted file mode 100644 index 3d567e34..00000000 --- a/man/static/cl_khr_fp64.txt +++ /dev/null @@ -1,39 +0,0 @@ -// Copyright 2014-2024 The Khronos Group Inc. -// SPDX-License-Identifier: CC-BY-4.0 - -:data-uri: -:icons: font -include::{config}/attribs.txt[] - -= cl_khr_fp64(3) - -== Name - -cl_khr_fp64 - Provided for backward compatibility if `double` floating-point precision is supported. - -== C Specification - -[source,c] ----- -#pragma OPENCL EXTENSION cl_khr_fp64 : enable ----- - -== Description - -This extension was promoted to an optional core feature in OpenCL 1.2. -The extension string exists for backward compatibility if double precision -is supported.
- -== See Also - -flink:clGetDeviceInfo - -== Document Notes - -For more information, see the OpenCL Extension Specification at URL - -https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#cl_khr_fp64 - -== Copyright - -include::footer.txt[] diff --git a/man/static/cl_khr_gl_depth_images.txt b/man/static/cl_khr_gl_depth_images.txt deleted file mode 100644 index 6b8486f0..00000000 --- a/man/static/cl_khr_gl_depth_images.txt +++ /dev/null @@ -1,68 +0,0 @@ -// Copyright 2014-2024 The Khronos Group Inc. -// SPDX-License-Identifier: CC-BY-4.0 - -:data-uri: -:icons: font -include::{config}/attribs.txt[] - -= cl_khr_gl_depth_images(3) - -== Name - -cl_khr_gl_depth_images - Extends CL/GL sharing to allow a CL image to be created from a GL depth or depth-stencil texture. - -== C Specification - -[source,c] ----- -#pragma OPENCL EXTENSION cl_khr_gl_depth_images : enable ----- - -== Description - -The `cl_khr_gl_depth_images` extension extends CL / GL sharing (i.e. -the reflink:cl_khr_gl_sharing extension) defined in section 9.7 to allow a CL depth image to be created from a GL depth or depth-stencil texture. -If this extension is supported by an implementation, the string cl_khr_gl_depth_images will be present in the `CL_PLATFORM_EXTENSIONS` string described in table 4.1 or `CL_DEVICE_EXTENSIONS` string described in table 4.3. - -Depth images with an image channel order of `CL_DEPTH_STENCIL` can only be created using the flink:clCreateFromGLTexture API. - -For the image format given by channel order of `CL_DEPTH_STENCIL` and channel data type of `CL_UNORM_INT24`, the depth is stored as an unsigned normalized 24-bit value. - -For the image format given by channel order of `CL_DEPTH_STENCIL` and channel data type of `CL_FLOAT`, each pixel is two 32-bit values. -The depth is stored as a single precision floating-point value followed by the stencil which is stored as a 8-bit integer value. 
- -The stencil value cannot be read or written using the read imagef and write imagef built-in functions in an OpenCL kernel. - -Depth image objects with an image channel order = `CL_DEPTH_STENCIL` cannot be used as arguments to flink:clEnqueueReadImage, -flink:clEnqueueWriteImage, -flink:clEnqueueCopyImage, -flink:clEnqueueCopyImageToBuffer, -flink:clEnqueueCopyBufferToImage, -flink:clEnqueueMapImage and -flink:clEnqueueFillImage and will return a `CL_INVALID_OPERATION` error. - -The following new image formats are added to table 9.4 in section 9.7.3.1 of the OpenCL 2.1 extension specification. -If a GL texture object with an internal format from table 9.4 is successfully created by OpenGL, then there is guaranteed to be a mapping to one of the corresponding CL image format(s) in that table. - -[cols="1a,1a", options="header"] -|==== -| GL internal format | CL image format (channel order, channel data type) -| `GL_DEPTH_COMPONENT32F` | `CL_DEPTH, CL_FLOAT` -| `GL_DEPTH_COMPONENT16` | `CL_DEPTH, CL_UNORM_INT16` -| `GL_DEPTH24_STENCIL8` | `CL_DEPTH_STENCIL, CL_UNORM_INT24` -| `GL_DEPTH32F_STENCIL8` | `CL_DEPTH_STENCIL, CL_FLOAT` -|==== - -== See Also - -No cross-references are available - -== Document Notes - -For more information, see the OpenCL Extension Specification at URL - -https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#cl_khr_gl_depth_images - -== Copyright - -include::footer.txt[] diff --git a/man/static/cl_khr_gl_event.txt b/man/static/cl_khr_gl_event.txt deleted file mode 100644 index 8e5e37a8..00000000 --- a/man/static/cl_khr_gl_event.txt +++ /dev/null @@ -1,68 +0,0 @@ -// Copyright 2014-2024 The Khronos Group Inc. -// SPDX-License-Identifier: CC-BY-4.0 - -:data-uri: -:icons: font -include::{config}/attribs.txt[] - -= cl_khr_gl_event(3) - -== Name - -cl_khr_gl_event - Create an OpenCL event object from a GL sync object. 
- -== C Specification - -[source,c] ----- -cl_khr_gl_event ----- - -== Description - -This extension enables the function flink:clCreateEventFromGLsyncKHR. - -This extension allows creating OpenCL event objects linked to OpenGL fence sync objects, potentially improving efficiency of sharing images and buffers between the two APIs. -The companion `GL_ARB_cl_event` OpenGL extension provides the complementary functionality of creating an OpenGL sync object from an OpenCL event object. - -In addition, this extension modifies the behavior of flink:clEnqueueAcquireGLObjects and -flink:clEnqueueReleaseGLObjects to implicitly guarantee synchronization with an OpenGL context bound in the same thread as the OpenCL context. - -If this extension is supported by an implementation, the string `cl_khr_gl_event` will be present in the `CL_PLATFORM_EXTENSIONS` (see flink:clGetPlatformInfo) or `CL_DEVICE_EXTENSIONS` string (see -flink:clGetDeviceInfo). - -== Notes - -Event objects can also be used to reflect the status of an OpenGL sync object. -The sync object in turn refers to a fence command executing in an OpenGL command stream. -This provides another method of coordinating sharing of buffers and images between OpenGL and OpenCL (see section 9.7.6.1). - -If the `cl_khr_gl_event` extension is supported, then the OpenCL implementation will ensure that any such pending OpenGL operations are complete for an OpenGL context bound to the same thread as the OpenCL context. -This is referred to as implicit synchronization. - -If the `cl_khr_gl_event` extension is supported and the OpenGL context in question supports fence sync objects, completion of OpenGL commands may also be determined by placing a GL fence command after those commands using `glFenceSync`, creating an event from the resulting GL sync object using flink:clCreateEventFromGLsyncKHR, and determining completion of that event object via -flink:clEnqueueAcquireGLObjects. 
-This method may be considerably more efficient than calling `glFinish`, and is referred to as `explicit synchronization`. -Explicit synchronization is most useful when an OpenGL context bound to another thread is accessing the memory objects. - -If the `cl_khr_gl_event` extension is not supported, completion of OpenGL commands may be determined by issuing and waiting for completion of a `glFinish` command on all OpenGL contexts with pending references to these objects. -Some implementations may offer other efficient synchronization methods. -If such methods exist they will be described in platform-specific documentation. - -Note that no synchronization method other than `glFinish` is portable between all OpenGL implementations and all OpenCL implementations. -While this is the only way to ensure completion that is portable to all platforms, `glFinish` is an expensive operation and its use should be avoided if the `cl_khr_gl_event` extension is supported on a platform. - -== See Also - -reflink:EXTENSION, -flink:clCreateEventFromGLsyncKHR - -== Document Notes - -For more information, see the OpenCL Extension Specification at URL - -https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#cl_khr_gl_event - -== Copyright - -include::footer.txt[] diff --git a/man/static/cl_khr_gl_msaa_sharing.txt b/man/static/cl_khr_gl_msaa_sharing.txt deleted file mode 100644 index e87bb2f8..00000000 --- a/man/static/cl_khr_gl_msaa_sharing.txt +++ /dev/null @@ -1,84 +0,0 @@ -// Copyright 2014-2024 The Khronos Group Inc. -// SPDX-License-Identifier: CC-BY-4.0 - -:data-uri: -:icons: font -include::{config}/attribs.txt[] - -= cl_khr_gl_msaa_sharing(3) - -== Name - -cl_khr_gl_msaa_sharing - Extends the CL/GL sharing to support GL multi-sampled texture (color or depth). - -== C Specification - -[source,c] ----- -#pragma OPENCL EXTENSION cl_khr_gl_msaa_sharing : enable ----- - -== Description - -This extension name is `cl_khr_gl_msaa_sharing`. 
-This extension requires reflink:cl_khr_gl_depth_images. - -This extension adds read_image and write_image functions to the built-in -link:imageReadFunctions.html[Image Read Functions] and -link:imageWriteFunctions.html[Image Write Functions], respectively. - -Multi-sample CL image objects (MSAA) can only be read from a kernel. -Multi-sample CL image objects cannot be used as arguments to flink:clEnqueueReadImage , -flink:clEnqueueWriteImage, -flink:clEnqueueCopyImage, -flink:clEnqueueCopyImageToBuffer, -flink:clEnqueueCopyBufferToImage, -flink:clEnqueueMapImage and -flink:clEnqueueFillImage and will return a `CL_INVALID_OPERATION` error. - -Add the following new data types to table 6.3 in section 6.1.3 of the OpenCL 2.1 specification (see reflink:otherDataTypes): - -[cols="1a,1a", options="header"] -|==== -| Type | Description -| `image2d_msaa_t` - | A 2D multi-sample color image. - Refer to section 6.13.14 for a detailed description of the built-in functions that use this type. -| `image2d_array_msaa_t` - | A 2D multi-sample color image array. - Refer to section 6.13.14 for a detailed description of the built-in functions that use this type. -| `image2d_msaa_depth_t` - | A 2D multi-sample depth image. - Refer to section 6.13.14 for a detailed description of the built-in functions that use this type. -| `image2d_array_msaa_depth_t` - | A 2D multi-sample depth image array. - Refer to section 6.13.14 for a detailed description of the built-in functions that use this type. -|==== - -NOTE: When a multisample image is accessed in a kernel, the access takes one -vector of integers describing which pixel to fetch and an integer -corresponding to the sample numbers describing which sample within the pixel -to fetch. _sample_ identifies the sample position in the multi-sample image. 
- -For best performance, we recommend that _sample_ be a literal value so it is -known at compile time and the OpenCL compiler can perform appropriate -optimizations for multisample reads on the device. - -No standard sampling instructions are allowed on the multisample image. -Accessing a coordinate outside the image and/or a sample that is outside the number of samples associated with each pixel in the image is undefined. - -== See Also - -link:imageReadFunctions.html[Image Read Functions], -link:imageWriteFunctions.html[Image Write Functions], -flink:clGetGLTextureInfo - -== Document Notes - -For more information, see the OpenCL Extension Specification at URL - -https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#cl_khr_gl_msaa_sharing - -== Copyright - -include::footer.txt[] diff --git a/man/static/cl_khr_gl_sharing.txt b/man/static/cl_khr_gl_sharing.txt deleted file mode 100644 index 3525731f..00000000 --- a/man/static/cl_khr_gl_sharing.txt +++ /dev/null @@ -1,55 +0,0 @@ -// Copyright 2014-2024 The Khronos Group Inc. -// SPDX-License-Identifier: CC-BY-4.0 - -:data-uri: -:icons: font -include::{config}/attribs.txt[] - -= cl_khr_gl_sharing(3) - -== Name - -cl_khr_gl_sharing - Allow applications to use OpenGL buffer, texture and renderbuffer objects as OpenCL image objects. - -== C Specification - -[source,c] ----- -cl_khr_gl_sharing ----- - -== Description - -include::gl_sharingInc.txt[] - -== Notes - -If the reflink:cl_khr_mipmap_image extension is supported by the OpenCL device, the `cl_khr_gl_sharing` extension adds support for creating a mip-mapped CL image from a mip-mapped GL texture. - -To create a mip-mapped CL image from a mip-mapped GL texture, the _miplevel_ argument to flink:clCreateFromGLTexture should be a negative value. -If _miplevel_ is a negative value then a CL mipmapped image object is created from a mipmapped GL texture object instead of a CL image object for a specific miplevel of a GL texture.
- -NOTE: For a detailed description of how the level of detail is computed, please refer to section 3.9.7 of the OpenGL 3.0 specification. - -== See Also - -reflink:EXTENSION, -flink:clGetGLContextInfoKHR, -flink:clCreateFromGLBuffer, -flink:clCreateFromGLTexture, -flink:clCreateFromGLRenderbuffer, -flink:clGetGLObjectInfo, -flink:clGetGLTextureInfo, -flink:clEnqueueAcquireGLObjects, -flink:clEnqueueReleaseGLObjects, -reflink:cl_khr_mipmap_image - -== Document Notes - -For more information, see the OpenCL Extension Specification at URL - -https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#cl_khr_gl_sharing - -== Copyright - -include::footer.txt[] diff --git a/man/static/cl_khr_global_int32_base_atomics.txt b/man/static/cl_khr_global_int32_base_atomics.txt deleted file mode 100644 index 81a45a93..00000000 --- a/man/static/cl_khr_global_int32_base_atomics.txt +++ /dev/null @@ -1,41 +0,0 @@ -// Copyright 2014-2024 The Khronos Group Inc. -// SPDX-License-Identifier: CC-BY-4.0 - -:data-uri: -:icons: font -include::{config}/attribs.txt[] - -= cl_khr_global_int32_base_atomics(3) - -== Name - -cl_khr_global_int32_base_atomics - Extension enabling base 32-bit atomic functions. - -== C Specification - -[source,c] ----- -#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable ----- - -== Description - -This extension was promoted to a core feature in OpenCL 1.1. -The built-in atomic function names were changed to use the `atomic_` prefix -instead of `atom_`. 
- -== See Also - -reflink:cl_khr_global_int32_extended_atomics, -reflink:cl_khr_local_int32_base_atomics, -reflink:cl_khr_local_int32_extended_atomics - -== Document Notes - -For more information, see the OpenCL Extension Specification at URL - -https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#cl_khr_int32_atomics - -== Copyright - -include::footer.txt[] diff --git a/man/static/cl_khr_global_int32_extended_atomics.txt b/man/static/cl_khr_global_int32_extended_atomics.txt deleted file mode 100644 index e4ce63f1..00000000 --- a/man/static/cl_khr_global_int32_extended_atomics.txt +++ /dev/null @@ -1,41 +0,0 @@ -// Copyright 2014-2024 The Khronos Group Inc. -// SPDX-License-Identifier: CC-BY-4.0 - -:data-uri: -:icons: font -include::{config}/attribs.txt[] - -= cl_khr_global_int32_extended_atomics(3) - -== Name - -cl_khr_global_int32_extended_atomics - Extension enabling extended 32-bit atomic functions - -== C Specification - -[source,c] ----- -#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable ----- - -== Description - -This extension was promoted to a core feature in OpenCL 1.1. -The built-in atomic function names were changed to use the `atomic_` prefix -instead of `atom_`. - -== See Also - -reflink:cl_khr_global_int32_base_atomics, -reflink:cl_khr_local_int32_base_atomics, -reflink:cl_khr_local_int32_extended_atomics - -== Document Notes - -For more information, see the OpenCL Extension Specification at URL - -https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#cl_khr_int32_atomics - -== Copyright - -include::footer.txt[] diff --git a/man/static/cl_khr_icd.txt b/man/static/cl_khr_icd.txt deleted file mode 100644 index df5349f2..00000000 --- a/man/static/cl_khr_icd.txt +++ /dev/null @@ -1,126 +0,0 @@ -// Copyright 2014-2024 The Khronos Group Inc. 
-// SPDX-License-Identifier: CC-BY-4.0 - -:data-uri: -:icons: font -include::{config}/attribs.txt[] - -= cl_khr_icd(3) - -== Name - -cl_khr_icd - Extension through which the Khronos OpenCL installable client driver loader (ICD Loader) may expose multiple separate vendor installable client drivers (Vendor ICDs) for OpenCL. - -== C Specification - -[source,c] ----- -#pragma OPENCL EXTENSION cl_khr_icd : enable ----- - -== Description - -This is a platform extension which defines a simple mechanism through which the Khronos OpenCL installable client driver loader (ICD Loader) may expose multiple separate vendor installable client drivers (Vendor ICDs) for OpenCL. -An application written against the ICD Loader will be able to access all `cl_platform_ids` exposed by all vendor implementations with the ICD Loader acting as a demultiplexor. -If this extension is supported by an implementation, the string `cl_khr_icd` will be present in the `CL_PLATFORM_EXTENSIONS` string described in the table of allowed values for _param_name_ for flink:clGetDeviceInfo. - -If the `cl_khr_icd` extension is supported, then the flink:clIcdGetPlatformIDsKHR function is enabled. - -The official source for the ICD loader is available at the Khronos website. -The complete `_cl_icd_dispatch` structure is defined in the header `icd_dispatch.h` which is available as a part of the source code. - -.Inferring Vendors from Function Call Arguments - -At every OpenCL function call, the ICD Loader infers the vendor ICD function to call from the arguments to the function. -An object is said to be ICD compatible if it is of the following structure: - -[source,c] ----- -struct _cl_<object> -{ - struct _cl_icd_dispatch *dispatch; - // ... remainder of internal data -}; ----- - -`<object>` is one of `platform_id`, `device_id`, `context`, `command_queue`, `mem`, `program`, `kernel`, `event`, or `sampler`.
- -The structure `_cl_icd_dispatch` is a function pointer dispatch table which is used to direct calls to a particular vendor implementation. -All objects created from ICD compatible objects must be ICD compatible. - -A link to source code which defines the entries in the function table structure `_cl_icd_dispatch` is available in the Sample Code section of the OpenCL specification. -The order of the functions in `_cl_icd_dispatch` is determined by the ICD Loader's source. -The ICD Loader's source's `_cl_icd_dispatch` table is to be appended to only. - -Functions which do not have an argument from which the vendor implementation may be inferred are ignored, with the exception of `clGetExtensionFunctionAddress`, -which is described below. - -.ICD Data - -A Vendor ICD is defined by two pieces of data: - - * The Vendor ICD library specifies a library which contains the OpenCL entry points for the vendor's OpenCL implementation. - The vendor ICD's library file name should include the vendor name, or a vendor-specific implementation identifier. - * The Vendor ICD extension suffix is a short string which specifies the default suffix for extensions implemented only by that vendor. - See Additions to Chapter 9 for details on the mechanism through which this is accomplished. - The vendor suffix string is optional. - -.ICD Loader Vendor Enumeration on Windows - -To enumerate Vendor ICDs on Windows, the ICD Loader scans the values in the registry key `HKEY_LOCAL_MACHINE\SOFTWARE\Khronos\OpenCL\Vendors`. -For each value in this key which has `DWORD` data set to 0, the ICD Loader opens the dynamic link library specified by the name of the value using `LoadLibraryA`. - -For example, if the registry contains the following value - ----- -[HKEY_LOCAL_MACHINE\SOFTWARE\Khronos\OpenCL\Vendors] -"c:\\vendor a\\vndra_ocl.dll"=dword:00000000 ----- - -then the ICD will open the library "`c:\vendor a\vndra_ocl.dll`".
- -.ICD Loader Vendor Enumeration on Linux - -To enumerate vendor ICDs on Linux, the ICD Loader scans the files in the path `/etc/OpenCL/vendors`. -For each file in this path, the ICD Loader opens the file as a text file. -The expected format for the file is a single line of text which specifies the Vendor ICD's library. -The ICD Loader will attempt to open that file as a shared object using `dlopen()`. -Note that the library specified may be an absolute path or just a file name. - -For example, if the following file exists `/etc/OpenCL/vendors/VendorA.icd` and contains the text `libVendorAOpenCL.so` then the ICD Loader will load the library "`libVendorAOpenCL.so`". - -.ICD Loader Vendor Enumeration on Android - -To enumerate vendor ICDs on Android, the ICD Loader scans the files in the path `/system/vendor/Khronos/OpenCL/vendors`. -For each file in this path, the ICD Loader opens the file as a text file. -The expected format for the file is a single line of text which specifies the Vendor ICD's library. -The ICD Loader will attempt to open that file as a shared object using `dlopen()`. -Note that the library specified may be an absolute path or just a file name. - -For example, if the following file exists `/system/vendor/Khronos/OpenCL/vendors/VendorA.icd` and contains the text `libVendorAOpenCL.so` then the ICD Loader will load the library "`libVendorAOpenCL.so`". - -.Adding a Vendor Library - -Upon successfully loading a Vendor ICD's library, the ICD Loader queries the following functions from the library: flink:clIcdGetPlatformIDsKHR, -flink:clGetPlatformInfo, and `clGetExtensionFunctionAddress`. -If any of these functions are not present then the ICD Loader will close and ignore the library. - -Next the ICD Loader queries available ICD-enabled platforms in the library using flink:clIcdGetPlatformIDsKHR. 
-For each of these platforms, the ICD Loader queries the platform's extension string to verify that `cl_khr_icd` is supported, then queries the platform's Vendor ICD extension suffix using flink:clGetPlatformInfo with the value `CL_PLATFORM_ICD_SUFFIX_KHR`. - -If any of these steps fail, the ICD Loader will ignore the Vendor ICD and continue on to the next. - -== See Also - -reflink:EXTENSION, -flink:clIcdGetPlatformIDsKHR - -== Document Notes - -For more information, see the OpenCL Extension Specification at URL - -https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#cl_khr_icd - -== Copyright - -include::footer.txt[] diff --git a/man/static/cl_khr_il_program.txt b/man/static/cl_khr_il_program.txt deleted file mode 100644 index 96c03dfe..00000000 --- a/man/static/cl_khr_il_program.txt +++ /dev/null @@ -1,36 +0,0 @@ -// Copyright 2014-2024 The Khronos Group Inc. -// SPDX-License-Identifier: CC-BY-4.0 - -:data-uri: -:icons: font -include::{config}/attribs.txt[] - -= cl_khr_il_program(3) - -== Name - -cl_khr_il_program - Enable loading SPIR IL programs - -== C Specification - -[source,c] ----- -#pragma OPENCL EXTENSION cl_khr_il_program : enable ----- - -== Description - -The OpenCL KHR extension reflink:cl_khr_il_program has been deprecated. -This feature is now core. - -== See Also - -== Document Notes - -For more information, see the OpenCL Extension Specification at URL - -https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#cl_khr_il_program - -== Copyright - -include::footer.txt[] diff --git a/man/static/cl_khr_image2d_from_buffer.txt b/man/static/cl_khr_image2d_from_buffer.txt deleted file mode 100644 index 2c15ac8f..00000000 --- a/man/static/cl_khr_image2d_from_buffer.txt +++ /dev/null @@ -1,35 +0,0 @@ -// Copyright 2014-2024 The Khronos Group Inc. 
-// SPDX-License-Identifier: CC-BY-4.0 - -:data-uri: -:icons: font -include::{config}/attribs.txt[] - -= cl_khr_image2d_from_buffer(3) - -== Name - -cl_khr_image2d_from_buffer - Extension enabling creating 2D image from buffer data - -== C Specification - -[source,c] ----- -#pragma OPENCL EXTENSION cl_khr_image2d_from_buffer : enable ----- - -== Description - -This extension was promoted to a core feature in OpenCL 2.0. - -// == See Also - -== Document Notes - -For more information, see the OpenCL Extension Specification at URL - -https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#cl_khr_image2d_from_buffer - -== Copyright - -include::footer.txt[] diff --git a/man/static/cl_khr_initialize_memory.txt b/man/static/cl_khr_initialize_memory.txt deleted file mode 100644 index 3b9a900c..00000000 --- a/man/static/cl_khr_initialize_memory.txt +++ /dev/null @@ -1,62 +0,0 @@ -// Copyright 2014-2024 The Khronos Group Inc. -// SPDX-License-Identifier: CC-BY-4.0 - -:data-uri: -:icons: font -include::{config}/attribs.txt[] - -= cl_khr_initialize_memory(3) - -== Name - -cl_khr_initialize_memory - Extension adding support for initializing local and private memory before a kernel begins execution. - -== C Specification - -[source,c] ----- -#pragma OPENCL EXTENSION cl_khr_initialize_memory : enable ----- - -== Description - -Memory is allocated in various forms in OpenCL both explicitly (global memory) or implicitly (local, private memory). -This allocation so far does not provide a straightforward mechanism to initialize the memory on allocation. -In other words what is lacking is the equivalent of calloc for the currently supported malloc like capability. -This functionality is useful for a variety of reasons including ease of debugging, application controlled limiting of visibility to previous contents of memory and in some cases, optimization. - -This extension adds support for initializing local and private memory before a kernel begins execution. 
-This extension name is `cl_khr_initialize_memory`. - -Add a new context property to table 4.5 in section 4.4 (see flink:clCreateContext): - -[cols="1a,1a,1a", options="header"] -|==== -| cl_context_properties enum | Property value | Description -| `CL_CONTEXT_MEMORY_INITIALIZE_KHR` | `cl_context_memory_initialize_khr` - | Describes which memory types for the context must be initialized. - This is a bit-field, where the following values are currently supported: -+ - * `CL_CONTEXT_MEMORY_INITIALIZE_LOCAL_KHR` - Initialize local memory to zeros. - * `CL_CONTEXT_MEMORY_INITIALIZE_PRIVATE_KHR` - Initialize private memory to zeros. -|==== - -Updates to section 6.9 - Restrictions: - -If the context is created with `CL_CONTEXT_MEMORY_INITIALIZE_KHR`, appropriate memory locations as specified by the bit-field are initialized with zeroes, prior to the start of execution of any kernel. -The driver chooses when, prior to kernel execution, the initialization of local and/or private memory is performed. -The only requirement is there should be no values set from outside the context, which can be read during a kernel execution. - -== See Also - -No cross-references are available - -== Document Notes - -For more information, see the OpenCL Extension Specification at URL - -https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#cl_khr_initialize_memory - -== Copyright - -include::footer.txt[] diff --git a/man/static/cl_khr_int64_base_atomics.txt b/man/static/cl_khr_int64_base_atomics.txt deleted file mode 100644 index 8c448ce9..00000000 --- a/man/static/cl_khr_int64_base_atomics.txt +++ /dev/null @@ -1,39 +0,0 @@ -// Copyright 2014-2024 The Khronos Group Inc.
-// SPDX-License-Identifier: CC-BY-4.0 - -:data-uri: -:icons: font -include::{config}/attribs.txt[] - -= cl_khr_int64_base_atomics(3) - -== Name - -cl_khr_int64_base_atomics - Optional extensions that implement base atomic operations on 64-bit signed and unsigned integers to locations in __global and __local memory. - -== C Specification - -[source,c] ----- -#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable ----- - -== Description - -The behavior of these extensions is defined in the SPIR-V environment and appropriate kernel language specifications. - -== See Also - -reflink:EXTENSION, -reflink:cl_khr_int64_extended_atomics, -link:atomicFunctions.html[Atomic Functions] - -== Document Notes - -For more information, see the OpenCL Extension Specification at URL - -https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#cl_khr_int64_base_atomics - -== Copyright - -include::footer.txt[] diff --git a/man/static/cl_khr_int64_extended_atomics.txt b/man/static/cl_khr_int64_extended_atomics.txt deleted file mode 100644 index e3dccdde..00000000 --- a/man/static/cl_khr_int64_extended_atomics.txt +++ /dev/null @@ -1,39 +0,0 @@ -// Copyright 2014-2024 The Khronos Group Inc. -// SPDX-License-Identifier: CC-BY-4.0 - -:data-uri: -:icons: font -include::{config}/attribs.txt[] - -= cl_khr_int64_extended_atomics(3) - -== Name - -cl_khr_int64_extended_atomics - Optional extensions that implement extended atomic operations on 64-bit signed and unsigned integers to locations in __global and __local memory. - -== C Specification - -[source,c] ----- -#pragma OPENCL EXTENSION cl_khr_int64_extended_atomics : enable ----- - -== Description - -The behavior of these extensions is defined in the SPIR-V environment and appropriate kernel language specifications. 
- -== See Also - -reflink:EXTENSION, -reflink:cl_khr_int64_base_atomics, -link:atomicFunctions.html[Atomic Functions] - -== Document Notes - -For more information, see the OpenCL Extension Specification at URL - -https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#cl_khr_int64_extended_atomics - -== Copyright - -include::footer.txt[] diff --git a/man/static/cl_khr_local_int32_base_atomics.txt b/man/static/cl_khr_local_int32_base_atomics.txt deleted file mode 100644 index 7a18190b..00000000 --- a/man/static/cl_khr_local_int32_base_atomics.txt +++ /dev/null @@ -1,41 +0,0 @@ -// Copyright 2014-2024 The Khronos Group Inc. -// SPDX-License-Identifier: CC-BY-4.0 - -:data-uri: -:icons: font -include::{config}/attribs.txt[] - -= cl_khr_local_int32_base_atomics(3) - -== Name - -cl_khr_local_int32_base_atomics - Extension enabling base atomic operations on 32-bit integers to locations in __local memory. - -== C Specification - -[source,c] ----- -#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable ----- - -== Description - -This extension was promoted to a core feature in OpenCL 1.1. -The built-in atomic function names were changed to use the `atomic_` prefix -instead of `atom_`. - -== See Also - -reflink:cl_khr_global_int32_base_atomics, -reflink:cl_khr_global_int32_extended_atomics, -reflink:cl_khr_local_int32_extended_atomics - -== Document Notes - -For more information, see the OpenCL Extension Specification at URL - -https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#cl_khr_int32_atomics - -== Copyright - -include::footer.txt[] diff --git a/man/static/cl_khr_local_int32_extended_atomics.txt b/man/static/cl_khr_local_int32_extended_atomics.txt deleted file mode 100644 index dec50fa8..00000000 --- a/man/static/cl_khr_local_int32_extended_atomics.txt +++ /dev/null @@ -1,41 +0,0 @@ -// Copyright 2014-2024 The Khronos Group Inc. 
-// SPDX-License-Identifier: CC-BY-4.0 - -:data-uri: -:icons: font -include::{config}/attribs.txt[] - -= cl_khr_local_int32_extended_atomics(3) - -== Name - -cl_khr_local_int32_extended_atomics - Extension enabling extended atomic operations on 32-bit integers to locations in __local memory. - -== C Specification - -[source,c] ----- -#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable ----- - -== Description - -This extension was promoted to a core feature in OpenCL 1.1. -The built-in atomic function names were changed to use the `atomic_` prefix -instead of `atom_`. - -== See Also - -reflink:cl_khr_global_int32_base_atomics, -reflink:cl_khr_global_int32_extended_atomics, -reflink:cl_khr_local_int32_base_atomics - -== Document Notes - -For more information, see the OpenCL Extension Specification at URL - -https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#cl_khr_int32_atomics - -== Copyright - -include::footer.txt[] diff --git a/man/static/cl_khr_mipmap_image.txt b/man/static/cl_khr_mipmap_image.txt deleted file mode 100644 index 67058baa..00000000 --- a/man/static/cl_khr_mipmap_image.txt +++ /dev/null @@ -1,63 +0,0 @@ -// Copyright 2014-2024 The Khronos Group Inc. -// SPDX-License-Identifier: CC-BY-4.0 - -:data-uri: -:icons: font -include::{config}/attribs.txt[] - -= cl_khr_mipmap_image(3) - -== Name - -cl_khr_mipmap_image - Extension adding support for mipmaps. - -== C Specification - -[source,c] ----- -#pragma OPENCL EXTENSION cl_khr_mipmap_image : enable - -#pragma OPENCL EXTENSION cl_khr_mipmap_image_writes : enable ----- - -== Description - -This extension adds support for mipmaps. -This proposal is implemented as two optional extensions. -The `cl_khr_mipmap_image` extension implements support to create a mipmapped image, enqueue commands to read/write/copy/map a region of a mipmapped image and built-in functions that can be used to read a mip-mapped image in an OpenCL C program. 
-The `cl_khr_mipmap_image_writes` extension adds built-in functions that can be used to write a mip-mapped image in an OpenCL C program. -If the `cl_khr_mipmap_image_writes` extension is supported by the OpenCL device, the `cl_khr_mipmap_image` extension must also be supported. - -This extension enables the following functions: - - * reflink:get_image_num_mip_levels - -This extension enables changes to the following: - - * flink:clCreateImage - * flink:clEnqueueReadImage - * flink:clEnqueueWriteImage - * flink:clEnqueueMapImage - * flink:clEnqueueCopyImage - * flink:clEnqueueCopyImageToBuffer - * flink:clEnqueueCopyBufferToImage - * flink:clCreateSamplerWithProperties - * flink:imageReadFunctions - * flink:imageWriteFunctions - * reflink:cl_khr_gl_sharing - * flink:clCreateFromGLTexture - -== See Also - -reflink:EXTENSION, -reflink:get_image_num_mip_levels - -== Document Notes - -For more information, see the OpenCL Extension Specification at URL - -https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#cl_khr_mipmap_image - -== Copyright - -include::footer.txt[] diff --git a/man/static/cl_khr_priority_hints.txt b/man/static/cl_khr_priority_hints.txt deleted file mode 100644 index 776ad08f..00000000 --- a/man/static/cl_khr_priority_hints.txt +++ /dev/null @@ -1,40 +0,0 @@ -// Copyright 2014-2024 The Khronos Group Inc. -// SPDX-License-Identifier: CC-BY-4.0 - -:data-uri: -:icons: font -include::{config}/attribs.txt[] - -= cl_khr_priority_hints(3) - -== Name - -cl_khr_priority_hints - Extension adding priority hints for OpenCL - -== C Specification - -[source,c] ----- -#pragma OPENCL EXTENSION cl_khr_priority_hints : enable ----- - -== Description - -This extension adds priority hints for OpenCL, but does not specify the scheduling behavior or minimum guarantees. -It is expected that the user guides associated with each implementation which supports this extension describe the scheduling behavior guaranteed.
- -If this extension is supported by an implementation, the string `cl_khr_priority_hints` will be present in the `CL_PLATFORM_EXTENSIONS` string described in table 4.1 (see flink:clGetPlatformInfo). - -== See Also - -flink:clGetPlatformInfo - -== Document Notes - -For more information, see the OpenCL Extension Specification at URL - -https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#cl_khr_priority_hints - -== Copyright - -include::footer.txt[] diff --git a/man/static/cl_khr_spir.txt b/man/static/cl_khr_spir.txt deleted file mode 100644 index eb4d7f84..00000000 --- a/man/static/cl_khr_spir.txt +++ /dev/null @@ -1,55 +0,0 @@ -// Copyright 2014-2024 The Khronos Group Inc. -// SPDX-License-Identifier: CC-BY-4.0 - -:data-uri: -:icons: font -include::{config}/attribs.txt[] - -= cl_khr_spir(3) - -== Name - -cl_khr_spir - Extension adding support to create an OpenCL program object from a Standard Portable Intermediate Representation (SPIR) instance. - -== C Specification - -[source,c] ----- -#pragma OPENCL EXTENSION cl_khr_spir : enable ----- - -== Description - -This extension adds support to create an OpenCL program object from a Standard Portable Intermediate Representation (SPIR) instance. -SPIR is a vendor neutral non-source representation for OpenCL C programs that has since been superseded by the SPIR-V standard. - -flink:clCreateProgramWithBinary can be used to load a SPIR binary. -Once a program object has been created from a SPIR binary, -flink:clBuildProgram can be called to build a program executable or -flink:clCompileProgram can be called to compile the SPIR binary.
- -This extension adds changes to the following: - - * flink:clGetDeviceInfo - * flink:clGetProgramBuildInfo - * flink:clGetKernelArgInfo - -== See Also - -reflink:EXTENSION, -flink:clCreateProgramWithBinary, -flink:clBuildProgram, -flink:clCompileProgram, -flink:clGetDeviceInfo, -flink:clGetProgramBuildInfo, -flink:clGetKernelArgInfo - -== Document Notes - -For more information, see the OpenCL Extension Specification at URL - -https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#cl_khr_spir - -== Copyright - -include::footer.txt[] diff --git a/man/static/cl_khr_srgb_image_writes.txt b/man/static/cl_khr_srgb_image_writes.txt deleted file mode 100644 index e567116d..00000000 --- a/man/static/cl_khr_srgb_image_writes.txt +++ /dev/null @@ -1,50 +0,0 @@ -// Copyright 2014-2024 The Khronos Group Inc. -// SPDX-License-Identifier: CC-BY-4.0 - -:data-uri: -:icons: font -include::{config}/attribs.txt[] - -= cl_khr_srgb_image_writes(3) - -== Name - -cl_khr_srgb_image_writes - Extension allowing writes to sRGB images from a kernel. - -== Description - -This extensions adds changes to the following: - - * flink:imageWriteFunctions - * flink:clGetSupportedImageFormats - -== See Also - -reflink:EXTENSION, -flink:clCreateProgramWithBinary, -flink:clBuildProgram, -flink:clCompileProgram, - -== Description - -This extension enables kernels to write to sRGB images using the -reflink:imageWriteFunctions[write_imagef] built-in function. -The sRGB image formats that may be written to will be returned by -flink:clGetSupportedImageFormats. - -When the image is an sRGB image, the reflink:imageWriteFunctions[write_imagef] -built-in function will perform the linear to sRGB conversion. Only the R, G, -and B components are converted from linear to sRGB; the A component is -written as-is. 
- -// == See Also - -== Document Notes - -For more information, see the OpenCL Extension Specification at URL - -https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#cl_khr_srgb_image_writes - -== Copyright - -include::footer.txt[] diff --git a/man/static/cl_khr_subgroups.txt b/man/static/cl_khr_subgroups.txt deleted file mode 100644 index 64a9075f..00000000 --- a/man/static/cl_khr_subgroups.txt +++ /dev/null @@ -1,36 +0,0 @@ -// Copyright 2014-2024 The Khronos Group Inc. -// SPDX-License-Identifier: CC-BY-4.0 - -:data-uri: -:icons: font -include::{config}/attribs.txt[] - -= cl_khr_subgroups(3) - -== Name - -cl_khr_subgroups - Deprecated extension - -== C Specification - -[source,c] ----- -#pragma OPENCL EXTENSION cl_khr_subgroups : enable ----- - -== Description - -The OpenCL 2.0 KHR extension `cl_khr_subgroups` has been deprecated. -The feature is now core. - -== See Also - -== Document Notes - -For more information, see the OpenCL Extension Specification at URL - -https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#cl_khr_subgroups - -== Copyright - -include::footer.txt[] diff --git a/man/static/cl_khr_terminate_context.txt b/man/static/cl_khr_terminate_context.txt deleted file mode 100644 index d20eec86..00000000 --- a/man/static/cl_khr_terminate_context.txt +++ /dev/null @@ -1,54 +0,0 @@ -// Copyright 2014-2024 The Khronos Group Inc. -// SPDX-License-Identifier: CC-BY-4.0 - -:data-uri: -:icons: font -include::{config}/attribs.txt[] - -= cl_khr_terminate_context(3) - -== Name - -cl_khr_terminate_context - Extension allowing an API to release a context. - -== C Specification - -[source,c] ----- -#pragma OPENCL EXTENSION cl_khr_terminate_context : enable ----- - -== Description - -Today, OpenCL provides an API to release a context. -This operation is done only after all queues, memory object, programs and kernels are released, which in turn might wait for all ongoing operations to complete. 
-However, there are cases in which a fast release is required, or release operation cannot be done, as commands are stuck in mid execution. -An example of the first case can be program termination due to exception, or quick shutdown due to low power. -Examples of the second case are when a kernel is running too long, or gets stuck, or it may result from user action which makes the results of the computation unnecessary. - -In many cases, the driver or the device is capable of speeding up the closure of ongoing operations when the results are no longer required in a much more expedient manner than waiting for all previously enqueued operations to finish. - -This extension implements a new query to check whether a device can terminate an OpenCL context and adds an API to terminate a context. - -This extension enables the following function: - - * flink:clTerminateContextKHR - -This extension adds changes to the following: - - * flink:clGetDeviceInfo - * flink:clCreateContext - -== See Also - -flink:clTerminateContextKHR - -== Document Notes - -For more information, see the OpenCL Extension Specification at URL - -https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#cl_khr_terminate_context - -== Copyright - -include::footer.txt[] diff --git a/man/static/cl_khr_throttle_hints.txt b/man/static/cl_khr_throttle_hints.txt deleted file mode 100644 index 8463c503..00000000 --- a/man/static/cl_khr_throttle_hints.txt +++ /dev/null @@ -1,43 +0,0 @@ -// Copyright 2014-2024 The Khronos Group Inc. -// SPDX-License-Identifier: CC-BY-4.0 - -:data-uri: -:icons: font -include::{config}/attribs.txt[] - -= cl_khr_throttle_hints(3) - -== Name - -cl_khr_throttle_hints - Extension adding throttle hints for OpenCL - -== C Specification - -[source,c] ----- -#pragma OPENCL EXTENSION cl_khr_throttle_hints : enable ----- - -== Description - -This extension adds throttle hints for OpenCL, but does not specify the throttling behaviour or minimum guarantees.
-It is expected that the user guide associated with each implementation which supports this extension describe the throttling behaviour guaranteed. - -If this extension is supported by an implementation, the string cl_khr_throttle_hints will be present in the `CL_PLATFORM_EXTENSIONS` string described in table 4.1 (see flink:clGetPlatformInfo). - -Note that the throttle hint is orthogonal to functionality defined in reflink:cl_khr_priority_hints) extension. -For example, a task may have high priority (`CL_QUEUE_PRIORITY_HIGH_KHR`) but should at the same time be executed at an optimized throttle setting (`CL_QUEUE_THROTTLE_LOW`). - -== See Also - -flink:clGetPlatformInfo - -== Document Notes - -For more information, see the OpenCL Extension Specification at URL - -https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#cl_khr_throttle_hints - -== Copyright - -include::footer.txt[] diff --git a/man/static/gl_formatsInc.txt b/man/static/gl_formatsInc.txt deleted file mode 100644 index 8264d602..00000000 --- a/man/static/gl_formatsInc.txt +++ /dev/null @@ -1,74 +0,0 @@ -.OpenGL and Corresponding OpenCL Image Formats - -The table below (Table 9.4) describes the list of GL texture internal -formats and the corresponding CL image formats. -If a GL texture object with an internal format from the table below is -successfully created by OpenGL, then there is guaranteed to be a mapping to -one of the corresponding CL image format(s) in that table. -Texture objects created with other OpenGL internal formats may (but are not -guaranteed to) have a mapping to a CL image format; if such mappings exist, -they are guaranteed to preserve all color components, data types, and at -least the number of bits/component actually allocated by OpenGL for that -format. 
- -[cols="1a,1a", options="header"] -|==== -|GL internal format | CL image format (channel order, channel data type) -| `GL_RGBA8` | `CL_RGBA, CL_UNORM_INT8 or CL_BGRA, CL_UNORM_INT8` -| `GL_SRGBA8_ALPHA8` | `CL_sRGBA, CL_UNORM_INT8` -| `GL_RGBA`, - `GL_UNSIGNED_INT_8_8_8_8_REV` | `CL_RGBA, CL_UNORM_INT8` -| `GL_BGRA`, - `GL_UNSIGNED_INT_8_8_8_8_REV` | `CL_BGRA, CL_UNORM_INT8` -| `GL_RGBA8I, GL_RGBA8I_EXT` | `CL_RGBA, CL_SIGNED_INT8` -| `GL_RGBA16I, GL_RGBA16I_EXT` | `CL_RGBA, CL_SIGNED_INT16` -| `GL_RGBA32I, GL_RGBA32I_EXT` | `CL_RGBA, CL_SIGNED_INT32` -| `GL_RGBA8UI, GL_RGBA8UI_EXT` | `CL_RGBA, CL_UNSIGNED_INT8` -| `GL_RGBA16UI, GL_RGBA16UI_EXT` | `CL_RGBA, CL_UNSIGNED_INT16` -| `GL_RGBA32UI, GL_RGBA32UI_EXT` | `CL_RGBA, CL_UNSIGNED_INT32` -| `GL_RGBA8_SNORM` | `CL_RGBA, CL_SNORM_INT8` -| `GL_RGBA16` | `CL_RGBA, CL_UNORM_INT16` -| `GL_RGBA16_SNORM` | `CL_RGBA, CL_SNORM_INT166` -| `GL_RGBA16F, GL_RGBA16F_ARB` | `CL_RGBA, CL_HALF_FLOAT` -| `GL_RGBA32F, GL_RGBA32F_ARB` | `CL_RGBA, CL_FLOAT` -| `GL_R8` | `CL_R, CL_UNORM_INT8` -| `GL_R8_SNORM` | `CL_R, CL_SNORM_INT8` -| `GL_R16` | `CL_R, CL_UNORM_INT16` -| `GL_R16_SNORM` | `CL_R, CL_SNORM_INT16` -| `GL_R16F` | `CL_R, CL_HALF_FLOAT` -| `GL_R32F` | `CL_R, CL_FLOAT` -| `GL_R8I` | `CL_R, CL_SIGNED_INT8` -| `GL_R16I` | `CL_R, CL_SIGNED_INT16` -| `GL_R32I` | `CL_R, CL_SIGNED_INT32` -| `GL_R8UI` | `CL_R, CL_UNSIGNED_INT8` -| `GL_R16UI` | `CL_R, CL_UNSIGNED_INT16` -| `GL_R32UI` | `CL_R, CL_UNSIGNED_INT32` -| `GL_RG8` | `CL_RG, CL_UNORM_INT8` -| `GL_RG8_SNORM` | `CL_RG, CL_SNORM_INT8` -| `GL_RG16` | `CL_RG, CL_UNORM_INT16` -| `GL_RG16_SNORM` | `CL_RG, CL_SNORM_INT16` -| `GL_RG16F` | `CL_RG, CL_HALF_FLOAT` -| `GL_RG32F` | `CL_RG, CL_FLOAT` -| `GL_RG8I` | `CL_RG, CL_SIGNED_INT8` -| `GL_RG16I` | `CL_RG, CL_SIGNED_INT16` -| `GL_RG32I` | `CL_RG, CL_SIGNED_INT32` -| `GL_RG8UI` | `CL_RG, CL_UNSIGNED_INT8` -| `GL_RG16UI` | `CL_RG, CL_UNSIGNED_INT16` -| `GL_RG32UI` | `CL_RG, CL_UNSIGNED_INT32` -|==== - -If the 
reflink:cl_khr_gl_depth_images extension is enabled, the following -new image formats are added to table 9.4 in section 9.6.3.1 of the OpenCL -2.0 extension specification. If a GL texture object with an internal format -from table 9.4 is successfully created by OpenGL, then there is guaranteed -to be a mapping to one of the corresponding CL image format(s) in that -table. - -[cols="1a,1a", options="header"] -|==== -| GL internal format | CL image format (channel order, channel data type) -| `GL_DEPTH_COMPONENT32F` | `CL_DEPTH, CL_FLOAT` -| `GL_DEPTH_COMPONENT16` | `CL_DEPTH, CL_UNORM_INT16` -| `GL_DEPTH24_STENCIL8` | `CL_DEPTH_STENCIL, CL_UNORM_INT24` -| `GL_DEPTH32F_STENCIL8` | `CL_DEPTH_STENCIL, CL_FLOAT` -|==== diff --git a/man/static/gl_lifetimeInc.txt b/man/static/gl_lifetimeInc.txt deleted file mode 100644 index 7704c2c8..00000000 --- a/man/static/gl_lifetimeInc.txt +++ /dev/null @@ -1,18 +0,0 @@ -.Lifetime of [GL] Shared Objects - -An OpenCL memory object created from an OpenGL object (hereinafter refered -to as a "shared CL/GL object") remains valid as long as the corresponding GL -object has not been deleted. If the GL object is deleted through the GL API -(e.g. `glDeleteBuffers`, `glDeleteTextures`, or `glDeleteRenderbuffers`), -subsequent use of the CL buffer or image object will result in undefined -behavior, including but not limited to possible CL errors and data -corruption, but may not result in program termination. - -The CL context and corresponding command-queues are dependent on the -existence of the GL share group object, or the share group associated with -the GL context from which the CL context is created. If the GL share group -object or all GL contexts in the share group are destroyed, any use of the -CL context or command-queue(s) will result in undefined behavior, which may -include program termination. Applications should destroy the CL -command-queue(s) and CL context before destroying the corresponding GL share -group or contexts. 
diff --git a/man/static/gl_sharingInc.txt b/man/static/gl_sharingInc.txt deleted file mode 100644 index d389e6c4..00000000 --- a/man/static/gl_sharingInc.txt +++ /dev/null @@ -1,27 +0,0 @@ -General information about GL sharing follows. - -The OpenCL specification in section 9.7 defines how to share data with -texture and buffer objects in a parallel OpenGL implementation, but does not -define how the association between an OpenCL context and an OpenGL context -or share group is established. This extension defines optional attributes to -OpenCL context creation routines which associate a GL context or share group -object with a newly created OpenCL context. If this extension is supported -by an implementation, the string "cl_khr_gl_sharing" will be present in the -`CL_DEVICE_EXTENSIONS` string described in the table of allowed values for -_param_name_ for flink:clGetDeviceInfo or in the `CL_PLATFORM_EXTENSIONS` -string described in the table of allowed values for _param_name_ for -flink:clGetPlatformInfo. - -This section discusses OpenCL functions that allow applications to use -OpenGL buffer, texture, and renderbuffer objects as OpenCL memory objects. -This allows efficient sharing of data between OpenCL and OpenGL. The OpenCL -API may be used to execute kernels that read and/or write memory objects -that are also OpenGL objects. - -An OpenCL image object may be created from an OpenGL texture or renderbuffer -object. An OpenCL buffer object may be created from an OpenGL buffer object. - -Any supported OpenGL object defined within the GL share group object, or the -share group associated with the GL context from which the CL context is -created, may be shared, with the exception of the default OpenGL objects -(i.e. objects named zero), which may not be shared. 
diff --git a/man/static/gl_syncInc.txt b/man/static/gl_syncInc.txt deleted file mode 100644 index 735f0d9a..00000000 --- a/man/static/gl_syncInc.txt +++ /dev/null @@ -1,66 +0,0 @@ -.Synchronizing OpenCL and OpenGL Access - -In order to ensure data integrity, the application is responsible for -synchronizing access to shared CL/GL objects by their respective APIs. -Failure to provide such synchronization may result in race conditions and -other undefined behavior including non-portability between implementations. - -Prior to calling flink:clEnqueueAcquireGLObjects, the application must -ensure that any pending GL operations which access the objects specified in -_mem_objects_ have completed. This may be accomplished portably by issuing -and waiting for completion of a `glFinish` command on all GL contexts with -pending references to these objects. Implementations may offer more -efficient synchronization methods; for example on some platforms calling -`glFlush` may be sufficient, or synchronization may be implicit within a -thread, or there may be vendor-specific extensions that enable placing a -fence in the GL command stream and waiting for completion of that fence in -the CL command-queue. Note that no synchronization methods other than -`glFinish` are portable between OpenGL implementations at this time. - -When the extension reflink:cl_khr_egl_event is supported: Prior to calling -flink:clEnqueueAcquireGLObjects, the application must ensure that any -pending EGL or EGL client API operations which access the objects specified -in _mem_objects_ have completed. If the reflink:cl_khr_egl_event extension -is supported and the EGL context in question supports fence sync objects, -_explicit synchronisation_ can be achieved as set out in section 5.7.1. 
If -the reflink:cl_khr_egl_event extension is not supported, completion of EGL -client API commands may be determined by issuing and waiting for completion -of commands such as `glFinish` or `vgFinish` on all client API contexts with -pending references to these objects. Some implementations may offer other -efficient synchronization methods. If such methods exist they will be -described in platform-specific documentation. Note that no synchronization -methods other than `glFinish` and `vgFinish` are portable between all EGL -client API implementations and all OpenCL implementations. While this is the -only way to ensure completion that is portable to all platforms, these are -expensive operation and their use should be avoided if the -reflink:cl_khr_egl_event extension is supported on a platform. - -Similarly, after calling flink:clEnqueueReleaseGLObjects, the application is -responsible for ensuring that any pending OpenCL operations which access the -objects specified in _mem_objects_ have completed prior to executing -subsequent GL commands which reference these objects. This may be -accomplished portably by calling flink:clWaitForEvents with the event object -returned by flink:clEnqueueReleaseGLObjects, or by calling flink:clFinish. -As above, some implementations may offer more efficient methods. - -The application is responsible for maintaining the proper order of -operations if the CL and GL contexts are in separate threads. - -If a GL context is bound to a thread other than the one in which -flink:clEnqueueReleaseGLObjects is called, changes to any of the objects in -_mem_objects_ may not be visible to that context without additional steps -being taken by the application. For an OpenGL 3.1 (or later) context, the -requirements are described in Appendix D ("Shared Objects and Multiple -Contexts") of the OpenGL 3.1 Specification. For prior versions of OpenGL, -the requirements are implementation-dependent. 
- -Attempting to access the data store of an OpenGL object after it has been -acquired by OpenCL and before it has been released will result in undefined -behavior. Similarly, attempting to access a shared CL/GL object from OpenCL -before it has been acquired by the OpenCL command-queue, or after it has -been released, will result in undefined behavior. - -If the reflink:cl_khr_gl_event extension is supported, -then the OpenCL implementation will ensure that any such pending OpenGL -operations are complete for an OpenGL context bound to the same thread as -the OpenCL context. This is referred to as implicit synchronization. diff --git a/man/static/sharingD3D10Inc.txt b/man/static/sharingD3D10Inc.txt deleted file mode 100644 index f14213b2..00000000 --- a/man/static/sharingD3D10Inc.txt +++ /dev/null @@ -1,45 +0,0 @@ -General information about sharing memory objects with Direct3D 10 follows. - -The goal of this extension is to provide interoperability between OpenCL and -Direct3D 10. This is designed to function analogously to the -reflink:cl_khr_gl_sharing as defined in sections 9.7 and 9.8. If this -extension is supported by an implementation, the string -"cl_khr_d3d10_sharing" will be present in the `CL_PLATFORM_EXTENSIONS` (see -flink:clGetPlatformInfo) or `CL_DEVICE_EXTENSIONS` (see -flink:clGetDeviceInfo). - -As currently proposed the interfaces for this extension would be provided in -`cl_d3d10.h`. - -The OpenCL functions enabled by the reflink:cl_khr_d3d10_sharing extension -allow applications to use Direct3D 10 resources as OpenCL memory objects. -This allows efficient sharing of data between OpenCL and Direct3D 10. The -OpenCL API may be used to execute kernels that read and/or write memory -objects that are also Direct3D 10 resources. An OpenCL image object may be -created from a Direct3D 10 texture resource. An OpenCL buffer object may be -created from a Direct3D 10 buffer resource. 
OpenCL memory objects may be -created from Direct3D 10 objects if and only if the OpenCL context has been -created from a Direct3D 10 device. - -.Lifetime of Shared [D3D10] Objects - -An OpenCL memory object created from a Direct3D 10 resource remains valid as -long as the corresponding Direct3D 10 resource has not been deleted. If the -Direct3D 10 resource is deleted through the Direct3D 10 API, subsequent use -of the OpenCL memory object will result in undefined behavior, including but -not limited to possible OpenCL errors, data corruption, and program -termination. - -The successful creation of a `cl_context` against a Direct3D 10 device -specified via the context create parameter `CL_CONTEXT_D3D10_DEVICE_KHR` -will increment the internal Direct3D reference count on the specified -Direct3D 10 device. The internal Direct3D reference count on that Direct3D -10 device will be decremented when the OpenCL reference count on the -returned OpenCL context drops to zero. - -The OpenCL context and corresponding command-queues are dependent on the -existence of the Direct3D 10 device from which the OpenCL context was -created. If the Direct3D 10 device is deleted through the Direct3D 10 API, -subsequent use of the OpenCL context will result in undefined behavior, -including but not limited to possible OpenCL errors, data corruption, and -program termination. diff --git a/man/static/sharingD3D11Inc.txt b/man/static/sharingD3D11Inc.txt deleted file mode 100644 index 8cc2a914..00000000 --- a/man/static/sharingD3D11Inc.txt +++ /dev/null @@ -1,49 +0,0 @@ -.Sharing Memory Objects with Direct3D 11 Resources - -The goal of this extension is to provide interoperability between OpenCL and -Direct3D 11. This is designed to function analogously to the -reflink:cl_khr_gl_sharing as defined in sections 9.7 and 9.8. 
If this -extension is supported by an implementation, the string -"cl_khr_d3d11_sharing" will be present in the `CL_PLATFORM_EXTENSIONS` or -`CL_DEVICE_EXTENSIONS` string described in the table of allowed values for -_param_name_ for flink:clGetDeviceInfo or flink:clGetPlatformInfo. - -As currently proposed the interfaces for this extension would be provided in -`cl_d3d11.h`. - -This section discusses OpenCL functions that allow applications to use -Direct3D 11 resources as OpenCL memory objects. This allows efficient -sharing of data between OpenCL and Direct3D 11. The OpenCL API may be used -to execute kernels that read and/or write memory objects that are also -Direct3D 11 resources. An OpenCL image object may be created from a Direct3D -11 texture resource. An OpenCL buffer object may be created from a Direct3D -11 buffer resource. OpenCL memory objects may be created from Direct3D 11 -objects if and only if the OpenCL context has been created from a Direct3D -11 device. - -.Lifetime of Shared Objects - -An OpenCL memory object created from a Direct3D 11 resource remains valid as -long as the corresponding Direct3D 11 resource has not been deleted. If the -Direct3D 11 resource is deleted through the Direct3D 11 API, subsequent use -of the OpenCL memory object will result in undefined behavior, including but -not limited to possible OpenCL errors, data corruption, and program -termination. - -The successful creation of a `cl_context` against a Direct3D 11 device -specified via the context create parameter `CL_CONTEXT_D3D11_DEVICE_KHR` -will increment the internal Direct3D reference count on the specified -Direct3D 11 device. The internal Direct3D reference count on that Direct3D -11 device will be decremented when the OpenCL reference count on the -returned OpenCL context drops to zero. - -The OpenCL context and corresponding command-queues are dependent on the -existence of the Direct3D 11 device from which the OpenCL context was -created. 
If the Direct3D 11 device is deleted through the Direct3D 11 API, -subsequent use of the OpenCL context will result in undefined behavior, -including but not limited to possible OpenCL errors, data corruption, and -program termination. - -Properties of Direct3D 11 objects may be queried using -flink:clGetMemObjectInfo and flink:clGetImageInfo with _param_name_ -`CL_MEM_D3D11_RESOURCE_KHR` and `CL_IMAGE_D3D11_SUBRESOURCE_KHR`. diff --git a/man/static/sharingDX9Inc.txt b/man/static/sharingDX9Inc.txt deleted file mode 100644 index e05578a7..00000000 --- a/man/static/sharingDX9Inc.txt +++ /dev/null @@ -1,77 +0,0 @@ -General information about sharing memory objects with DX9 follows. - -The goal of this extension is to allow applications to use media surfaces as -OpenCL memory objects. This allows efficient sharing of data between OpenCL -and selected adapter APIs (only DX9 for now). If this extension is -supported, an OpenCL image object can be created from a media surface and -the OpenCL API can be used to execute kernels that read and/or write memory -objects that are media surfaces. Note that OpenCL memory objects may be -created from the adapter media surface if and only if the OpenCL context has -been created from that adapter. - -If this extension is supported by an implementation, the string -"cl_khr_dx9_media_sharing" will be present in the `CL_PLATFORM_EXTENSIONS` -(see flink:clGetPlatformInfo) or `CL_DEVICE_EXTENSIONS` (see -flink:clGetDeviceInfo). - -As currently proposed the interfaces for this extension would be provided in -`cl_dx9_media_sharing.h`. - -.Surface formats for Media Surface Sharing - -This section includes the D3D surface formats that are supported when the -adapter type is one of the Direct 3D lineage. Using a D3D surface format not -listed here is an error. 
To extend the use of this extension to support -media adapters beyond DirectX9 tables similar to the ones in this section -will need to be defined for the surface formats supported by the new media -adapter. All implementations that support this extension are required to -support the NV12 surface format, the other surface formats supported are the -same surface formats that the adapter you are sharing with supports as long -as they are listed in the tables below (Tables 9.10.3 and 9.10.4). - -[cols="1a,1a", options="header"] -|==== -| FOUR CC code | CL image format (channel order, channel data type) -| FOURCC('N', 'V', '1', '2'), Plane 0 | `CL_R, CL_UNORM_INT8` -| FOURCC('N', 'V', '1', '2'), Plane 1 | `CL_RG, CL_UNORM_INT8` -| FOURCC('Y', 'V', '1', '2'), Plane 0 | `CL_R, CL_UNORM_INT8` -| FOURCC('Y', 'V', '1', '2'), Plane 1 | `CL_R, CL_UNORM_INT8` -| FOURCC('Y', 'V', '1', '2'), Plane 2 | `CL_R, CL_UNORM_INT8` -|==== - -In the table above, NV12 Plane 0 corresponds to the luminance (Y) channel -and Plane 1 corresponds to the UV channels. The YV12 Plane 0 corresponds to -the Y channel, Plane 1 to the U channel and Plane 2 to the V channel. Note -that the YUV formats map to `CL_R` and `CL_RG` but do not perform any YUV to -RGB conversion and vice-versa. 
- -[cols="1a,1a", options="header"] -|==== -| D3D format | CL image format (channel order, channel data type) -| D3DFMT_R32F | `CL_R, CL_FLOAT` -| D3DFMT_R16F | `CL_R, CL_HALF_FLOAT` -| D3DFMT_L16 | `CL_R, CL_UNORM_INT16` -| D3DFMT_A8 | `CL_A, CL_UNORM_INT8` -| D3DFMT_L8 | `CL_R, CL_UNORM_INT8` -| D3DFMT_G32R32F | `CL_RG, CL_FLOAT` -| D3DFMT_G16R16F | `CL_RG, CL_HALF_FLOAT` -| D3DFMT_G16R16 | `CL_RG, CL_UNORM_INT16` -| D3DFMT_A8L8 | `CL_RG, CL_UNORM_INT8` -| D3DFMT_A32B32G32R32F | `CL_RGBA, CL_FLOAT` -| D3DFMT_A16B16G16R16F | `CL_RGBA, CL_HALF_FLOAT` -| D3DFMT_A16B16G16R16 | `CL_RGBA, CL_UNORM_INT16` -| D3DFMT_A8B8G8R8 | `CL_RGBA, CL_UNORM_INT8` -| D3DFMT_X8B8G8R8 | `CL_RGBA, CL_UNORM_INT8` -| D3DFMT_A8R8G8B8 | `CL_BGRA, CL_UNORM_INT8` -| D3DFMT_X8R8G8B8 | `CL_BGRA, CL_UNORM_INT8` -|==== - -Note that D3D9 format names seem to imply that the order of the color -channels are switched relative to OpenCL but this is not the case. For -example, layout of channels for each pixel for `D3DFMT_A32FB32FG32FR32F` is -the same as `CL_RGBA`, `CL_FLOAT`. - -Properties of media surface objects may be queried using -flink:clGetMemObjectInfo and flink:clGetImageInfo with _param_name_ -`CL_MEM_DX9_MEDIA_ADAPTER_TYPE_KHR`, `CL_MEM_DX9_MEDIA_SURFACE_INFO_KHR` and -`CL_IMAGE_DX9_MEDIA_SURFACE_PLANE_KHR`. diff --git a/scripts/apiconventions.py b/scripts/apiconventions.py new file mode 100644 index 00000000..4d27d04f --- /dev/null +++ b/scripts/apiconventions.py @@ -0,0 +1,13 @@ +#!/usr/bin/python3 -i +# +# Copyright 2021-2024 The Khronos Group Inc. +# SPDX-License-Identifier: Apache-2.0 + +# Generic alias for working group-specific API conventions interface. + +# This import should be changed at the repository / working group level to +# specify the correct API's conventions. 
+ +defaultAPI = 'opencl' + +from clconventions import OpenCLConventions as APIConventions diff --git a/scripts/cgenerator.py b/scripts/cgenerator.py index d4cab2b0..f86658ee 100644 --- a/scripts/cgenerator.py +++ b/scripts/cgenerator.py @@ -6,9 +6,11 @@ import os import re -from generator import (GeneratorOptions, OutputGenerator, noneStr, - regSortFeatures, write) +from generator import (GeneratorOptions, + MissingGeneratorOptionsConventionsError, + MissingGeneratorOptionsError, MissingRegistryError, + OutputGenerator, noneStr, regSortFeatures, write) class CGeneratorOptions(GeneratorOptions): """CGeneratorOptions - subclass of GeneratorOptions. @@ -17,12 +19,14 @@ class CGeneratorOptions(GeneratorOptions): generation.""" def __init__(self, - prefixText="", + prefixText='', genFuncPointers=True, protectFile=True, protectFeature=True, protectProto=None, protectProtoStr=None, + protectExtensionProto=None, + protectExtensionProtoStr=None, apicall='', apientry='', apientryp='', @@ -31,6 +35,7 @@ def __init__(self, alignFuncParam=0, genEnumBeginEndRange=False, genAliasMacro=False, + genStructExtendsComment=False, aliasMacro='', misracstyle=False, misracppstyle=False, @@ -40,11 +45,11 @@ def __init__(self, Additional parameters beyond parent class: - prefixText - list of strings to prefix generated header with - (usually a copyright statement + calling convention macros). + (usually a copyright statement + calling convention macros) - protectFile - True if multiple inclusion protection should be - generated (based on the filename) around the entire header. + generated (based on the filename) around the entire header - protectFeature - True if #ifndef..#endif protection should be - generated around a feature interface in the header file. 
+ generated around a feature interface in the header file - genFuncPointers - True if function pointer typedefs should be generated - protectProto - If conditional protection should be generated @@ -54,12 +59,19 @@ def __init__(self, set to None. - protectProtoStr - #ifdef/#ifndef symbol to use around prototype declarations, if protectProto is set + - protectExtensionProto - If conditional protection should be generated + around extension prototype declarations, set to either '#ifdef' + to require opt-in (#ifdef protectExtensionProtoStr) or '#ifndef' + to require opt-out (#ifndef protectExtensionProtoStr). Otherwise + set to None + - protectExtensionProtoStr - #ifdef/#ifndef symbol to use around + extension prototype declarations, if protectExtensionProto is set - apicall - string to use for the function declaration prefix, - such as APICALL on Windows. + such as APICALL on Windows - apientry - string to use for the calling convention macro, - in typedefs, such as APIENTRY. + in typedefs, such as APIENTRY - apientryp - string to use for the calling convention macro - in function pointer typedefs, such as APIENTRYP. 
+ in function pointer typedefs, such as APIENTRYP - indentFuncProto - True if prototype declarations should put each parameter on a separate line - indentFuncPointer - True if typedefed function pointers should put each @@ -70,6 +82,9 @@ def __init__(self, be generated for enumerated types - genAliasMacro - True if the OpenXR alias macro should be generated for aliased types (unclear what other circumstances this is useful) + - genStructExtendsComment - True if comments showing the structures + whose pNext chain a structure extends are included before its + definition - aliasMacro - alias macro to inject when genAliasMacro is True - misracstyle - generate MISRA C-friendly headers - misracppstyle - generate MISRA C++-friendly headers""" @@ -94,6 +109,12 @@ def __init__(self, self.protectProtoStr = protectProtoStr """#ifdef/#ifndef symbol to use around prototype declarations, if protectProto is set""" + self.protectExtensionProto = protectExtensionProto + """If conditional protection should be generated around extension prototype declarations, set to either '#ifdef' to require opt-in (#ifdef protectExtensionProtoStr) or '#ifndef' to require opt-out (#ifndef protectExtensionProtoStr). 
Otherwise set to None.""" + + self.protectExtensionProtoStr = protectExtensionProtoStr + """#ifdef/#ifndef symbol to use around extension prototype declarations, if protectExtensionProto is set""" + self.apicall = apicall """string to use for the function declaration prefix, such as APICALL on Windows.""" @@ -118,6 +139,9 @@ def __init__(self, self.genAliasMacro = genAliasMacro """True if the OpenXR alias macro should be generated for aliased types (unclear what other circumstances this is useful)""" + self.genStructExtendsComment = genStructExtendsComment + """True if comments showing the structures whose pNext chain a structure extends are included before its definition""" + self.aliasMacro = aliasMacro """alias macro to inject when genAliasMacro is True""" @@ -148,10 +172,12 @@ def __init__(self, *args, **kwargs): def beginFile(self, genOpts): OutputGenerator.beginFile(self, genOpts) + if self.genOpts is None: + raise MissingGeneratorOptionsError() # C-specific # # Multiple inclusion protection & C++ wrappers. - if genOpts.protectFile and self.genOpts.filename: + if self.genOpts.protectFile and self.genOpts.filename: headerSym = re.sub(r'\.h', '_h_', os.path.basename(self.genOpts.filename)).upper() write('#ifndef', headerSym, file=self.outFile) @@ -173,6 +199,8 @@ def beginFile(self, genOpts): def endFile(self): # C-specific # Finish C++ wrapper and multiple inclusion protection + if self.genOpts is None: + raise MissingGeneratorOptionsError() self.newline() write('#ifdef __cplusplus', file=self.outFile) write('}', file=self.outFile) @@ -188,53 +216,97 @@ def beginFeature(self, interface, emit): OutputGenerator.beginFeature(self, interface, emit) # C-specific # Accumulate includes, defines, types, enums, function pointer typedefs, - # end function prototypes separately for this feature. They're only + # end function prototypes separately for this feature. They are only # printed in endFeature(). 
self.sections = {section: [] for section in self.ALL_SECTIONS} self.feature_not_empty = False + def _endProtectComment(self, protect_str, protect_directive='#ifdef'): + if protect_directive is None or protect_str is None: + raise RuntimeError('Should not call in here without something to protect') + + # Do not put comments after #endif closing blocks if this is not set + if not self.genOpts.conventions.protectProtoComment: + return '' + elif 'ifdef' in protect_directive: + return f' /* {protect_str} */' + else: + return f' /* !{protect_str} */' + def endFeature(self): "Actually write the interface to the output file." # C-specific if self.emit: if self.feature_not_empty: + if self.genOpts is None: + raise MissingGeneratorOptionsError() + if self.genOpts.conventions is None: + raise MissingGeneratorOptionsConventionsError() + is_core = self.featureName and self.featureName.startswith(self.conventions.api_prefix + 'VERSION_') if self.genOpts.conventions.writeFeature(self.featureExtraProtect, self.genOpts.filename): self.newline() if self.genOpts.protectFeature: write('#ifndef', self.featureName, file=self.outFile) + # If type declarations are needed by other features based on # this one, it may be necessary to suppress the ExtraProtect, # or move it below the 'for section...' loop. if self.featureExtraProtect is not None: write('#ifdef', self.featureExtraProtect, file=self.outFile) self.newline() + + # Generate warning of possible use in IDEs + write(f'// {self.featureName} is a preprocessor guard. 
Do not pass it to API calls.', file=self.outFile) write('#define', self.featureName, '1', file=self.outFile) for section in self.TYPE_SECTIONS: contents = self.sections[section] if contents: write('\n'.join(contents), file=self.outFile) + if self.genOpts.genFuncPointers and self.sections['commandPointer']: write('\n'.join(self.sections['commandPointer']), file=self.outFile) self.newline() + if self.sections['command']: if self.genOpts.protectProto: write(self.genOpts.protectProto, self.genOpts.protectProtoStr, file=self.outFile) + if self.genOpts.protectExtensionProto and not is_core: + write(self.genOpts.protectExtensionProto, + self.genOpts.protectExtensionProtoStr, file=self.outFile) write('\n'.join(self.sections['command']), end='', file=self.outFile) + if self.genOpts.protectExtensionProto and not is_core: + write('#endif' + + self._endProtectComment(protect_directive=self.genOpts.protectExtensionProto, + protect_str=self.genOpts.protectExtensionProtoStr), + file=self.outFile) if self.genOpts.protectProto: - write('#endif', file=self.outFile) + write('#endif' + + self._endProtectComment(protect_directive=self.genOpts.protectProto, + protect_str=self.genOpts.protectProtoStr), + file=self.outFile) else: self.newline() + if self.featureExtraProtect is not None: - write('#endif /*', self.featureExtraProtect, '*/', file=self.outFile) + write('#endif' + + self._endProtectComment(protect_str=self.featureExtraProtect), + file=self.outFile) + if self.genOpts.protectFeature: - write('#endif /*', self.featureName, '*/', file=self.outFile) + write('#endif' + + self._endProtectComment(protect_str=self.featureName), + file=self.outFile) # Finish processing in superclass OutputGenerator.endFeature(self) def appendSection(self, section, text): "Append a definition to the specified section" - # self.sections[section].append('SECTION: ' + section + '\n') + + if section is None: + self.logMsg('error', 'Missing section in appendSection (probably a <type> element missing its \'category\'
attribute. Text:', text) + exit(1) + self.sections[section].append(text) self.feature_not_empty = True @@ -260,6 +332,8 @@ def genType(self, typeinfo, name, alias): # special-purpose generator. self.genStruct(typeinfo, name, alias) else: + if self.genOpts is None: + raise MissingGeneratorOptionsError() # OpenXR: this section was not under 'else:' previously, just fell through if alias: # If the type is an alias, just emit a typedef declaration @@ -267,13 +341,15 @@ def genType(self, typeinfo, name, alias): else: # Replace <apientry /> tags with an APIENTRY-style string # (from self.genOpts). Copy other text through unchanged. - # If the resulting text is an empty string, don't emit it. + # If the resulting text is an empty string, do not emit it. body = noneStr(typeElem.text) for elem in typeElem: if elem.tag == 'apientry': body += self.genOpts.apientry + noneStr(elem.tail) else: body += noneStr(elem.text) + noneStr(elem.tail) + if category == 'define' and self.misracppstyle(): + body = body.replace("(uint32_t)", "static_cast<uint32_t>") if body: # Add extra newline after multi-line entries. if '\n' in body[0:-1]: @@ -284,7 +360,7 @@ def genProtectString(self, protect_str): """Generate protection string. Protection strings are the strings defining the OS/Platform/Graphics - requirements for a given OpenXR command. When generating the + requirements for a given API command.
When generating the language header files, we need to make sure the items specific to a graphics API or OS platform are properly wrapped in #ifs.""" protect_if_str = '' @@ -293,7 +369,7 @@ def genProtectString(self, protect_str): return (protect_if_str, protect_end_str) if ',' in protect_str: - protect_list = protect_str.split(",") + protect_list = protect_str.split(',') protect_defs = ('defined(%s)' % d for d in protect_list) protect_def_str = ' && '.join(protect_defs) protect_if_str = '#if %s\n' % protect_def_str @@ -306,8 +382,10 @@ def genProtectString(self, protect_str): def typeMayAlias(self, typeName): if not self.may_alias: - # First time we've asked if a type may alias. - # So, let's populate the set of all names of types that may. + if self.registry is None: + raise MissingRegistryError() + # First time we have asked if a type may alias. + # So, populate the set of all names of types that may. # Everyone with an explicit mayalias="true" self.may_alias = set(typeName @@ -315,9 +393,9 @@ def typeMayAlias(self, typeName): if data.elem.get('mayalias') == 'true') # Every type mentioned in some other type's parentstruct attribute. 
- parent_structs = (otherType.elem.get('parentstruct') - for otherType in self.registry.typedict.values()) - self.may_alias.update(set(x for x in parent_structs + polymorphic_bases = (otherType.elem.get('parentstruct') + for otherType in self.registry.typedict.values()) + self.may_alias.update(set(x for x in polymorphic_bases if x is not None)) return typeName in self.may_alias @@ -335,6 +413,9 @@ def genStruct(self, typeinfo, typeName, alias): generate a typedef of that alias.""" OutputGenerator.genStruct(self, typeinfo, typeName, alias) + if self.genOpts is None: + raise MissingGeneratorOptionsError() + typeElem = typeinfo.elem if alias: @@ -344,6 +425,11 @@ def genStruct(self, typeinfo, typeName, alias): (protect_begin, protect_end) = self.genProtectString(typeElem.get('protect')) if protect_begin: body += protect_begin + + if self.genOpts.genStructExtendsComment: + structextends = typeElem.get('structextends') + body += '// ' + typeName + ' extends ' + structextends + '\n' if structextends else '' + body += 'typedef ' + typeElem.get('category') # This is an OpenXR-specific alternative where aliasing refers @@ -387,11 +473,16 @@ def genGroup(self, groupinfo, groupName, alias=None): body = 'typedef ' + alias + ' ' + groupName + ';\n' self.appendSection(section, body) else: + if self.genOpts is None: + raise MissingGeneratorOptionsError() (section, body) = self.buildEnumCDecl(self.genOpts.genEnumBeginEndRange, groupinfo, groupName) - self.appendSection(section, "\n" + body) + self.appendSection(section, '\n' + body) def genEnum(self, enuminfo, name, alias): - """Generate the C declaration for a constant (a single value).""" + """Generate the C declaration for a constant (a single value). 
+ + tags may specify their values in several ways, but are usually + just integers.""" OutputGenerator.genEnum(self, enuminfo, name, alias) @@ -406,6 +497,8 @@ def genCmd(self, cmdinfo, name, alias): # prefix = '// ' + name + ' is an alias of command ' + alias + '\n' # else: # prefix = '' + if self.genOpts is None: + raise MissingGeneratorOptionsError() prefix = '' decls = self.makeCDecls(cmdinfo.elem) diff --git a/scripts/clconventions.py b/scripts/clconventions.py index dc4d95fe..2e601519 100644 --- a/scripts/clconventions.py +++ b/scripts/clconventions.py @@ -6,43 +6,44 @@ # Working-group-specific style conventions, # used in generation. +import os import re -from conventions import ConventionsBase +from spec_tools.conventions import ConventionsBase class OpenCLConventions(ConventionsBase): - def formatExtension(self, name): - """Mark up a name as an extension for the spec.""" - return '`<<{}>>`'.format(name) - @property def null(self): """Preferred spelling of NULL.""" return '`NULL`' - @property - def constFlagBits(self): - """Returns True if static const flag bits should be generated, False if an enumerated type should be generated.""" - return False + def formatVersion(self, name, apivariant, major, minor): + """Mark up an API version name as a link in the spec.""" + return f'`<>`' + + def formatExtension(self, name): + """Mark up a name as an extension for the spec.""" + return '`<<{}>>`'.format(name) @property def struct_macro(self): - return 'sname:' + return '' @property - def external_macro(self): - return 'code:' + def constFlagBits(self): + """Returns True if static const flag bits should be generated, False if an enumerated type should be generated.""" + return False @property def structtype_member_name(self): """Return name of the structure type member""" - return 'sType' + return 'type' @property def nextpointer_member_name(self): """Return name of the structure pointer chain member""" - return 'pNext' + return 'next' @property def 
valid_pointer_prefix(self): @@ -88,57 +89,55 @@ def api_name(self, spectype='api'): else: return None - @property - def xml_supported_name_of_api(self): - """Return the supported= attribute used in API XML""" - return 'opencl' - @property def api_prefix(self): """Return API token prefix""" return 'CL_' @property - def api_version_prefix(self): - """Return API core version token prefix""" - return 'CL_VERSION_' + def extension_name_prefix(self): + """Return extension name prefix""" + return 'cl_' - @property - def KHR_prefix(self): - """Return extension name prefix for KHR extensions""" - return 'cl_khr_' + def extension_short_description(self, elem): + """Return a short description of an extension for use in refpages.""" - @property - def EXT_prefix(self): - """Return extension name prefix for EXT extensions""" - return 'cl_ext_' + return 'OpenCL extension' @property def write_contacts(self): """Return whether contact list should be written to extension appendices""" - return True + return False @property - def write_refpage_include(self): - """Return whether refpage include should be written to extension appendices""" + def write_extension_type(self): + """Return whether extension type should be written to extension appendices""" return False - def writeFeature(self, featureExtraProtect, filename): - """Returns True if OutputGenerator.endFeature should write this feature. - Used in COutputGenerator - """ - return True + @property + def write_extension_number(self): + """Return whether extension number should be written to extension appendices""" + return False - def requires_error_validation(self, return_type): - """Returns True if the return_type element is an API result code - requiring error validation. 
- """ + @property + def write_extension_revision(self): + """Return whether extension revision number should be written to extension appendices""" return False @property - def required_errors(self): - """Return a list of required error codes for validation.""" - return [] + def write_refpage_include(self): + """Return whether refpage include should be written to extension appendices""" + return True + + @property + def KHR_prefix(self): + """Return extension name prefix for KHR extensions""" + return 'cl_khr_' + + @property + def EXT_prefix(self): + """Return extension name prefix for EXT extensions""" + return 'cl_ext_' def is_externsync_command(self, protoname): """Returns True if the protoname element is an API command requiring @@ -152,18 +151,6 @@ def is_api_name(self, name): """ return name[0:2].lower() == 'cl' - def is_voidpointer_alias(self, tag, text, tail): - """Return True if the declaration components (tag,text,tail) of an - element represents a void * type - """ - return tag == 'type' and text == 'void' and tail.startswith('*') - - def make_voidpointer_alias(self, tail): - """Reformat a void * declaration to include the API alias macro. - Vulkan doesn't have an API alias macro, so do nothing. - """ - return tail - def specURL(self, spectype = 'api'): """Return public registry URL which ref pages should link to for full Specification, so xrefs in the asciidoc source that aren't @@ -196,6 +183,25 @@ def specification_path(self): """Return relpath to the Asciidoctor specification sources in this project.""" return '../appendices/meta' + def extension_file_path(self, name): + """Return file path to an extension appendix relative to a directory + containing all such appendices. + - name - extension name + + Must implement.""" + + # Normally this would use self.file_suffix, but the OpenCL spec + # currently uses a variety of suffixing conventions. 
+ return f'{name}.asciidoc' + + def extension_include_string(self, name): + """Return format string for include:: line for an extension appendix + file. + - name - extension name""" + + return 'include::{{apispec}}/{}[]'.format( + self.extension_file_path(name)) + @property def extra_refpage_headers(self): """Return any extra text to add to refpage headers.""" @@ -222,7 +228,7 @@ def unified_flag_refpages(self): @property def spec_reflow_path(self): """Return the relative path to the spec source folder to reflow""" - return '.' + return os.getcwd() @property def spec_no_reflow_dirs(self): @@ -240,3 +246,16 @@ def should_skip_checking_codes(self): Vulkan, so these checks are not appropriate.""" return True + + @property + def extra_refpage_body(self): + """Return any extra text (following the title) for generated + reference pages.""" + return 'include::{generated}/specattribs.adoc[]' + + @property + def docgen_language(self): + """Return the language to be used in docgenerator [source] + blocks.""" + + return 'opencl' diff --git a/scripts/docgenerator.py b/scripts/docgenerator.py index 4ebabd87..3692768c 100644 --- a/scripts/docgenerator.py +++ b/scripts/docgenerator.py @@ -7,18 +7,42 @@ from pathlib import Path from generator import GeneratorOptions, OutputGenerator, noneStr, write +from parse_dependency import dependencyLanguageComment -ENUM_TABLE_PREFIX = """ +_ENUM_TABLE_PREFIX = """ [cols=",",options="header",] -|======================================================================= +|==== |Enum |Description""" -ENUM_TABLE_SUFFIX = """|=======================================================================""" +_TABLE_SUFFIX = """|====""" -FLAG_BLOCK_PREFIX = """.Flag Descriptions +_ENUM_BLOCK_PREFIX = """.Enumerant Descriptions ****""" -FLAG_BLOCK_SUFFIX = """****""" +_FLAG_BLOCK_PREFIX = """.Flag Descriptions +****""" + +_BLOCK_SUFFIX = """****""" + +def orgLevelKey(name): + # Sort key for organization levels of features / extensions + # From highest to 
lowest, core versions, KHR extensions, EXT extensions, + # and vendor extensions + + prefixes = ( + 'VK_VERSION_', + 'VKSC_VERSION_', + 'VK_KHR_', + 'VK_EXT_') + + i = 0 + for prefix in prefixes: + if name.startswith(prefix): + return i + i += 1 + + # Everything else (e.g. vendor extensions) is least important + return i class DocGeneratorOptions(GeneratorOptions): @@ -124,8 +148,6 @@ class DocOutputGenerator(OutputGenerator): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - # Keep track of all extension numbers - self.extension_numbers = set() def beginFile(self, genOpts): OutputGenerator.beginFile(self, genOpts) @@ -141,28 +163,16 @@ def beginFeature(self, interface, emit): # Start processing in superclass OutputGenerator.beginFeature(self, interface, emit) - # Decide if we're in a core or an + # Decide if we are in a core or an self.in_core = (interface.tag == 'feature') - # Verify that each has a unique number during doc - # generation - # TODO move this to consistency_tools - if not self.in_core: - extension_number = interface.get('number') - if extension_number is not None and extension_number != "0": - if extension_number in self.extension_numbers: - self.logMsg('error', 'Duplicate extension number ', extension_number, ' detected in feature ', interface.get('name'), '\n') - exit(1) - else: - self.extension_numbers.add(extension_number) - def endFeature(self): # Finish processing in superclass OutputGenerator.endFeature(self) def genRequirements(self, name, mustBeFound = True): """Generate text showing what core versions and extensions introduce - an API. This relies on the map in api.py, which may be loaded at + an API. This relies on the map in apimap.py, which may be loaded at runtime into self.apidict. If not present, no message is generated. 
@@ -173,13 +183,40 @@ def genRequirements(self, name, mustBeFound = True): if self.apidict: if name in self.apidict.requiredBy: - features = [] + # It is possible to get both 'A with B' and 'B with A' for + # the same API. + # To simplify this, sort the (base,dependency) requirements + # and put them in a set to ensure they are unique. + features = set() + # 'dependency' may be a boolean expression of extension names for (base,dependency) in self.apidict.requiredBy[name]: if dependency is not None: - features.append('{} with {}'.format(base, dependency)) + # 'dependency' may be a boolean expression of extension + # names, in which case the sorting will not work well. + + # First, convert it from asciidoctor markup to language. + depLanguage = dependencyLanguageComment(dependency) + + # If they are the same, the dependency is only a + # single extension, and sorting them works. + # Otherwise, skip it. + if depLanguage == dependency: + deps = sorted( + sorted((base, dependency)), + key=orgLevelKey) + depString = ' with '.join(deps) + else: + # An expression with multiple extensions + depString = f'{base} with {depLanguage}' + + features.add(depString) else: - features.append(base) - return '// Provided by {}\n'.format(', '.join(features)) + features.add(base) + # Sort the overall dependencies so core versions are first + provider = ', '.join(sorted( + sorted(features), + key=orgLevelKey)) + return f'// Provided by {provider}\n' else: if mustBeFound: self.logMsg('warn', 'genRequirements: API {} not found'.format(name)) @@ -199,26 +236,26 @@ def writeInclude(self, directory, basename, contents): self.makeDir(directory) # Create file - filename = directory + '/' + basename + '.txt' + filename = directory + '/' + basename + self.file_suffix self.logMsg('diag', '# Generating include file:', filename) fp = open(filename, 'w', encoding='utf-8') # Asciidoc anchor write(self.genOpts.conventions.warning_comment, file=fp) - write('[[{0},{0}]]'.format(basename), file=fp) + 
write('[[{0}]]'.format(basename), file=fp) if self.genOpts.conventions.generate_index_terms: - index_terms = [] if basename.startswith(self.conventions.command_prefix): - index_terms.append(basename[2:] + " (function)") + index_term = basename + " (function)" elif basename.startswith(self.conventions.type_prefix): - index_terms.append(basename[2:] + " (type)") + index_term = basename + " (type)" elif basename.startswith(self.conventions.api_prefix): - index_terms.append(basename[len(self.conventions.api_prefix):] + " (define)") - index_terms.append(basename) - write('indexterm:[{}]'.format(','.join(index_terms)), file=fp) + index_term = basename + " (define)" + else: + index_term = basename + write('indexterm:[{}]'.format(index_term), file=fp) - write('[source%unbreakable,opencl]', file=fp) + write(f'[source%unbreakable,{self.conventions.docgen_language}]', file=fp) write('----', file=fp) write(contents, file=fp) write('----', file=fp) @@ -226,55 +263,69 @@ def writeInclude(self, directory, basename, contents): if self.genOpts.secondaryInclude: # Create secondary no cross-reference include file - filename = directory + '/' + basename + '.no-xref.txt' + filename = f'{directory}/{basename}.no-xref{self.file_suffix}' self.logMsg('diag', '# Generating include file:', filename) fp = open(filename, 'w', encoding='utf-8') # Asciidoc anchor write(self.genOpts.conventions.warning_comment, file=fp) write('// Include this no-xref version without cross reference id for multiple includes of same file', file=fp) - write('[source,%unbreakable,opencl]', file=fp) + write(f'[source,%unbreakable,{self.conventions.docgen_language}]', file=fp) write('----', file=fp) write(contents, file=fp) write('----', file=fp) fp.close() - def writeTable(self, basename, values): + def writeEnumTable(self, basename, values): """Output a table of enumerants.""" directory = Path(self.genOpts.directory) / 'enums' self.makeDir(str(directory)) - filename = str(directory / 
'{}.comments.txt'.format(basename)) + filename = str(directory / f'{basename}.comments{self.file_suffix}') self.logMsg('diag', '# Generating include file:', filename) with open(filename, 'w', encoding='utf-8') as fp: write(self.conventions.warning_comment, file=fp) - write(ENUM_TABLE_PREFIX, file=fp) + write(_ENUM_TABLE_PREFIX, file=fp) for data in values: write("|ename:{}".format(data['name']), file=fp) write("|{}".format(data['comment']), file=fp) - write(ENUM_TABLE_SUFFIX, file=fp) + write(_TABLE_SUFFIX, file=fp) - def writeFlagBox(self, basename, values): - """Output a box of flag bit comments.""" - directory = Path(self.genOpts.directory) / 'enums' - self.makeDir(str(directory)) - - filename = str(directory / '{}.comments.txt'.format(basename)) + def writeBox(self, filename, prefix, items): + """Write a generalized block/box for some values.""" self.logMsg('diag', '# Generating include file:', filename) with open(filename, 'w', encoding='utf-8') as fp: write(self.conventions.warning_comment, file=fp) - write(FLAG_BLOCK_PREFIX, file=fp) + write(prefix, file=fp) - for data in values: - write("* ename:{} -- {}".format(data['name'], - data['comment']), - file=fp) + for item in items: + write("* {}".format(item), file=fp) + + write(_BLOCK_SUFFIX, file=fp) + + def writeEnumBox(self, basename, values): + """Output a box of enumerants.""" + directory = Path(self.genOpts.directory) / 'enums' + self.makeDir(str(directory)) - write(FLAG_BLOCK_SUFFIX, file=fp) + filename = str(directory / f'{basename}.comments-box{self.file_suffix}') + self.writeBox(filename, _ENUM_BLOCK_PREFIX, + ("ename:{} -- {}".format(data['name'], data['comment']) + for data in values)) + + def writeFlagBox(self, basename, values): + """Output a box of flag bit comments.""" + directory = Path(self.genOpts.directory) / 'enums' + self.makeDir(str(directory)) + + filename = str(directory / f'{basename}.comments{self.file_suffix}') + self.writeBox(filename, _FLAG_BLOCK_PREFIX, + ("ename:{} -- 
{}".format(data['name'], data['comment']) + for data in values)) def genType(self, typeinfo, name, alias): """Generate type.""" @@ -288,6 +339,10 @@ def genType(self, typeinfo, name, alias): # If the type is a struct type, generate it using the # special-purpose generator. self.genStruct(typeinfo, name, alias) + elif category not in OutputGenerator.categoryToPath: + # If there is no path, do not write output + self.logMsg('diag', 'NOT writing include for {} category {}'.format( + name, category)) else: body = self.genRequirements(name) if alias: @@ -298,7 +353,7 @@ def genType(self, typeinfo, name, alias): else: # Replace tags with an APIENTRY-style string # (from self.genOpts). Copy other text through unchanged. - # If the resulting text is an empty string, don't emit it. + # If the resulting text is an empty string, do not emit it. body += noneStr(typeElem.text) for elem in typeElem: if elem.tag == 'apientry': @@ -307,32 +362,42 @@ def genType(self, typeinfo, name, alias): body += noneStr(elem.text) + noneStr(elem.tail) if body: - if category in OutputGenerator.categoryToPath: - self.writeInclude(OutputGenerator.categoryToPath[category], - name, body + '\n') - else: - self.logMsg('diag', '# NOT writing include file for type:', - name, '- bad category: ', category) + self.writeInclude(OutputGenerator.categoryToPath[category], + name, body + '\n') else: - self.logMsg('diag', '# NOT writing empty include file for type', name) + self.logMsg('diag', 'NOT writing empty include file for type', name) + + def genStructBody(self, typeinfo, typeName): + """ + Returns the body generated for a struct. + + Factored out to allow aliased types to also generate the original type. 
+ """ + typeElem = typeinfo.elem + body = 'typedef ' + typeElem.get('category') + ' ' + typeName + ' {\n' + + targetLen = self.getMaxCParamTypeLength(typeinfo) + for member in typeElem.findall('.//member'): + body += self.makeCParamDecl(member, targetLen + 4) + body += ';\n' + body += '} ' + typeName + ';' + return body def genStruct(self, typeinfo, typeName, alias): """Generate struct.""" OutputGenerator.genStruct(self, typeinfo, typeName, alias) - typeElem = typeinfo.elem - body = self.genRequirements(typeName) if alias: + if self.conventions.duplicate_aliased_structs: + # TODO maybe move this outside the conditional? This would be a visual change. + body += '// {} is an alias for {}\n'.format(typeName, alias) + alias_info = self.registry.typedict[alias] + body += self.genStructBody(alias_info, alias) + body += '\n\n' body += 'typedef ' + alias + ' ' + typeName + ';\n' else: - body += 'typedef ' + typeElem.get('category') + ' ' + typeName + ' {\n' - - targetLen = self.getMaxCParamTypeLength(typeinfo) - for member in typeElem.findall('.//member'): - body += self.makeCParamDecl(member, targetLen + 4) - body += ';\n' - body += '} ' + typeName + ';' + body += self.genStructBody(typeinfo, typeName) self.writeInclude('structs', typeName, body) @@ -352,14 +417,14 @@ def genEnumTable(self, groupinfo, groupName): 'name': name, } - (numVal, strVal) = self.enumToValue(elem, True) + (numVal, _) = self.enumToValue(elem, True) data['value'] = numVal extname = elem.get('extname') added_by_extension_to_core = (extname is not None and self.in_core) if added_by_extension_to_core and not self.genOpts.extEnumerantAdditions: - # We're skipping such values + # We are skipping such values continue comment = elem.get('comment') @@ -370,7 +435,8 @@ def genEnumTable(self, groupinfo, groupName): # Just skip this silently continue else: - # Skip but record this in case it's an odd-one-out missing a comment. + # Skip but record this in case it is an odd-one-out missing + # a comment. 
missing_comments.append(name) continue @@ -393,16 +459,17 @@ def genEnumTable(self, groupinfo, groupName): group_type = groupinfo.elem.get('type') if groupName == self.result_type: # Split this into success and failure - self.writeTable(groupName + '.success', + self.writeEnumTable(groupName + '.success', (data for data in values if data['value'] >= 0)) - self.writeTable(groupName + '.error', + self.writeEnumTable(groupName + '.error', (data for data in values if data['value'] < 0)) elif group_type == 'bitmask': self.writeFlagBox(groupName, values) elif group_type == 'enum': - self.writeTable(groupName, values) + self.writeEnumTable(groupName, values) + self.writeEnumBox(groupName, values) else: raise RuntimeError("Unrecognized enums type: " + str(group_type)) @@ -437,17 +504,6 @@ def genCmd(self, cmdinfo, name, alias): "Generate command." OutputGenerator.genCmd(self, cmdinfo, name, alias) - return_type = cmdinfo.elem.find('proto/type') - if self.genOpts.conventions.requires_error_validation(return_type): - # This command returns an API result code, so check that it - # returns at least the required errors. - # TODO move this to consistency_tools - required_errors = set(self.genOpts.conventions.required_errors) - errorcodes = cmdinfo.elem.get('errorcodes').split(',') - if not required_errors.issubset(set(errorcodes)): - self.logMsg('error', 'Missing required error code for command: ', name, '\n') - exit(1) - body = self.genRequirements(name) decls = self.makeCDecls(cmdinfo.elem) body += decls[0] diff --git a/scripts/extdependency.py b/scripts/extdependency.py new file mode 100755 index 00000000..69dbec3c --- /dev/null +++ b/scripts/extdependency.py @@ -0,0 +1,205 @@ +#!/usr/bin/env python3 +# +# Copyright 2017-2024 The Khronos Group Inc. 
+# SPDX-License-Identifier: Apache-2.0 + +"""Generate a mapping of extension name -> all required extension names for + that extension, from dependencies in the API XML.""" + +import argparse +import errno +import xml.etree.ElementTree as etree +from pathlib import Path + +from apiconventions import APIConventions +from parse_dependency import dependencyNames + +class DiGraph: + """A directed graph. + + The implementation and API mimic that of networkx.DiGraph in networkx-1.11. + networkx implements graphs as nested dicts; it uses dicts all the way + down, no lists. + + Some major differences between this implementation and that of + networkx-1.11 are: + + * This omits edge and node attribute data, because we never use them + yet they add additional code complexity. + + * This returns iterator objects when possible instead of collection + objects, because it simplifies the implementation and should provide + better performance. + """ + + def __init__(self): + self.__nodes = {} + + def add_node(self, node): + if node not in self.__nodes: + self.__nodes[node] = DiGraphNode() + + def add_edge(self, src, dest): + self.add_node(src) + self.add_node(dest) + self.__nodes[src].adj.add(dest) + + def nodes(self): + """Iterate over the nodes in the graph.""" + return self.__nodes.keys() + + def descendants(self, node): + """ + Iterate over the nodes reachable from the given start node, excluding + the start node itself. Each node in the graph is yielded at most once. + """ + + # Implementation detail: Do a breadth-first traversal because it is + # easier than depth-first. + + # All nodes seen during traversal. + seen = set() + + # The stack of nodes that need visiting. + visit_me = [] + + # Bootstrap the traversal. 
+ seen.add(node) + for x in self.__nodes[node].adj: + if x not in seen: + seen.add(x) + visit_me.append(x) + + while visit_me: + x = visit_me.pop() + assert x in seen + yield x + + for y in self.__nodes[x].adj: + if y not in seen: + seen.add(y) + visit_me.append(y) + +class DiGraphNode: + def __init__(self): + # Set of adjacent of nodes. + self.adj = set() + +class ApiDependencies: + def __init__(self, + registry_path = None, + api_name = None): + """Load an API registry and generate extension dependencies + + registry_path - relative filename of XML registry. If not specified, + uses the API default. + + api_name - API name for which to generate dependencies. Only + extensions supported for that API are considered. + """ + + conventions = APIConventions() + if registry_path is None: + registry_path = conventions.registry_path + if api_name is None: + api_name = conventions.xml_api_name + + self.allExts = set() + self.khrExts = set() + self.ratifiedExts = set() + self.graph = DiGraph() + self.extensions = {} + self.tree = etree.parse(registry_path) + + # Loop over all supported extensions, creating a digraph of the + # extension dependencies in the 'depends' attribute, which is a + # boolean expression of core version and extension names. + # A static dependency tree can be constructed only by treating all + # extension names in the expression as dependencies, even though + # that may not be true if it is of form (ext OR ext). + # For the purpose these dependencies are used for - generating + # specifications with required dependencies included automatically - + # this will suffice. + # Separately tracks lists of all extensions and all KHR extensions, + # which are common specification targets. 
+ for elem in self.tree.findall('extensions/extension'): + name = elem.get('name') + supported = elem.get('supported') + ratified = elem.get('ratified', '') + + if api_name in supported.split(','): + self.allExts.add(name) + + if conventions.KHR_prefix in name: + self.khrExts.add(name) + + if api_name in ratified.split(','): + self.ratifiedExts.add(name) + + self.graph.add_node(name) + + depends = elem.get('depends') + if depends: + # Walk a list of the leaf nodes (version and extension + # names) in the boolean expression. + for dep in dependencyNames(depends): + # Filter out version names, which are explicitly + # specified when building a specification. + if not conventions.is_api_version_name(dep): + self.graph.add_edge(name, dep) + else: + # Skip unsupported extensions + pass + + def allExtensions(self): + """Returns a set of all extensions in the graph""" + return self.allExts + + def khrExtensions(self): + """Returns a set of all KHR extensions in the graph""" + return self.khrExts + + def ratifiedExtensions(self): + """Returns a set of all ratified extensions in the graph""" + return self.ratifiedExts + + def children(self, extension): + """Returns a set of the dependencies of an extension. 
+ Throws an exception if the extension is not in the graph.""" + + if extension not in self.allExts: + raise Exception(f'Extension {extension} not found in XML!') + + return set(self.graph.descendants(extension)) + + +# Test script +if __name__ == '__main__': + parser = argparse.ArgumentParser() + + parser.add_argument('-registry', action='store', + default=APIConventions().registry_path, + help='Use specified registry file instead of ' + APIConventions().registry_path) + parser.add_argument('-loops', action='store', + default=10, type=int, + help='Number of timing loops to run') + parser.add_argument('-test', action='store', + default=None, + help='Specify extension to find dependencies of') + + args = parser.parse_args() + + deps = ApiDependencies(args.registry) + print('KHR exts =', sorted(deps.khrExtensions())) + print('Ratified exts =', sorted(deps.ratifiedExtensions())) + + import time + startTime = time.process_time() + + for loop in range(args.loops): + deps = ApiDependencies(args.registry) + + endTime = time.process_time() + + deltaT = endTime - startTime + print('Total time = {} time/loop = {}'.format(deltaT, deltaT / args.loops)) diff --git a/scripts/extensionmetadocgenerator.py b/scripts/extensionmetadocgenerator.py index 957cb5a4..bc38084e 100644 --- a/scripts/extensionmetadocgenerator.py +++ b/scripts/extensionmetadocgenerator.py @@ -9,6 +9,7 @@ import sys from functools import total_ordering from generator import GeneratorOptions, OutputGenerator, regSortFeatures, write +from parse_dependency import dependencyMarkup class ExtensionMetaDocGeneratorOptions(GeneratorOptions): """ExtensionMetaDocGeneratorOptions - subclass of GeneratorOptions. 
@@ -17,10 +18,6 @@ class ExtensionMetaDocGeneratorOptions(GeneratorOptions): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - -EXT_NAME_DECOMPOSE_RE = re.compile(r'[A-Z]+_(?P[A-Z]+)_(?P[\w_]+)') - - @total_ordering class Extension: def __init__(self, @@ -29,23 +26,23 @@ def __init__(self, name, number, ext_type, - requires, - requiresCore, + depends, contact, promotedTo, deprecatedBy, obsoletedBy, provisional, revision, - specialuse ): + specialuse, + ratified + ): self.generator = generator self.conventions = generator.genOpts.conventions self.filename = filename self.name = name self.number = number self.ext_type = ext_type - self.requires = requires - self.requiresCore = requiresCore + self.depends = depends self.contact = contact self.promotedTo = promotedTo self.deprecatedBy = deprecatedBy @@ -53,10 +50,16 @@ def __init__(self, self.provisional = provisional self.revision = revision self.specialuse = specialuse + self.ratified = ratified self.deprecationType = None self.supercedingAPIVersion = None self.supercedingExtension = None + # This is a set containing names of extensions (if any) promoted + # *to* this extension. + # It is filled in after all the Extension objects are created, + # since it requires a reverse mapping step. + self.promotedFrom = set() if self.promotedTo is not None and self.deprecatedBy is not None and self.obsoletedBy is not None: self.generator.logMsg('warn', 'All \'promotedto\', \'deprecatedby\' and \'obsoletedby\' attributes used on extension ' + self.name + '! 
Ignoring \'promotedto\' and \'deprecatedby\'.') @@ -83,15 +86,11 @@ def __init__(self, pass # supercedingAPIVersion, supercedingExtension is None elif supercededBy.startswith(self.conventions.api_version_prefix): self.supercedingAPIVersion = supercededBy - elif supercededBy.startswith(self.conventions.api_prefix): + elif supercededBy.startswith(self.conventions.extension_name_prefix): self.supercedingExtension = supercededBy else: self.generator.logMsg('error', 'Unrecognized ' + self.deprecationType + ' attribute value \'' + supercededBy + '\'!') - match = EXT_NAME_DECOMPOSE_RE.match(self.name) - self.vendor = match.group('tag') - self.bare_name = match.group('name') - def __str__(self): return self.name def __eq__(self, other): @@ -174,8 +173,13 @@ def conditionalLinkExt(self, extName, indent = ' '): return doc - def resolveDeprecationChain(self, extensionsList, succeededBy, isRefpage, file): - ext = next(x for x in extensionsList if x.name == succeededBy) + def resolveDeprecationChain(self, extensions, succeededBy, isRefpage, file): + if succeededBy not in extensions: + write(f' ** *NOTE* The extension `{succeededBy}` is not supported for the API specification being generated', file=file) + self.generator.logMsg('warn', f'resolveDeprecationChain: {self.name} defines a superceding interface {succeededBy} which is not in the supported extensions list') + return + + ext = extensions[succeededBy] if ext.deprecationType: if ext.deprecationType == 'promotion': @@ -183,13 +187,13 @@ def resolveDeprecationChain(self, extensionsList, succeededBy, isRefpage, file): write(' ** Which in turn was _promoted_ to\n' + ext.conditionalLinkCoreAPI(ext.supercedingAPIVersion, '-promotions', isRefpage), file=file) else: # ext.supercedingExtension write(' ** Which in turn was _promoted_ to extension\n' + ext.conditionalLinkExt(ext.supercedingExtension), file=file) - ext.resolveDeprecationChain(extensionsList, ext.supercedingExtension, file) + ext.resolveDeprecationChain(extensions, 
ext.supercedingExtension, file) elif ext.deprecationType == 'deprecation': if ext.supercedingAPIVersion: write(' ** Which in turn was _deprecated_ by\n' + ext.conditionalLinkCoreAPI(ext.supercedingAPIVersion, '-new-feature', isRefpage), file=file) elif ext.supercedingExtension: write(' ** Which in turn was _deprecated_ by\n' + ext.conditionalLinkExt(ext.supercedingExtension) + ' extension', file=file) - ext.resolveDeprecationChain(extensionsList, ext.supercedingExtension, file) + ext.resolveDeprecationChain(extensions, ext.supercedingExtension, file) else: write(' ** Which in turn was _deprecated_ without replacement', file=file) elif ext.deprecationType == 'obsoletion': @@ -197,7 +201,7 @@ def resolveDeprecationChain(self, extensionsList, succeededBy, isRefpage, file): write(' ** Which in turn was _obsoleted_ by\n' + ext.conditionalLinkCoreAPI(ext.supercedingAPIVersion, '-new-feature', isRefpage), file=file) elif ext.supercedingExtension: write(' ** Which in turn was _obsoleted_ by\n' + ext.conditionalLinkExt(ext.supercedingExtension) + ' extension', file=file) - ext.resolveDeprecationChain(extensionsList, ext.supercedingExtension, file) + ext.resolveDeprecationChain(extensions, ext.supercedingExtension, file) else: write(' ** Which in turn was _obsoleted_ without replacement', file=file) else: # should be unreachable @@ -207,6 +211,11 @@ def resolveDeprecationChain(self, extensionsList, succeededBy, isRefpage, file): def writeTag(self, tag, value, isRefpage, fp): """Write a tag and (if non-None) a tag value to a file. + If the value is None, just write the tag. + + If the tag is None, just write the value (used for adding a value + to a just-written tag). 
+ - tag - string tag name - value - tag value, or None - isRefpage - controls style in which the tag is marked up @@ -221,18 +230,20 @@ def writeTag(self, tag, value, isRefpage, fp): tagPrefix = '*' tagSuffix = '*::' - write(tagPrefix + tag + tagSuffix, file=fp) + if tag is not None: + write(tagPrefix + tag + tagSuffix, file=fp) if value is not None: write(value, file=fp) if isRefpage: write('', file=fp) - def makeMetafile(self, extensionsList, isRefpage = False): + def makeMetafile(self, extensions, isRefpage = False): """Generate a file containing extension metainformation in asciidoctor markup form. - - extensionsList - list of extensions spec is being generated against + - extensions - dictionary of Extension objects for extensions spec + is being generated against - isRefpage - True if generating a refpage include, False if generating a specification extension appendix include""" @@ -249,20 +260,39 @@ def makeMetafile(self, extensionsList, isRefpage = False): write('', file=fp) self.writeTag('Name String', '`' + self.name + '`', isRefpage, fp) - self.writeTag('Extension Type', self.typeToStr(), isRefpage, fp) + if self.conventions.write_extension_type: + self.writeTag('Extension Type', self.typeToStr(), isRefpage, fp) + + if self.conventions.write_extension_number: + self.writeTag('Registered Extension Number', self.number, isRefpage, fp) + if self.conventions.write_extension_revision: + self.writeTag('Revision', self.revision, isRefpage, fp) - self.writeTag('Registered Extension Number', self.number, isRefpage, fp) - self.writeTag('Revision', self.revision, isRefpage, fp) + if self.conventions.xml_api_name in self.ratified.split(','): + ratstatus = 'Ratified' + else: + ratstatus = 'Not ratified' + self.writeTag('Ratification Status', ratstatus, isRefpage, fp) # Only API extension dependencies are coded in XML, others are explicit self.writeTag('Extension and Version Dependencies', None, isRefpage, fp) - write(' * Requires ' + self.conventions.api_name() + ' ' 
+ self.requiresCore, file=fp) - if self.requires: - for dep in self.requires.split(','): - write(' * Requires', self.conventions.formatExtension(dep), - file=fp) - if self.provisional == 'true': + # Transform the boolean 'depends' expression into equivalent + # human-readable asciidoc markup. + if self.depends is not None: + if isRefpage: + separator = '' + else: + separator = '+' + write(separator + '\n--\n' + + dependencyMarkup(self.depends) + + '--', file=fp) + else: + # Do not specify the base API redundantly, but put something + # here to avoid formatting trouble. + self.writeTag(None, 'None', isRefpage, fp) + + if self.provisional == 'true' and self.conventions.provisional_extension_warning: write(' * *This is a _provisional_ extension and must: be used with caution.', file=fp) write(' See the ' + self.specLink(xrefName = 'boilerplate-provisional-header', @@ -272,20 +302,20 @@ def makeMetafile(self, extensionsList, isRefpage = False): write('', file=fp) if self.deprecationType: - self.writeTag('Deprecation state', None, isRefpage, fp) + self.writeTag('Deprecation State', None, isRefpage, fp) if self.deprecationType == 'promotion': if self.supercedingAPIVersion: write(' * _Promoted_ to\n' + self.conditionalLinkCoreAPI(self.supercedingAPIVersion, '-promotions', isRefpage), file=fp) else: # ext.supercedingExtension write(' * _Promoted_ to\n' + self.conditionalLinkExt(self.supercedingExtension) + ' extension', file=fp) - self.resolveDeprecationChain(extensionsList, self.supercedingExtension, isRefpage, fp) + self.resolveDeprecationChain(extensions, self.supercedingExtension, isRefpage, fp) elif self.deprecationType == 'deprecation': if self.supercedingAPIVersion: write(' * _Deprecated_ by\n' + self.conditionalLinkCoreAPI(self.supercedingAPIVersion, '-new-features', isRefpage), file=fp) elif self.supercedingExtension: write(' * _Deprecated_ by\n' + self.conditionalLinkExt(self.supercedingExtension) + ' extension' , file=fp) - 
self.resolveDeprecationChain(extensionsList, self.supercedingExtension, isRefpage, fp) + self.resolveDeprecationChain(extensions, self.supercedingExtension, isRefpage, fp) else: write(' * _Deprecated_ without replacement' , file=fp) elif self.deprecationType == 'obsoletion': @@ -293,7 +323,7 @@ def makeMetafile(self, extensionsList, isRefpage = False): write(' * _Obsoleted_ by\n' + self.conditionalLinkCoreAPI(self.supercedingAPIVersion, '-new-features', isRefpage), file=fp) elif self.supercedingExtension: write(' * _Obsoleted_ by\n' + self.conditionalLinkExt(self.supercedingExtension) + ' extension' , file=fp) - self.resolveDeprecationChain(extensionsList, self.supercedingExtension, isRefpage, fp) + self.resolveDeprecationChain(extensions, self.supercedingExtension, isRefpage, fp) else: # TODO: Does not make sense to retroactively ban use of extensions from 1.0. # Needs some tweaks to the semantics and this message, when such extension(s) occur. @@ -332,10 +362,10 @@ def makeMetafile(self, extensionsList, isRefpage = False): if handle.startswith('gitlab:'): prettyHandle = 'icon:gitlab[alt=GitLab, role="red"]' + handle.replace('gitlab:@', '') elif handle.startswith('@'): - issuePlaceholderText = '[' + self.name + '] ' + handle - issuePlaceholderText += '%0A<>' - trackerLink = 'link:++https://github.com/KhronosGroup/Vulkan-Docs/issues/new?body=' + issuePlaceholderText + '++' - prettyHandle = trackerLink + '[icon:github[alt=GitHub,role="black"]' + handle[1:] + ', window=_blank]' + issuePlaceholderText = f'[{self.name}]{handle}' + issuePlaceholderText += f'%0A*Here describe the issue or question you have about the {self.name} extension*' + trackerLink = f'link:++https://github.com/KhronosGroup/Vulkan-Docs/issues/new?body={issuePlaceholderText}++' + prettyHandle = f'{trackerLink}[icon:github[alt=GitHub,role="black"]{handle[1:]},window=_blank,opts=nofollow]' else: prettyHandle = handle @@ -346,13 +376,49 @@ def makeMetafile(self, extensionsList, isRefpage = False): # 
current repository, and link to the same document (parameterized # by a URL prefix attribute) if it does. # The assumption is that a proposal document for an extension - # VK_name will be located in 'proposals/VK_name.asciidoc' relative + # VK_name will be located in 'proposals/VK_name.adoc' relative # to the repository root, and that this script will be invoked from # the repository root. - path = 'proposals/{}.asciidoc'.format(self.name) - if os.path.exists(path) and os.access(path, os.R_OK): - self.writeTag('Extension Proposal', - 'link:{{specRepositoryURL}}/{}[{}]'.format(path, self.name), isRefpage, fp) + # If a proposal for this extension does not exist, look for + # proposals for the extensions it is promoted from. + + def checkProposal(extname): + """Check if a proposal document for an extension exists, + returning the path to that proposal or None otherwise.""" + + path = 'proposals/{}.adoc'.format(extname) + if os.path.exists(path) and os.access(path, os.R_OK): + return path + else: + return None + + # List of [ extname, proposal link ] + proposals = [] + + path = checkProposal(self.name) + if path is not None: + proposals.append([self.name, path]) + else: + for name in self.promotedFrom: + path = checkProposal(name) + if path is not None: + proposals.append([name, path]) + + if len(proposals) > 0: + tag = 'Extension Proposal' + for (name, path) in sorted(proposals): + self.writeTag(tag, + f'link:{{specRepositoryURL}}/{path}[{name}]', + isRefpage, fp) + # Setting tag = None so additional values will not get + # additional tag headers. + tag = None + + # If this is metadata to be included in a refpage, adjust the + # leveloffset to account for the relative structure of the extension + # appendices vs. refpages. 
+ if isRefpage and self.conventions.include_extension_appendix_in_refpage: + write(':leveloffset: -1', file=fp) fp.close() @@ -366,8 +432,7 @@ class ExtensionMetaDocOutputGenerator(OutputGenerator): - number extension number (optional) - contact name and GitHub login or email address (optional) - type 'instance' | 'device' (optional) - - requires list of comma-separated required API extensions (optional) - - requiresCore required core version of API (optional) + - depends boolean expression of core version and extension names this depends on (optional) - promotedTo extension or API version it was promoted to - deprecatedBy extension or API version which deprecated this extension, or empty string if deprecated without replacement @@ -377,7 +442,7 @@ class ExtensionMetaDocOutputGenerator(OutputGenerator): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - self.extensions = [] + self.extensions = {} # List of strings containing all vendor tags self.vendor_tags = [] self.file_suffix = '' @@ -432,46 +497,54 @@ def conditionalExt(self, extName, content, ifdef = None, condition = None): return doc - def makeExtensionInclude(self, ext): - return self.conventions.extension_include_string(ext) + def makeExtensionInclude(self, extname): + return self.conventions.extension_include_string(extname) def endFile(self): - self.extensions.sort() + # Determine the extension an extension is promoted from, if any. + # This is used when attempting to locate a proposal document in + # makeMetafile() below. 
+ for (extname, ext) in self.extensions.items(): + promotedTo = ext.promotedTo + if promotedTo is not None: + if promotedTo in self.extensions: + #print(f'{promotedTo} is promoted from {extname}') + self.extensions[promotedTo].promotedFrom.add(extname) + #print(f'setting self.extensions[{promotedTo}].promotedFrom = {self.extensions[promotedTo].promotedFrom}') + elif not self.conventions.is_api_version_name(promotedTo): + self.logMsg('warn', f'{extname} is promoted to {promotedTo} which is not in the extension map') # Generate metadoc extension files, in refpage and non-refpage form - for ext in self.extensions: + for ext in self.extensions.values(): ext.makeMetafile(self.extensions, isRefpage = False) if self.conventions.write_refpage_include: ext.makeMetafile(self.extensions, isRefpage = True) + # Key to sort extensions alphabetically within 'KHR', 'EXT', vendor + # extension prefixes. + def makeSortKey(extname): + name = extname.lower() + prefixes = self.conventions.extension_index_prefixes + for i, prefix in enumerate(prefixes): + if extname.startswith(prefix): + return (i, name) + return (len(prefixes), name) + # Generate list of promoted extensions promotedExtensions = {} - for ext in self.extensions: + for ext in self.extensions.values(): if ext.deprecationType == 'promotion' and ext.supercedingAPIVersion: - promotedExtensions.setdefault(ext.supercedingAPIVersion, []).append(ext) + promotedExtensions.setdefault(ext.supercedingAPIVersion, []).append(ext.name) for coreVersion, extensions in promotedExtensions.items(): promoted_extensions_fp = self.newFile(self.directory + '/promoted_extensions_' + coreVersion + self.file_suffix) - for ext in extensions: + for extname in sorted(extensions, key=makeSortKey): indent = '' - write(' * {blank}\n+\n' + ext.conditionalLinkExt(ext.name, indent), file=promoted_extensions_fp) + write(' * {blank}\n+\n' + ext.conditionalLinkExt(extname, indent), file=promoted_extensions_fp) promoted_extensions_fp.close() - # Re-sort to 
match earlier behavior - # TODO: Remove this extra sort when re-arranging section order OK. - - def makeSortKey(ext): - name = ext.name.lower() - prefixes = self.conventions.extension_index_prefixes - for i, prefix in enumerate(prefixes): - if ext.name.startswith(prefix): - return (i, name) - return (len(prefixes), name) - - self.extensions.sort(key=makeSortKey) - # Generate include directives for the extensions appendix, grouping # extensions by status (current, deprecated, provisional, etc.) with self.newFile(self.directory + '/current_extensions_appendix' + self.file_suffix) as current_extensions_appendix_fp, \ @@ -486,8 +559,14 @@ def makeSortKey(ext): self.newFile(self.directory + '/provisional_extension_appendices_toc' + self.file_suffix) as provisional_extension_appendices_toc_fp, \ self.newFile(self.directory + '/provisional_extensions_guard_macro' + self.file_suffix) as provisional_extensions_guard_macro_fp: + # Note: there is a hardwired assumption in creating the + # include:: directives below that all of these files are located + # in the 'meta/' subdirectory of the generated files directory. + # This is difficult to change, and it is very unlikely changing + # it will be needed. 
+ write('', file=current_extensions_appendix_fp) - write('include::deprecated_extensions_guard_macro' + self.file_suffix + '[]', file=current_extensions_appendix_fp) + write('include::{generated}/meta/deprecated_extensions_guard_macro' + self.file_suffix + '[]', file=current_extensions_appendix_fp) write('', file=current_extensions_appendix_fp) write('ifndef::HAS_DEPRECATED_EXTENSIONS[]', file=current_extensions_appendix_fp) write('[[extension-appendices-list]]', file=current_extensions_appendix_fp) @@ -498,19 +577,19 @@ def makeSortKey(ext): write('== List of Current Extensions', file=current_extensions_appendix_fp) write('endif::HAS_DEPRECATED_EXTENSIONS[]', file=current_extensions_appendix_fp) write('', file=current_extensions_appendix_fp) - write('include::current_extension_appendices_toc' + self.file_suffix + '[]', file=current_extensions_appendix_fp) + write('include::{generated}/meta/current_extension_appendices_toc' + self.file_suffix + '[]', file=current_extensions_appendix_fp) write('\n<<<\n', file=current_extensions_appendix_fp) - write('include::current_extension_appendices' + self.file_suffix + '[]', file=current_extensions_appendix_fp) + write('include::{generated}/meta/current_extension_appendices' + self.file_suffix + '[]', file=current_extensions_appendix_fp) write('', file=deprecated_extensions_appendix_fp) - write('include::deprecated_extensions_guard_macro' + self.file_suffix + '[]', file=deprecated_extensions_appendix_fp) + write('include::{generated}/meta/deprecated_extensions_guard_macro' + self.file_suffix + '[]', file=deprecated_extensions_appendix_fp) write('', file=deprecated_extensions_appendix_fp) write('ifdef::HAS_DEPRECATED_EXTENSIONS[]', file=deprecated_extensions_appendix_fp) write('[[deprecated-extension-appendices-list]]', file=deprecated_extensions_appendix_fp) write('== List of Deprecated Extensions', file=deprecated_extensions_appendix_fp) - write('include::deprecated_extension_appendices_toc' + self.file_suffix + '[]', 
file=deprecated_extensions_appendix_fp) + write('include::{generated}/meta/deprecated_extension_appendices_toc' + self.file_suffix + '[]', file=deprecated_extensions_appendix_fp) write('\n<<<\n', file=deprecated_extensions_appendix_fp) - write('include::deprecated_extension_appendices' + self.file_suffix + '[]', file=deprecated_extensions_appendix_fp) + write('include::{generated}/meta/deprecated_extension_appendices' + self.file_suffix + '[]', file=deprecated_extensions_appendix_fp) write('endif::HAS_DEPRECATED_EXTENSIONS[]', file=deprecated_extensions_appendix_fp) # add include guards to allow multiple includes @@ -520,18 +599,22 @@ def makeSortKey(ext): write(':PROVISIONAL_EXTENSIONS_GUARD_MACRO_INCLUDE_GUARD:\n', file=provisional_extensions_guard_macro_fp) write('', file=provisional_extensions_appendix_fp) - write('include::provisional_extensions_guard_macro' + self.file_suffix + '[]', file=provisional_extensions_appendix_fp) + write('include::{generated}/meta/provisional_extensions_guard_macro' + self.file_suffix + '[]', file=provisional_extensions_appendix_fp) write('', file=provisional_extensions_appendix_fp) write('ifdef::HAS_PROVISIONAL_EXTENSIONS[]', file=provisional_extensions_appendix_fp) write('[[provisional-extension-appendices-list]]', file=provisional_extensions_appendix_fp) write('== List of Provisional Extensions', file=provisional_extensions_appendix_fp) - write('include::provisional_extension_appendices_toc' + self.file_suffix + '[]', file=provisional_extensions_appendix_fp) + write('include::{generated}/meta/provisional_extension_appendices_toc' + self.file_suffix + '[]', file=provisional_extensions_appendix_fp) write('\n<<<\n', file=provisional_extensions_appendix_fp) - write('include::provisional_extension_appendices' + self.file_suffix + '[]', file=provisional_extensions_appendix_fp) + write('include::{generated}/meta/provisional_extension_appendices' + self.file_suffix + '[]', file=provisional_extensions_appendix_fp) 
write('endif::HAS_PROVISIONAL_EXTENSIONS[]', file=provisional_extensions_appendix_fp) - for ext in self.extensions: - include = self.makeExtensionInclude(ext) + # Emit extensions in author ID order + sorted_keys = sorted(self.extensions.keys(), key=makeSortKey) + for name in sorted_keys: + ext = self.extensions[name] + + include = self.makeExtensionInclude(ext.name) link = ' * ' + self.conventions.formatExtension(ext.name) if ext.provisional == 'true': write(self.conditionalExt(ext.name, include), file=provisional_extension_appendices_fp) @@ -564,22 +647,28 @@ def beginFeature(self, interface, emit): self.logMsg('diag', 'beginFeature: ignoring non-extension feature', self.featureName) return - # These attributes must exist name = self.featureName - number = self.getAttrib(interface, 'number') - ext_type = self.getAttrib(interface, 'type') - revision = self.getSpecVersion(interface, name) + + # These attributes may be required to exist, depending on the API + number = self.getAttrib(interface, 'number', + self.conventions.write_extension_number) + ext_type = self.getAttrib(interface, 'type', + self.conventions.write_extension_type) + if self.conventions.write_extension_revision: + revision = self.getSpecVersion(interface, name) + else: + revision = None # These attributes are optional OPTIONAL = False - requires = self.getAttrib(interface, 'requires', OPTIONAL) - requiresCore = self.getAttrib(interface, 'requiresCore', OPTIONAL, '1.0') # TODO update this line with update_version.py + depends = self.getAttrib(interface, 'depends', OPTIONAL) # TODO should default to base API version 1.0? 
contact = self.getAttrib(interface, 'contact', OPTIONAL) promotedTo = self.getAttrib(interface, 'promotedto', OPTIONAL) deprecatedBy = self.getAttrib(interface, 'deprecatedby', OPTIONAL) obsoletedBy = self.getAttrib(interface, 'obsoletedby', OPTIONAL) provisional = self.getAttrib(interface, 'provisional', OPTIONAL, 'false') specialuse = self.getAttrib(interface, 'specialuse', OPTIONAL) + ratified = self.getAttrib(interface, 'ratified', OPTIONAL, '') filename = self.directory + '/' + name + self.file_suffix @@ -589,17 +678,16 @@ def beginFeature(self, interface, emit): name = name, number = number, ext_type = ext_type, - requires = requires, - requiresCore = requiresCore, + depends = depends, contact = contact, promotedTo = promotedTo, deprecatedBy = deprecatedBy, obsoletedBy = obsoletedBy, provisional = provisional, revision = revision, - specialuse = specialuse) - self.extensions.append(extdata) - + specialuse = specialuse, + ratified = ratified) + self.extensions[name] = extdata def endFeature(self): # Finish processing in superclass @@ -636,6 +724,7 @@ def numbersToWords(self, name): def getSpecVersion(self, elem, extname, default=None): """Determine the extension revision from the EXTENSION_NAME_SPEC_VERSION enumerant. + This only makes sense for Vulkan. - elem - element to query - extname - extension name from the 'name' attribute diff --git a/scripts/find_adoc_deps b/scripts/find_adoc_deps new file mode 100755 index 00000000..d20d0497 --- /dev/null +++ b/scripts/find_adoc_deps @@ -0,0 +1,17 @@ +#!/bin/bash + +# Copyright 2024 The Khronos Group Inc. +# SPDX-License-Identifier: Apache-2.0 + +# find_adoc_deps - find include:: dependencies of an asciidoc file +# Usage: find_adoc_deps file GENERATED +# Prints a space-separated list of file dependencies determined from the +# include:: constructs in 'file'. +# The literal text '{generated}' in an include:: path is substituted +# with the value of the second argument. 
+ +if test -f $1 ; then + echo `grep '^include::' $1 | sed -e 's/^include:://' -e 's/\[\]/ /' -e "s#{generated}#$2#"` +else + exit 1 +fi diff --git a/scripts/genRef.py b/scripts/genRef.py index ed7f2580..2b103761 100755 --- a/scripts/genRef.py +++ b/scripts/genRef.py @@ -14,27 +14,55 @@ import re import sys from collections import OrderedDict -from reflib import (findRefs, fixupRefs, loadFile, logDiag, logWarn, +from reflib import (findRefs, fixupRefs, loadFile, logDiag, logWarn, logErr, printPageInfo, setLogFile) from reg import Registry -from clconventions import OpenCLConventions as APIConventions +from generator import GeneratorOptions +from parse_dependency import dependencyNames +from apiconventions import APIConventions + + +# refpage 'type' attributes which are API entities and contain structured +# content such as API includes, valid usage blocks, etc. +refpage_api_types = ( + 'basetypes', + 'consts', + 'defines', + 'enums', + 'flags', + 'funcpointers', + 'handles', + 'protos', + 'structs', +) + +# Other refpage types - SPIR-V builtins, API feature blocks, etc. - which do +# not have structured content. +refpage_other_types = ( + 'builtins', + 'feature', + 'freeform', + 'spirv' +) def makeExtensionInclude(name): - """Return an include command, given an extension name.""" - return 'include::{}/refpage.{}{}[]'.format( - conventions.specification_path, - name, - conventions.file_suffix) + """Return an include command for a generated extension interface. + - name - extension name""" + + return 'include::{}/meta/refpage.{}{}[]'.format( + conventions.generated_include_path, + name, + conventions.file_suffix) def makeAPIInclude(type, name): """Return an include command for a generated API interface - - type - type of the API, e.g. 'flags', 'handles', etc - - name - name of the API""" + - type - type of the API, e.g. 
'flags', 'handles', etc + - name - name of the API""" return 'include::{}/api/{}/{}{}\n'.format( - conventions.refpage_generated_include_path, + conventions.generated_include_path, type, name, conventions.file_suffix) @@ -51,23 +79,30 @@ def printCopyrightSourceComments(fp): Writes an asciidoc comment block, which copyrights the source file.""" - print('// Copyright 2014-2024 The Khronos Group, Inc.', file=fp) + print('// Copyright 2014-2024 The Khronos Group Inc.', file=fp) print('//', file=fp) # This works around constraints of the 'reuse' tool print('// SPDX' + '-License-Identifier: CC-BY-4.0', file=fp) print('', file=fp) -def printFooter(fp): +def printFooter(fp, leveloffset=0): """Print footer material at the end of each refpage on open file fp. If generating separate refpages, adds the copyright. - If generating the single combined refpage, just add a separator.""" + If generating the single combined refpage, just add a separator. + + - leveloffset - number of levels to bias section titles up or down.""" + + # Generate the section header. + # Default depth is 2. + depth = max(0, leveloffset + 2) + prefix = '=' * depth print('ifdef::doctype-manpage[]', - '== Copyright', + f'{prefix} Copyright', '', - 'include::{config}/copyright-ccby.txt[]', + 'include::{config}/copyright-ccby' + conventions.file_suffix + '[]', 'endif::doctype-manpage[]', '', 'ifndef::doctype-manpage[]', @@ -89,7 +124,7 @@ def macroPrefix(name): if name in api.enums: return 'elink:' + name if name in api.flags: - return 'elink:' + name + return 'tlink:' + name if name in api.funcpointers: return 'tlink:' + name if name in api.handles: @@ -137,7 +172,11 @@ def seeAlsoList(apiName, explicitRefs=None, apiAliases=[]): for (base,dependency) in api.requiredBy[name]: refs.add(base) if dependency is not None: - refs.add(dependency) + # 'dependency' may be a boolean expression of extension + # names. + # Extract them for use in cross-references. 
+ for extname in dependencyNames(dependency): + refs.add(extname) if len(refs) == 0: return None @@ -184,7 +223,7 @@ def refPageShell(pageName, pageDesc, fp, head_content = None, sections=None, tai """Generate body of a reference page. - pageName - string name of the page - - pageDesc - string short description of the page + - pageDesc - string short description of the page, or empty string - fp - file to write to - head_content - text to include before the sections - sections - iterable returning (title,body) for each section. @@ -195,14 +234,18 @@ def refPageShell(pageName, pageDesc, fp, head_content = None, sections=None, tai print(':data-uri:', ':icons: font', + ':attribute-missing: warn', conventions.extra_refpage_headers, '', sep='\n', file=fp) s = '{}({})'.format(pageName, man_section) print('= ' + s, + '', + conventions.extra_refpage_body, '', sep='\n', file=fp) + if pageDesc.strip() == '': pageDesc = 'NO SHORT DESCRIPTION PROVIDED' logWarn('refPageHead: no short description provided for', pageName) @@ -265,7 +308,8 @@ def refPageTail(pageName, specAnchor=None, seeAlso=None, fp=None, - auto=False): + auto=False, + leveloffset=0): """Generate end boilerplate of a reference page. - pageName - name of the page @@ -273,7 +317,12 @@ def refPageTail(pageName, identifying the specification name and URL this refpage links to. - specAnchor - None or the 'anchor' attribute from the refpage block, identifying the anchor in the specification this refpage links to. If - None, the pageName is assumed to be a valid anchor.""" + None, the pageName is assumed to be a valid anchor. + - seeAlso - text of the "See Also" section + - fp - file to write the page to + - auto - True if this is an entirely generated refpage, False if it is + handwritten content from the spec. 
+ - leveloffset - number of levels to bias section titles up or down.""" specName = conventions.api_name(specType) specURL = conventions.specURL(specType) @@ -302,19 +351,24 @@ def refPageTail(pageName, 'not directly.', )) - print('== See Also', + # Generate the section header. + # Default depth is 2. + depth = max(0, leveloffset + 2) + prefix = '=' * depth + + print(f'{prefix} See Also', '', seeAlso, '', sep='\n', file=fp) - print('== Document Notes', + print(f'{prefix} Document Notes', '', '\n'.join(notes), '', sep='\n', file=fp) - printFooter(fp) + printFooter(fp, leveloffset) def xrefRewriteInitialize(): @@ -324,26 +378,26 @@ def xrefRewriteInitialize(): global refLinkTextPattern, refLinkTextSubstitute global specLinkPattern, specLinkSubstitute - # These are xrefs to Vulkan API entities, rewritten to link to refpages + # These are xrefs to API entities, rewritten to link to refpages # The refLink variants are for xrefs with only an anchor and no text. # The refLinkText variants are for xrefs with both anchor and text - refLinkPattern = re.compile(r'<<([Vv][Kk][^>,]+)>>') + refLinkPattern = re.compile(r'<<([Vv][Kk][A-Za-z0-9_]+)>>') refLinkSubstitute = r'link:\1.html[\1^]' - refLinkTextPattern = re.compile(r'<<([Vv][Kk][^>,]+)[,]?[ \t\n]*([^>,]*)>>') + refLinkTextPattern = re.compile(r'<<([Vv][Kk][A-Za-z0-9_]+)[,]?[ \t\n]*([^>,]*)>>') refLinkTextSubstitute = r'link:\1.html[\2^]' # These are xrefs to other anchors, rewritten to link to the spec - specLinkPattern = re.compile(r'<<([^>,]+)[,]?[ \t\n]*([^>,]*)>>') + specLinkPattern = re.compile(r'<<([-A-Za-z0-9_.(){}:]+)[,]?[ \t\n]*([^>,]*)>>') # Unfortunately, specLinkSubstitute depends on the link target, - # so can't be constructed in advance. + # so cannot be constructed in advance. specLinkSubstitute = None def xrefRewrite(text, specURL): """Rewrite asciidoctor xrefs in text to resolve properly in refpages. 
- Xrefs which are to Vulkan refpages are rewritten to link to those + Xrefs which are to refpages are rewritten to link to those refpages. The remainder are rewritten to generate external links into the supplied specification document URL. @@ -372,7 +426,7 @@ def emitPage(baseDir, specDir, pi, file): - specDir - directory extracted page source came from - pi - pageInfo for this page relative to file - file - list of strings making up the file, indexed by pi""" - pageName = baseDir + '/' + pi.name + '.txt' + pageName = f'{baseDir}/{pi.name}{conventions.file_suffix}' # Add a dictionary entry for this page global genDict @@ -387,9 +441,9 @@ def emitPage(baseDir, specDir, pi, file): field = None fieldText = None - if pi.type != 'freeform' and pi.type != 'spirv': + # Only do structural checks on API pages + if pi.type in refpage_api_types: if pi.include is None: - # Not sure how this happens yet logWarn('emitPage:', pageName, 'INCLUDE is None, no page generated') return @@ -421,9 +475,12 @@ def emitPage(baseDir, specDir, pi, file): logWarn('emitPage: INCLUDE == BODY, so description will be empty for', pi.name) if pi.begin != pi.include: logWarn('emitPage: Note: BEGIN != INCLUDE, so the description might be incorrectly located before the API include!') - else: + elif pi.type in refpage_other_types: specText = None descText = ''.join(file[pi.begin:pi.end + 1]) + else: + # This should be caught in the spec markup checking tests + logErr(f"emitPage: refpage type='{pi.type}' is unrecognized") # Rewrite asciidoctor xrefs to resolve properly in refpages specURL = conventions.specURL(pi.spec) @@ -456,7 +513,7 @@ def autoGenEnumsPage(baseDir, pi, file): - baseDir - base directory to emit page into - pi - pageInfo for this page relative to file - file - list of strings making up the file, indexed by pi""" - pageName = baseDir + '/' + pi.name + '.txt' + pageName = f'{baseDir}/{pi.name}{conventions.file_suffix}' fp = open(pageName, 'w', encoding='utf-8') # Add a dictionary entry 
for this page @@ -509,7 +566,7 @@ def autoGenFlagsPage(baseDir, flagName): - baseDir - base directory to emit page into - flagName - API *Flags name""" - pageName = baseDir + '/' + flagName + '.txt' + pageName = f'{baseDir}/{flagName}{conventions.file_suffix}' fp = open(pageName, 'w', encoding='utf-8') # Add a dictionary entry for this page @@ -564,7 +621,7 @@ def autoGenHandlePage(baseDir, handleName): - handleName - API handle name""" # @@ Need to determine creation function & add handles/ include for the # @@ interface in generator.py. - pageName = baseDir + '/' + handleName + '.txt' + pageName = f'{baseDir}/{handleName}{conventions.file_suffix}' fp = open(pageName, 'w', encoding='utf-8') # Add a dictionary entry for this page @@ -602,7 +659,8 @@ def genRef(specFile, baseDir): - specFile - filename to extract from - baseDir - output directory to generate page in""" - file = loadFile(specFile) + # We do not care the newline format used here. + file, _ = loadFile(specFile) if file is None: return @@ -623,6 +681,28 @@ def genRef(specFile, baseDir): for name in sorted(pageMap): pi = pageMap[name] + # Only generate the page if it is in the requested build + # 'freeform' pages are always generated + # 'feature' pages (core versions & extensions) are generated if they are in + # the requested feature list + # All other pages (APIs) are generated if they are in the API map for + # the build. + if pi.type in refpage_api_types: + if name not in api.typeCategory: + # Also check aliases of name - api.nonexistent is the same + # mapping used to rewrite *link: macros in this build. 
+ if name not in api.nonexistent: + logWarn(f'genRef: NOT generating feature page {name} - API not in this build') + continue + else: + logWarn(f'genRef: generating feature page {name} because its alias {api.nonexistent[name]} exists') + elif pi.type in refpage_other_types: + # The only non-API type which can be checked is a feature refpage + if pi.type == 'feature': + if name not in api.features: + logWarn(f'genRef: NOT generating feature page {name} - feature not in this build') + continue + printPageInfo(pi, file) if pi.Warning: @@ -635,7 +715,7 @@ def genRef(specFile, baseDir): elif pi.type == 'flags': autoGenFlagsPage(baseDir, pi.name) else: - # Don't extract this page + # Do not extract this page logWarn('genRef: Cannot extract or autogenerate:', pi.name) pages[pi.name] = pi @@ -646,9 +726,9 @@ def genRef(specFile, baseDir): def genSinglePageRef(baseDir): - """Generate baseDir/apispec.txt, the single-page version of the ref pages. + """Generate the single-page version of the ref pages. - This assumes there's a page for everything in the api module dictionaries. + This assumes there is a page for everything in the api module dictionaries. Extensions (KHR, EXT, etc.) are currently skipped""" # Accumulate head of page head = io.StringIO() @@ -664,13 +744,15 @@ def genSinglePageRef(baseDir): ':data-uri:', ':toc2:', ':toclevels: 2', + ':attribute-missing: warn', '', sep='\n', file=head) print('== Copyright', file=head) print('', file=head) - print('include::{config}/copyright-ccby.txt[]', file=head) + print('include::{config}/copyright-ccby' + conventions.file_suffix + '[]', file=head) print('', file=head) + # Inject the table of contents. Asciidoc really ought to be generating # this for us. 
@@ -706,7 +788,7 @@ def genSinglePageRef(baseDir): keys = sorted(apiDict.keys()) for refPage in keys: - # Don't generate links for aliases, which are included with the + # Do not generate links for aliases, which are included with the # aliased page if refPage not in api.alias: # Add page to body @@ -720,7 +802,7 @@ def genSinglePageRef(baseDir): # Now, all are emitted. continue else: - print('include::' + refPage + '.txt[]', file=body) + print(f'include::{refPage}{conventions.file_suffix}[]', file=body) else: # Alternatively, we could (probably should) link to the # aliased refpage @@ -731,7 +813,7 @@ def genSinglePageRef(baseDir): print('\n' + ':leveloffset: 0' + '\n', file=body) # Write head and body to the output file - pageName = baseDir + '/apispec.txt' + pageName = f'{baseDir}/apispec{conventions.file_suffix}' fp = open(pageName, 'w', encoding='utf-8') print(head.getvalue(), file=fp, end='') @@ -757,17 +839,14 @@ def genExtension(baseDir, extpath, name, info): declares = [] elem = info.elem - # Type of extension (instance, device, etc.) - ext_type = elem.get('type') - # Autogenerate interfaces from entry - for required in elem.find('require'): + for required in elem.findall('require'): req_name = required.get('name') if not req_name: - # This isn't what we're looking for + # This is not what we are looking for continue if req_name.endswith('_SPEC_VERSION') or req_name.endswith('_EXTENSION_NAME'): - # Don't link to spec version or extension name - those ref pages aren't created. + # Do not link to spec version or extension name - those ref pages are not created. 
continue if required.get('extends'): @@ -777,54 +856,70 @@ def genExtension(baseDir, extpath, name, info): continue if req_name not in genDict: - logWarn('ERROR: {} (in extension {}) does not have a ref page.'.format(req_name, name)) + if req_name in api.alias: + logWarn(f'WARN: {req_name} (in extension {name}) is an alias, so does not have a ref page') + else: + logWarn(f'ERROR: {req_name} (in extension {name}) does not have a ref page.') declares.append(req_name) - # import pdb - # pdb.set_trace() - appbody = None + tail_content = None if extpath is not None: - appfp = open('{}/{}.txt'.format(extpath, name), 'r', encoding='utf-8') - if appfp is not None: + try: + appPath = extpath + '/' + conventions.extension_file_path(name) + appfp = open(appPath, 'r', encoding='utf-8') appbody = appfp.read() + appfp.close() # Transform internal links to crosslinks specURL = conventions.specURL() appbody = xrefRewrite(appbody, specURL) - else: + except FileNotFoundError: + print('Cannot find extension appendix for', name) logWarn('Cannot find extension appendix for', name) # Fall through to autogenerated page extpath = None appbody = None - appfp.close() - # Include the extension appendix without an extra title - # head_content = 'include::{{appendices}}/{}.txt[]'.format(name) + appbody = f'Cannot find extension appendix {appPath} for {name}\n' + else: + tail_content = makeExtensionInclude(name) # Write the extension refpage - pageName = baseDir + '/' + name + '.txt' + pageName = f'{baseDir}/{name}{conventions.file_suffix}' logDiag('genExtension:', pageName) fp = open(pageName, 'w', encoding='utf-8') # There are no generated titled sections sections = None - # 'See link:{html_spec_relative}#%s[ %s] in the main specification for complete information.' 
% ( - # name, name) refPageShell(name, - "{} extension".format(ext_type), + conventions.extension_short_description(elem), fp, appbody, - sections=sections) + sections=sections, + tail_content=tail_content) + + # Restore leveloffset for boilerplate in refPageTail + if conventions.include_extension_appendix_in_refpage: + # The generated metadata include (refpage.extensionname.adoc) moved + # the leveloffset attribute by -1 to account for the relative + # structuring of the spec extension appendix section structure vs. + # the refpages. + # This restores leveloffset for the boilerplate in refPageTail. + leveloffset = 1 + else: + leveloffset = 0 + refPageTail(pageName=name, specType=None, specAnchor=name, seeAlso=seeAlsoList(name, declares), fp=fp, - auto=True) + auto=True, + leveloffset=leveloffset) fp.close() @@ -872,10 +967,9 @@ def genExtension(baseDir, extpath, name, info): results = parser.parse_args() - # Look for api.py in the specified directory - if results.genpath is not None: - sys.path.insert(0, results.genpath) - import api + # Load the generated apimap module + sys.path.insert(0, results.genpath) + import apimap as api setLogFile(True, True, results.logFile) setLogFile(True, False, results.diagFile) @@ -896,19 +990,21 @@ def genExtension(baseDir, extpath, name, info): d = genRef(file, baseDir) pages.update(d) - # Now figure out which pages *weren't* generated from the spec. + # Now figure out which pages were not generated from the spec. # This relies on the dictionaries of API constructs in the api module. if not results.noauto: - registry = Registry() + # Must have an apiname selected to avoid complaints from + # registry.loadFile, even though it is irrelevant to our uses. + genOpts = GeneratorOptions(apiname = conventions.xml_api_name) + registry = Registry(genOpts = genOpts) registry.loadFile(results.registry) if conventions.write_refpage_include: # Only extensions with a supported="..." 
attribute in this set # will be considered for extraction/generation. - supported_strings = set((conventions.xml_api_name,)) ext_names = set(k for k, v in registry.extdict.items() - if v.supported in supported_strings) + if conventions.xml_api_name in v.supported.split(',')) desired_extensions = ext_names.intersection(set(results.extension)) for prefix in conventions.extension_index_prefixes: @@ -943,19 +1039,19 @@ def genExtension(baseDir, extpath, name, info): (extensions, apiName + ' Extensions'), ] - # Summarize pages that weren't generated, for good or bad reasons + # Summarize pages that were not generated, for good or bad reasons for (apiDict, title) in sections: # OpenXR was keeping a 'flagged' state which only printed out a # warning for the first non-generated page, but was otherwise - # unused. This doesn't seem helpful. + # unused. This does not seem helpful. for page in apiDict: if page not in genDict: # Page was not generated - why not? if page in api.alias: - logWarn('(Benign, is an alias) Ref page for', title, page, 'is aliased into', api.alias[page]) + logDiag('(Benign, is an alias) Ref page for', title, page, 'is aliased into', api.alias[page]) elif page in api.flags and api.flags[page] is None: - logWarn('(Benign, no FlagBits defined) No ref page generated for ', title, + logDiag('(Benign, no FlagBits defined) No ref page generated for ', title, page) else: # Could introduce additional logic to detect diff --git a/scripts/gen_dictionaries.py b/scripts/gen_dictionaries.py index 4d750876..85d6ffcf 100755 --- a/scripts/gen_dictionaries.py +++ b/scripts/gen_dictionaries.py @@ -19,9 +19,8 @@ def parse_xml(path): # File Header: def GetHeader(): - return """// Copyright 2017-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ + return """// Copyright 2017-2024 The Khronos Group. 
+// SPDX-License-Identifier: CC-BY-4.0 """ diff --git a/scripts/gen_version_notes.py b/scripts/gen_version_notes.py index 5843fe1d..b16faa42 100755 --- a/scripts/gen_version_notes.py +++ b/scripts/gen_version_notes.py @@ -23,9 +23,8 @@ def parse_xml(path): # File Header: def GetHeader(): - return """// Copyright 2017-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ + return """// Copyright 2017-2024 The Khronos Group. +// SPDX-License-Identifier: CC-BY-4.0 """ @@ -58,40 +57,58 @@ def ShortNote(name, added_in, deprecated_by): if added_in != "1.0" and deprecated_by != None: return "{missing_before} version %s and {deprecated_by} version %s." % (added_in, deprecated_by) -# Find feature groups that are parents of a feature/require/${entry_type} -# hierarchy, and then find all the ${entry_type} within each hierarchy: +# Find feature or extension groups that are parents of a or +# <${entry_type}> tag, and then find all the +# ${entry_type} within each hierarchy: def process_xml(spec, entry_type, note_printer): numberOfEntries = 0 numberOfNewEntries = 0 numberOfDeprecatedEntries = 0 - for feature in spec.findall('.//feature/require/%s/../..' % entry_type): - for entry in feature.findall('.//%s' % entry_type): - name = entry.get('name') - - numberOfEntries += 1 - added_in = feature.get('number') - deprecated_by = None - - # All the groups that this specific API ${entry_type} belongs. - categories = spec.findall( - './/require[@comment]/%s[@name="%s"]/..' % (entry_type, name)) - for category in categories: - comment = category.get('comment') - if "deprecated in OpenCL" in comment: - words = comment.split(" ") - assert " ".join(words[-4:-1]) == "deprecated in OpenCL" - assert deprecated_by == None # Can't deprecate something twice. 
- deprecated_by = words[-1] - - versionFileName = os.path.join(args.directory, name + ".asciidoc") - with open(versionFileName, 'w') as versionFile: - versionFile.write(GetHeader()) - versionFile.write(note_printer(name, added_in, deprecated_by)) - versionFile.write(GetFooter()) - - numberOfNewEntries += 0 if added_in == "1.0" else 1 - numberOfDeprecatedEntries += 0 if deprecated_by == None else 1 + # Track the APIs which have already had a version file written, to avoid + # a couple of cases like CL_DEPTH, which is required by both a core + # version and an extension. + seen_apis = set() + + for feature_type in [ 'feature', 'extension' ]: + for feature in spec.findall(f'.//{feature_type}/require/{entry_type}/../..'): + for entry in feature.findall(f'.//{entry_type}'): + name = entry.get('name') + deprecated_by = None + + numberOfEntries += 1 + if feature_type == 'feature': + added_in = feature.get('number') + + # All the groups that this specific API ${entry_type} belongs. + categories = spec.findall( + './/require[@comment]/%s[@name="%s"]/..' % (entry_type, name)) + for category in categories: + comment = category.get('comment') + if "deprecated in OpenCL" in comment: + words = comment.split(" ") + assert " ".join(words[-4:-1]) == "deprecated in OpenCL" + assert deprecated_by == None # Can't deprecate something twice. 
+ deprecated_by = words[-1] + else: + if name in seen_apis: + print(f'WARNING: {name} exists as both a core version and extension API in the XML') + print('This is not currently handled correctly - only the core version dependency is noted') + continue + + # Extensions do not allow for deprecation + added_in = feature.get('name') + + seen_apis.add(name) + + versionFileName = os.path.join(args.directory, name + ".asciidoc") + with open(versionFileName, 'w') as versionFile: + versionFile.write(GetHeader()) + versionFile.write(note_printer(name, added_in, deprecated_by)) + versionFile.write(GetFooter()) + + numberOfNewEntries += 0 if added_in == "1.0" else 1 + numberOfDeprecatedEntries += 0 if deprecated_by == None else 1 print('Found ' + str(numberOfEntries) + ' API ' + entry_type + 's, ' + str(numberOfNewEntries) + " newer than 1.0, " diff --git a/scripts/gencl.py b/scripts/gencl.py index df7c7476..04f821ce 100755 --- a/scripts/gencl.py +++ b/scripts/gencl.py @@ -20,11 +20,9 @@ from pygenerator import PyOutputGenerator -from reflib import logDiag, logWarn, setLogFile +from reflib import logDiag, logWarn, logErr, setLogFile from reg import Registry - -from clconventions import OpenCLConventions as APIConventions - +from apiconventions import APIConventions # Simple timer functions startTime = None @@ -131,6 +129,11 @@ def makeGenOpts(args): # An API style conventions object conventions = APIConventions() + if args.apiname is not None: + defaultAPIName = args.apiname + else: + defaultAPIName = conventions.xml_api_name + # API include files for spec and ref pages # Overwrites include subdirectories in spec source tree # The generated include files do not include the calling convention @@ -145,7 +148,7 @@ def makeGenOpts(args): filename = 'timeMarker', directory = directory, genpath = genpath, - apiname = 'opencl', + apiname = defaultAPIName, profile = None, versions = featuresPat, emitversions = featuresPat, @@ -163,14 +166,14 @@ def makeGenOpts(args): # Python 
representation of API information, used by scripts that # don't need to load the full XML. - genOpts['api.py'] = [ + genOpts['apimap.py'] = [ PyOutputGenerator, DocGeneratorOptions( conventions = conventions, - filename = 'api.py', + filename = 'apimap.py', directory = directory, - genpath = genpath, - apiname = 'opencl', + genpath = None, + apiname = defaultAPIName, profile = None, versions = featuresPat, emitversions = featuresPat, @@ -181,6 +184,7 @@ def makeGenOpts(args): reparentEnums = False) ] + # Extension metainformation for spec extension appendices # Includes all extensions by default, but only so that the generated # 'promoted_extensions_*' files refer to all extensions that were @@ -192,7 +196,7 @@ def makeGenOpts(args): filename = 'timeMarker', directory = directory, genpath = None, - apiname = 'opencl', + apiname = defaultAPIName, profile = None, versions = featuresPat, emitversions = None, @@ -202,70 +206,6 @@ def makeGenOpts(args): emitExtensions = emitExtensionsPat) ] - # Platform extensions, in their own header files - # Each element of the platforms[] array defines information for - # generating a single platform: - # [0] is the generated header file name - # [1] is the set of platform extensions to generate - # [2] is additional extensions whose interfaces should be considered, - # but suppressed in the output, to avoid duplicate definitions of - # dependent types like VkDisplayKHR and VkSurfaceKHR which come from - # non-platform extensions. - - # Track all platform extensions, for exclusion from vulkan_core.h - allPlatformExtensions = [] - - # # Extensions suppressed for all platforms. - # # Covers common WSI extension types. 
- # commonSuppressExtensions = [ 'VK_KHR_display', 'VK_KHR_swapchain' ] - # - # platforms = [ - # [ 'vulkan_android.h', [ 'VK_KHR_android_surface', - # 'VK_ANDROID_external_memory_android_hardware_buffer' - # ], commonSuppressExtensions ], - # [ 'vulkan_fuchsia.h', [ 'VK_FUCHSIA_imagepipe_surface'], commonSuppressExtensions ], - # [ 'vulkan_ios.h', [ 'VK_MVK_ios_surface' ], commonSuppressExtensions ], - # [ 'vulkan_macos.h', [ 'VK_MVK_macos_surface' ], commonSuppressExtensions ], - # [ 'vulkan_vi.h', [ 'VK_NN_vi_surface' ], commonSuppressExtensions ], - # [ 'vulkan_wayland.h', [ 'VK_KHR_wayland_surface' ], commonSuppressExtensions ], - # [ 'vulkan_win32.h', [ 'VK_.*_win32(|_.*)' ], commonSuppressExtensions + [ 'VK_KHR_external_semaphore', 'VK_KHR_external_memory_capabilities', 'VK_KHR_external_fence', 'VK_KHR_external_fence_capabilities', 'VK_NV_external_memory_capabilities' ] ], - # [ 'vulkan_xcb.h', [ 'VK_KHR_xcb_surface' ], commonSuppressExtensions ], - # [ 'vulkan_xlib.h', [ 'VK_KHR_xlib_surface' ], commonSuppressExtensions ], - # [ 'vulkan_xlib_xrandr.h', [ 'VK_EXT_acquire_xlib_display' ], commonSuppressExtensions ], - # ] - # - # for platform in platforms: - # headername = platform[0] - # - # allPlatformExtensions += platform[1] - # - # addPlatformExtensionsRE = makeREstring(platform[1] + platform[2]) - # emitPlatformExtensionsRE = makeREstring(platform[1]) - # - # opts = CGeneratorOptions( - # filename = headername, - # directory = directory, - # apiname = 'vulkan', - # profile = None, - # versions = featuresPat, - # emitversions = None, - # defaultExtensions = None, - # addExtensions = addPlatformExtensionsRE, - # removeExtensions = None, - # emitExtensions = emitPlatformExtensionsRE, - # prefixText = prefixStrings + clPrefixStrings, - # genFuncPointers = True, - # protectFile = protectFile, - # protectFeature = False, - # protectProto = '#ifndef', - # protectProtoStr = 'VK_NO_PROTOTYPES', - # apicall = 'VKAPI_ATTR ', - # apientry = 'VKAPI_CALL ', - # 
apientryp = 'VKAPI_PTR *', - # alignFuncParam = 0) - # - # genOpts[headername] = [ COutputGenerator, opts ] - # Header for core API + extensions. # To generate just the core API, # change to 'defaultExtensions = None' below. @@ -274,8 +214,8 @@ def makeGenOpts(args): # It removes all platform extensions (from the platform headers options # constructed above) as well as any explicitly specified removals. - removeExtensionsPat = makeREstring( - allPlatformExtensions + removeExtensions, None, strings_are_regex=True) + removeExtensionsPat = makeREstring(removeExtensions, None, + strings_are_regex=True) genOpts['cl.h'] = [ COutputGenerator, @@ -284,7 +224,7 @@ def makeGenOpts(args): filename = 'cl.h', directory = directory, genpath = None, - apiname = 'opencl', + apiname = defaultAPIName, profile = None, versions = featuresPat, emitversions = featuresPat, @@ -353,8 +293,11 @@ def genTarget(args): if __name__ == '__main__': parser = argparse.ArgumentParser() + parser.add_argument('-apiname', action='store', + default=None, + help='Specify API to generate (defaults to repository-specific conventions object value)') parser.add_argument('-defaultExtensions', action='store', - default='opencl', + default=APIConventions().xml_api_name, help='Specify a single class of extensions to add to targets') parser.add_argument('-extension', action='append', default=[], @@ -425,10 +368,12 @@ def genTarget(args): else: diag = None - (gen, options) = (None, None) - if not args.validate: - # Create the API generator & generator options - (gen, options) = genTarget(args) + if args.time: + # Log diagnostics and warnings + setLogFile(setDiag = True, setWarn = True, filename = '-') + + # Create the API generator & generator options + (gen, options) = genTarget(args) # Create the registry object with the specified generator and generator # options. The options are set before XML loading as they may affect it. 
@@ -444,10 +389,6 @@ def genTarget(args): reg.loadElementTree(tree) endTimer(args.time, '* Time to parse ElementTree =') - if args.validate: - success = reg.validateRegistry() - sys.exit(0 if success else 1) - if args.dump: logDiag('* Dumping registry to regdump.txt') reg.dumpReg(filehandle=open('regdump.txt', 'w', encoding='utf-8')) diff --git a/scripts/generator.py b/scripts/generator.py index a5b648b9..dea2ffa3 100644 --- a/scripts/generator.py +++ b/scripts/generator.py @@ -17,7 +17,7 @@ try: from pathlib import Path except ImportError: - from pathlib2 import Path + from pathlib2 import Path # type: ignore from spec_tools.util import getElemName, getElemType @@ -43,7 +43,10 @@ def enquote(s): """Return string argument with surrounding quotes, for serialization into Python code.""" if s: - return "'{}'".format(s) + if isinstance(s, str): + return f"'{s}'" + else: + return s return None @@ -52,14 +55,17 @@ def regSortCategoryKey(feature): Sorts by category of the feature name string: - Core API features (those defined with a `` tag) + - (sort VKSC after VK - this is Vulkan-specific) - ARB/KHR/OES (Khronos extensions) - other (EXT/vendor extensions)""" if feature.elem.tag == 'feature': - return 0 - if (feature.category == 'ARB' - or feature.category == 'KHR' - or feature.category == 'OES'): + if feature.name.startswith('VKSC'): + return 0.5 + else: + return 0 + + if feature.category.upper() in ['ARB', 'KHR', 'OES']: return 1 return 2 @@ -68,10 +74,15 @@ def regSortCategoryKey(feature): def regSortOrderKey(feature): """Sort key for regSortFeatures - key is the sortorder attribute.""" - # print("regSortOrderKey {} -> {}".format(feature.name, feature.sortorder)) return feature.sortorder +def regSortNameKey(feature): + """Sort key for regSortFeatures - key is the extension name.""" + + return feature.name + + def regSortFeatureVersionKey(feature): """Sort key for regSortFeatures - key is the feature version. 
`` elements all have version number 0.""" @@ -99,6 +110,36 @@ def regSortFeatures(featureList): featureList.sort(key=regSortOrderKey) +class MissingGeneratorOptionsError(RuntimeError): + """Error raised when a Generator tries to do something that requires GeneratorOptions but it is None.""" + + def __init__(self, msg=None): + full_msg = 'Missing generator options object self.genOpts' + if msg: + full_msg += ': ' + msg + super().__init__(full_msg) + + +class MissingRegistryError(RuntimeError): + """Error raised when a Generator tries to do something that requires a Registry object but it is None.""" + + def __init__(self, msg=None): + full_msg = 'Missing Registry object self.registry' + if msg: + full_msg += ': ' + msg + super().__init__(full_msg) + + +class MissingGeneratorOptionsConventionsError(RuntimeError): + """Error raised when a Generator tries to do something that requires a Conventions object but it is None.""" + + def __init__(self, msg=None): + full_msg = 'Missing Conventions object self.genOpts.conventions' + if msg: + full_msg += ': ' + msg + super().__init__(full_msg) + + class GeneratorOptions: """Base class for options used during header/documentation production. @@ -111,6 +152,7 @@ def __init__(self, directory='.', genpath=None, apiname=None, + mergeApiNames=None, profile=None, versions='.*', emitversions='.*', @@ -119,8 +161,12 @@ def __init__(self, removeExtensions=None, emitExtensions=None, emitSpirv=None, + emitFormats=None, reparentEnums=True, - sortProcedure=regSortFeatures): + sortProcedure=regSortFeatures, + requireCommandAliases=False, + requireDepends=True, + ): """Constructor. Arguments: @@ -128,9 +174,11 @@ def __init__(self, - conventions - may be mandatory for some generators: an object that implements ConventionsBase - filename - basename of file to generate, or None to write to stdout. 
- - directory - directory in which to generate files - - genpath - path to previously generated files, such as api.py + - directory - directory in which to generate filename + - genpath - path to previously generated files, such as apimap.py - apiname - string matching `` 'apiname' attribute, e.g. 'gl'. + - mergeApiNames - If not None, a comma separated list of API names + to merge into the API specified by 'apiname' - profile - string specifying API profile , e.g. 'core', or None. - versions - regex matching API versions to process interfaces for. Normally `'.*'` or `'[0-9][.][0-9]'` to match all defined versions. @@ -148,10 +196,11 @@ def __init__(self, to None. - emitExtensions - regex matching names of extensions to actually emit interfaces for (though all requested versions are considered when - deciding which interfaces to generate). - to None. + deciding which interfaces to generate). Defaults to None. - emitSpirv - regex matching names of extensions and capabilities to actually emit interfaces for. + - emitFormats - regex matching names of formats to actually emit + interfaces for. - reparentEnums - move elements which extend an enumerated type from or elements to the target element. This is required for almost all purposes, but the @@ -159,9 +208,16 @@ def __init__(self, or being complete. Defaults to True. - sortProcedure - takes a list of FeatureInfo objects and sorts them in place to a preferred order in the generated output. - Default is core API versions, ARB/KHR/OES extensions, all other - extensions, by core API version number or extension number in each - group. + - requireCommandAliases - if True, treat command aliases + as required dependencies. + - requireDepends - whether to follow API dependencies when emitting + APIs. + + Default is + - core API versions + - Khronos (ARB/KHR/OES) extensions + - All other extensions + - By core API version number or extension number in each group. 
The regex patterns can be None or empty, in which case they match nothing.""" @@ -173,7 +229,7 @@ def __init__(self, "basename of file to generate, or None to write to stdout." self.genpath = genpath - """path to previously generated files, such as api.py""" + """path to previously generated files, such as apimap.py""" self.directory = directory "directory in which to generate filename" @@ -181,6 +237,9 @@ def __init__(self, self.apiname = apiname "string matching `` 'apiname' attribute, e.g. 'gl'." + self.mergeApiNames = mergeApiNames + "comma separated list of API names to merge into the API specified by 'apiname'" + self.profile = profile "string specifying API profile , e.g. 'core', or None." @@ -217,6 +276,10 @@ def __init__(self, """regex matching names of extensions and capabilities to actually emit interfaces for.""" + self.emitFormats = self.emptyRegex(emitFormats) + """regex matching names of formats + to actually emit interfaces for.""" + self.reparentEnums = reparentEnums """boolean specifying whether to remove elements from or when extending an type.""" @@ -230,6 +293,16 @@ def __init__(self, self.codeGenerator = False """True if this generator makes compilable code""" + self.registry = None + """Populated later with the registry object.""" + + self.requireCommandAliases = requireCommandAliases + """True if alias= attributes of tags are transitively + required.""" + + self.requireDepends = requireDepends + """True if dependencies of API tags are transitively required.""" + def emptyRegex(self, pat): """Substitute a regular expression which matches no version or extension names for None or the empty string.""" @@ -257,6 +330,17 @@ class OutputGenerator: 'basetype': 'basetypes', } + def breakName(self, name, msg): + """Break into debugger if this is a special name""" + + # List of string names to break on + bad = ( + ) + + if name in bad and True: + print('breakName {}: {}'.format(name, msg)) + pdb.set_trace() + def __init__(self, errFile=sys.stderr, 
warnFile=sys.stderr, diagFile=sys.stdout): """Constructor @@ -268,9 +352,17 @@ def __init__(self, errFile=sys.stderr, warnFile=sys.stderr, diagFile=sys.stdout) self.diagFile = diagFile # Internal state self.featureName = None + """The current feature name being generated.""" + self.genOpts = None + """The GeneratorOptions subclass instance.""" + self.registry = None + """The specification registry object.""" + self.featureDictionary = {} + """The dictionary of dictionaries of API features.""" + # Used for extension enum value generation self.extBase = 1000000000 self.extBlockSize = 1000 @@ -280,6 +372,9 @@ def __init__(self, errFile=sys.stderr, warnFile=sys.stderr, diagFile=sys.stdout) # derived generators. self.apidict = None + # File suffix for generated files, set in beginFile below. + self.file_suffix = '' + def logMsg(self, level, *args): """Write a message of different categories to different destinations. @@ -306,9 +401,17 @@ def logMsg(self, level, *args): raise UserWarning( '*** FATAL ERROR in Generator.logMsg: unknown level:' + level) - def enumToValue(self, elem, needsNum, bitwidth = 32, forceSuffix = False): + def enumToValue(self, elem, needsNum, bitwidth = 32, + forceSuffix = False, parent_for_alias_dereference=None): """Parse and convert an `` tag into a value. + - elem - Element + - needsNum - generate a numeric representation of the element value + - bitwidth - size of the numeric representation in bits (32 or 64) + - forceSuffix - if True, always use a 'U' / 'ULL' suffix on integers + - parent_for_alias_dereference - if not None, an Element containing + the parent of elem, used to look for elements this is an alias of + Returns a list: - first element - integer representation of the value, or None @@ -330,6 +433,11 @@ def enumToValue(self, elem, needsNum, bitwidth = 32, forceSuffix = False): - An 'alias' attribute contains the name of another enum which this is an alias of. 
The other enum must be declared first when emitting this enum.""" + if self.genOpts is None: + raise MissingGeneratorOptionsError() + if self.genOpts.conventions is None: + raise MissingGeneratorOptionsConventionsError() + name = elem.get('name') numVal = None if 'value' in elem.keys(): @@ -337,7 +445,7 @@ def enumToValue(self, elem, needsNum, bitwidth = 32, forceSuffix = False): # print('About to translate value =', value, 'type =', type(value)) if needsNum: numVal = int(value, 0) - # If there's a non-integer, numeric 'type' attribute (e.g. 'u' or + # If there is a non-integer, numeric 'type' attribute (e.g. 'u' or # 'ull'), append it to the string value. # t = enuminfo.elem.get('type') # if t is not None and t != '' and t != 'i' and t != 's': @@ -354,7 +462,7 @@ def enumToValue(self, elem, needsNum, bitwidth = 32, forceSuffix = False): bitpos = int(value, 0) numVal = 1 << bitpos value = '0x%08x' % numVal - if bitwidth == 64: + if bitwidth == 64 or bitpos >= 32: value = value + 'ULL' elif forceSuffix: value = value + 'U' @@ -381,7 +489,15 @@ def enumToValue(self, elem, needsNum, bitwidth = 32, forceSuffix = False): self.logMsg('diag', 'Enum', name, '-> offset [', numVal, ',', value, ']') return [numVal, value] if 'alias' in elem.keys(): - return [None, elem.get('alias')] + alias_of = elem.get('alias') + if parent_for_alias_dereference is None: + return (None, alias_of) + siblings = parent_for_alias_dereference.findall('enum') + for sib in siblings: + sib_name = sib.get('name') + if sib_name == alias_of: + return self.enumToValue(sib, needsNum) + raise RuntimeError("Could not find the aliased enum value") return [None, None] def checkDuplicateEnums(self, enums): @@ -418,7 +534,7 @@ def checkDuplicateEnums(self, enums): + ') found with different values:' + strVal + ' and ' + strVal2) - # Don't add the duplicate to the returned list + # Do not add the duplicate to the returned list continue elif numVal in valueMap: # Duplicate value found (such as an alias); report 
it, but @@ -448,6 +564,11 @@ def misracppstyle(self): def buildEnumCDecl(self, expand, groupinfo, groupName): """Generate the C declaration for an enum""" + if self.genOpts is None: + raise MissingGeneratorOptionsError() + if self.genOpts.conventions is None: + raise MissingGeneratorOptionsConventionsError() + groupElem = groupinfo.elem # Determine the required bit width for the enum group. @@ -519,7 +640,7 @@ def buildEnumCDecl_BitmaskOrDefine(self, groupinfo, groupName, bitwidth, usedefi # Accumulate non-numeric enumerant values separately and append # them following the numeric values, to allow for aliases. - # NOTE: this doesn't do a topological sort yet, so aliases of + # NOTE: this does not do a topological sort yet, so aliases of # aliases can still get in the wrong order. aliasText = '' @@ -553,7 +674,10 @@ def buildEnumCDecl_BitmaskOrDefine(self, groupinfo, groupName, bitwidth, usedefi # Work around this by chasing the aliases to get the actual value. while numVal is None: alias = self.registry.tree.find("enums/enum[@name='" + strVal + "']") - (numVal, strVal) = self.enumToValue(alias, True, bitwidth, True) + if alias is not None: + (numVal, strVal) = self.enumToValue(alias, True, bitwidth, True) + else: + self.logMsg('error', 'No such alias {} for enum {}'.format(strVal, name)) decl += "static const {} {} = {};\n".format(flagTypeName, name, strVal) if numVal is not None: @@ -596,7 +720,6 @@ def buildEnumCDecl_Enum(self, expand, groupinfo, groupName): maxValidValue = 2**(32 - 1) - 1 minValidValue = (maxValidValue * -1) - 1 - # Get a list of nested 'enum' tags. enums = groupElem.findall('enum') @@ -612,10 +735,13 @@ def buildEnumCDecl_Enum(self, expand, groupinfo, groupName): # Accumulate non-numeric enumerant values separately and append # them following the numeric values, to allow for aliases. 
- # NOTE: this doesn't do a topological sort yet, so aliases of + # NOTE: this does not do a topological sort yet, so aliases of # aliases can still get in the wrong order. aliasText = [] + maxName = None + minValue = None + maxValue = None for elem in enums: # Convert the value to an integer and use that to track min/max. # Values of form -(number) are accepted but nothing more complex. @@ -651,15 +777,15 @@ def buildEnumCDecl_Enum(self, expand, groupinfo, groupName): self.logMsg('error', 'Allowable range for C enum types is [', minValidValue, ',', maxValidValue, '], but', name, 'has a value outside of this (', strVal, ')\n') exit(1) - # Don't track min/max for non-numbers (numVal is None) + # Do not track min/max for non-numbers (numVal is None) if isEnum and numVal is not None and elem.get('extends') is None: if minName is None: minName = maxName = name minValue = maxValue = numVal - elif numVal < minValue: + elif minValue is None or numVal < minValue: minName = name minValue = numVal - elif numVal > maxValue: + elif maxValue is None or numVal > maxValue: maxName = name maxValue = numVal @@ -668,17 +794,15 @@ def buildEnumCDecl_Enum(self, expand, groupinfo, groupName): # Generate min/max value tokens - legacy use case. 
if isEnum and expand: - body.extend((" {}_BEGIN_RANGE{} = {},".format(expandPrefix, expandSuffix, minName), - " {}_END_RANGE{} = {},".format( - expandPrefix, expandSuffix, maxName), - " {}_RANGE_SIZE{} = ({} - {} + 1),".format(expandPrefix, expandSuffix, maxName, minName))) + body.extend((f' {expandPrefix}_BEGIN_RANGE{expandSuffix} = {minName},', + f' {expandPrefix}_END_RANGE{expandSuffix} = {maxName},', + f' {expandPrefix}_RANGE_SIZE{expandSuffix} = ({maxName} - {minName} + 1),')) # Generate a range-padding value to ensure the enum is 32 bits, but - # only in code generators, so it doesn't appear in documentation + # only in code generators, so it does not appear in documentation if (self.genOpts.codeGenerator or self.conventions.generate_max_enum_in_docs): - body.append(" {}_MAX_ENUM{} = 0x7FFFFFFF".format( - expandPrefix, expandSuffix)) + body.append(f' {expandPrefix}_MAX_ENUM{expandSuffix} = 0x7FFFFFFF') # Postfix body.append("} %s;" % groupName) @@ -747,19 +871,25 @@ def makeDir(self, path): def beginFile(self, genOpts): """Start a new interface file - - genOpts - GeneratorOptions controlling what's generated and how""" + - genOpts - GeneratorOptions controlling what is generated and how""" + self.genOpts = genOpts + if self.genOpts is None: + raise MissingGeneratorOptionsError() + if self.genOpts.conventions is None: + raise MissingGeneratorOptionsConventionsError() self.should_insert_may_alias_macro = \ self.genOpts.conventions.should_insert_may_alias_macro(self.genOpts) + self.file_suffix = self.genOpts.conventions.file_suffix - # Try to import the API dictionary, api.py, if it exists. Nothing in - # api.py cannot be extracted directly from the XML, and in the + # Try to import the API dictionary, apimap.py, if it exists. Nothing + # in apimap.py cannot be extracted directly from the XML, and in the # future we should do that. 
if self.genOpts.genpath is not None: try: sys.path.insert(0, self.genOpts.genpath) - import api - self.apidict = api + import apimap + self.apidict = apimap except ImportError: self.apidict = None @@ -778,19 +908,23 @@ def endFile(self): self.warnFile.flush() if self.diagFile: self.diagFile.flush() - self.outFile.flush() - if self.outFile != sys.stdout and self.outFile != sys.stderr: - self.outFile.close() - - # On successfully generating output, move the temporary file to the - # target file. - if self.genOpts.filename is not None: - if sys.platform == 'win32': - directory = Path(self.genOpts.directory) - if not Path.exists(directory): - os.makedirs(directory) - shutil.copy(self.outFile.name, self.genOpts.directory + '/' + self.genOpts.filename) - os.remove(self.outFile.name) + if self.outFile: + self.outFile.flush() + if self.outFile != sys.stdout and self.outFile != sys.stderr: + self.outFile.close() + + if self.genOpts is None: + raise MissingGeneratorOptionsError() + + # On successfully generating output, move the temporary file to the + # target file. + if self.genOpts.filename is not None: + if sys.platform == 'win32': + directory = Path(self.genOpts.directory) + if not Path.exists(directory): + os.makedirs(directory) + shutil.copy(self.outFile.name, self.genOpts.directory + '/' + self.genOpts.filename) + os.remove(self.outFile.name) self.genOpts = None def beginFeature(self, interface, emit): @@ -800,7 +934,7 @@ def beginFeature(self, interface, emit): - emit - actually write to the header only when True""" self.emit = emit self.featureName = interface.get('name') - # If there's an additional 'protect' attribute in the feature, save it + # If there is an additional 'protect' attribute in the feature, save it self.featureExtraProtect = interface.get('protect') def endFeature(self): @@ -812,7 +946,7 @@ def endFeature(self): def genRequirements(self, name, mustBeFound = True): """Generate text showing what core versions and extensions introduce - an API. 
This exists in the base Generator class because it's used by + an API. This exists in the base Generator class because it is used by the shared enumerant-generating interfaces (buildEnumCDecl, etc.). Here it returns an empty string for most generators, but can be overridden by e.g. DocGenerator. @@ -825,7 +959,7 @@ def genRequirements(self, name, mustBeFound = True): return '' def validateFeature(self, featureType, featureName): - """Validate we're generating something only inside a `` tag""" + """Validate we are generating something only inside a `` tag""" if self.featureName is None: raise UserWarning('Attempt to generate', featureType, featureName, 'when not in feature') @@ -887,16 +1021,52 @@ def genSpirv(self, spirv, spirvinfo, alias): Extend to generate as desired in your derived class.""" return + def genFormat(self, format, formatinfo, alias): + """Generate interface for a format element. + + - formatinfo - FormatInfo + + Extend to generate as desired in your derived class.""" + return + + def genSyncStage(self, stageinfo): + """Generate interface for a sync stage element. + + - stageinfo - SyncStageInfo + + Extend to generate as desired in your derived class.""" + return + + def genSyncAccess(self, accessinfo): + """Generate interface for a sync stage element. + + - accessinfo - AccessInfo + + Extend to generate as desired in your derived class.""" + return + + def genSyncPipeline(self, pipelineinfo): + """Generate interface for a sync stage element. + + - pipelineinfo - SyncPipelineInfo + + Extend to generate as desired in your derived class.""" + return + def makeProtoName(self, name, tail): """Turn a `` `` into C-language prototype and typedef declarations for that name. 
- name - contents of `` tag - tail - whatever text follows that tag in the Element""" + if self.genOpts is None: + raise MissingGeneratorOptionsError() return self.genOpts.apientry + name + tail def makeTypedefName(self, name, tail): """Make the function-pointer typedef name for a command.""" + if self.genOpts is None: + raise MissingGeneratorOptionsError() return '(' + self.genOpts.apientryp + 'PFN_' + name + tail + ')' def makeCParamDecl(self, param, aligncol): @@ -907,6 +1077,10 @@ def makeCParamDecl(self, param, aligncol): - param - Element (`` or ``) to format - aligncol - if non-zero, attempt to align the nested `` element at this column""" + if self.genOpts is None: + raise MissingGeneratorOptionsError() + if self.genOpts.conventions is None: + raise MissingGeneratorOptionsConventionsError() indent = ' ' paramdecl = indent prefix = noneStr(param.text) @@ -940,8 +1114,6 @@ def makeCParamDecl(self, param, aligncol): # Clear prefix for subsequent iterations prefix = '' - # If prefix was originally non-empty and the param has no elements - # (e.g. is nothing but text), preserve it. paramdecl = paramdecl + prefix if aligncol == 0: @@ -955,6 +1127,10 @@ def getCParamTypeLength(self, param): or structure/union member). 
- param - Element (`` or ``) to identify""" + if self.genOpts is None: + raise MissingGeneratorOptionsError() + if self.genOpts.conventions is None: + raise MissingGeneratorOptionsConventionsError() # Allow for missing tag newLen = 0 @@ -985,6 +1161,9 @@ def getMaxCParamTypeLength(self, info): def getHandleParent(self, typename): """Get the parent of a handle object.""" + if self.registry is None: + raise MissingRegistryError() + info = self.registry.typedict.get(typename) if info is None: return None @@ -1008,6 +1187,9 @@ def getHandleAncestors(self, typename): def getTypeCategory(self, typename): """Get the category of a type.""" + if self.registry is None: + raise MissingRegistryError() + info = self.registry.typedict.get(typename) if info is None: return None @@ -1018,10 +1200,12 @@ def getTypeCategory(self, typename): return None def isStructAlwaysValid(self, structname): - """Try to do check if a structure is always considered valid (i.e. there's no rules to its acceptance).""" + """Try to do check if a structure is always considered valid (i.e. there is no rules to its acceptance).""" # A conventions object is required for this call. if not self.conventions: raise RuntimeError("To use isStructAlwaysValid, be sure your options include a Conventions object.") + if self.registry is None: + raise MissingRegistryError() if self.conventions.type_always_valid(structname): return True @@ -1031,7 +1215,8 @@ def isStructAlwaysValid(self, structname): return False info = self.registry.typedict.get(structname) - assert(info is not None) + if info is None: + self.logMsg('error', f'isStructAlwaysValid({structname}) - structure not found in typedict') members = info.getMembers() @@ -1063,6 +1248,21 @@ def isStructAlwaysValid(self, structname): return True + def paramIsArray(self, param): + """Check if the parameter passed in is a pointer to an array. 
+ + param the XML information for the param + """ + return param.get('len') is not None + + def paramIsPointer(self, param): + """Check if the parameter passed in is a pointer. + + param the XML information for the param + """ + tail = param.find('type').tail + return tail is not None and '*' in tail + def isEnumRequired(self, elem): """Return True if this `` element is required, False otherwise @@ -1098,6 +1298,8 @@ def makeCDecls(self, cmd): `` Element, as a two-element list of strings. - cmd - Element containing a `` tag""" + if self.genOpts is None: + raise MissingGeneratorOptionsError() proto = cmd.find('proto') params = cmd.findall('param') # Begin accumulating prototype and typedef strings @@ -1114,7 +1316,7 @@ def makeCDecls(self, cmd): # Leading text pdecl += noneStr(proto.text) tdecl += noneStr(proto.text) - # For each child element, if it's a wrap in appropriate + # For each child element, if it is a wrap in appropriate # declaration. Otherwise append its contents and tail contents. for elem in proto: text = noneStr(elem.text) diff --git a/scripts/parse_dependency.py b/scripts/parse_dependency.py new file mode 100755 index 00000000..5d204959 --- /dev/null +++ b/scripts/parse_dependency.py @@ -0,0 +1,403 @@ +#!/usr/bin/python3 + +# Copyright 2022-2024 The Khronos Group Inc. +# Copyright 2003-2019 Paul McGuire +# SPDX-License-Identifier: MIT + +# apirequirements.py - parse 'depends' expressions in API XML +# Supported methods: +# dependency - the expression string +# +# evaluateDependency(dependency, isSupported) evaluates the expression, +# returning a boolean result. isSupported takes an extension or version name +# string and returns a boolean. +# +# dependencyLanguage(dependency) returns an English string equivalent +# to the expression, suitable for header file comments. +# +# dependencyNames(dependency) returns a set of the extension and +# version names in the expression. 
+# +# dependencyMarkup(dependency) returns a string containing asciidoctor +# markup for English equivalent to the expression, suitable for extension +# appendices. +# +# All may throw a ParseException if the expression cannot be parsed or is +# not completely consumed by parsing. + +# Supported expressions at present: +# - extension names +# - '+' as AND connector +# - ',' as OR connector +# - parenthesization for grouping + +# Based on https://github.com/pyparsing/pyparsing/blob/master/examples/fourFn.py + +from pyparsing import ( + Literal, + Word, + Group, + Forward, + alphas, + alphanums, + Regex, + ParseException, + CaselessKeyword, + Suppress, + delimitedList, + infixNotation, +) +import math +import operator +import pyparsing as pp +import re + +from apiconventions import APIConventions as APIConventions +conventions = APIConventions() + +def markupPassthrough(name): + """Pass a name (leaf or operator) through without applying markup""" + return name + +def leafMarkupAsciidoc(name): + """Markup a leaf name as an asciidoc link to an API version or extension + anchor. 
+ + - name - version or extension name""" + + return conventions.formatVersionOrExtension(name) + +def leafMarkupC(name): + """Markup a leaf name as a C expression, using conventions of the + Vulkan Validation Layers + + - name - version or extension name""" + + (apivariant, major, minor) = apiVersionNameMatch(name) + + if apivariant is not None: + return name + else: + return f'ext.{name}' + +opMarkupAsciidocMap = { '+' : 'and', ',' : 'or' } + +def opMarkupAsciidoc(op): + """Markup an operator as an asciidoc spec markup equivalent + + - op - operator ('+' or ',')""" + + return opMarkupAsciidocMap[op] + +opMarkupCMap = { '+' : '&&', ',' : '||' } + +def opMarkupC(op): + """Markup an operator as a C language equivalent + + - op - operator ('+' or ',')""" + + return opMarkupCMap[op] + + +# Unfortunately global to be used in pyparsing +exprStack = [] + +def push_first(toks): + """Push a token on the global stack + + - toks - first element is the token to push""" + + exprStack.append(toks[0]) + +# An identifier (version or extension name) +dependencyIdent = Word(alphanums + '_') + +# Infix expression for depends expressions +dependencyExpr = pp.infixNotation(dependencyIdent, + [ (pp.oneOf(', +'), 2, pp.opAssoc.LEFT), ]) + +# BNF grammar for depends expressions +_bnf = None +def dependencyBNF(): + """ + boolop :: '+' | ',' + extname :: Char(alphas) + atom :: extname | '(' expr ')' + expr :: atom [ boolop atom ]* + """ + global _bnf + if _bnf is None: + and_, or_ = map(Literal, '+,') + lpar, rpar = map(Suppress, '()') + boolop = and_ | or_ + + expr = Forward() + expr_list = delimitedList(Group(expr)) + atom = ( + boolop[...] + + ( + (dependencyIdent).setParseAction(push_first) + | Group(lpar + expr + rpar) + ) + ) + + expr <<= atom + (boolop + atom).setParseAction(push_first)[...] 
+ _bnf = expr + return _bnf + + +# map operator symbols to corresponding arithmetic operations +_opn = { + '+': operator.and_, + ',': operator.or_, +} + +def evaluateStack(stack, isSupported): + """Evaluate an expression stack, returning a boolean result. + + - stack - the stack + - isSupported - function taking a version or extension name string and + returning True or False if that name is supported or not.""" + + op, num_args = stack.pop(), 0 + if isinstance(op, tuple): + op, num_args = op + + if op in '+,': + # Note: operands are pushed onto the stack in reverse order + op2 = evaluateStack(stack, isSupported) + op1 = evaluateStack(stack, isSupported) + return _opn[op](op1, op2) + elif op[0].isalpha(): + return isSupported(op) + else: + raise Exception(f'invalid op: {op}') + +def evaluateDependency(dependency, isSupported): + """Evaluate a dependency expression, returning a boolean result. + + - dependency - the expression + - isSupported - function taking a version or extension name string and + returning True or False if that name is supported or not.""" + + global exprStack + exprStack = [] + results = dependencyBNF().parseString(dependency, parseAll=True) + val = evaluateStack(exprStack[:], isSupported) + return val + +def evalDependencyLanguage(stack, leafMarkup, opMarkup, parenthesize, root): + """Evaluate an expression stack, returning an English equivalent + + - stack - the stack + - leafMarkup, opMarkup, parenthesize - same as dependencyLanguage + - root - True only if this is the outer (root) expression level""" + + op, num_args = stack.pop(), 0 + if isinstance(op, tuple): + op, num_args = op + if op in '+,': + # Could parenthesize, not needed yet + rhs = evalDependencyLanguage(stack, leafMarkup, opMarkup, parenthesize, root = False) + opname = opMarkup(op) + lhs = evalDependencyLanguage(stack, leafMarkup, opMarkup, parenthesize, root = False) + if parenthesize and not root: + return f'({lhs} {opname} {rhs})' + else: + return f'{lhs} {opname} {rhs}' + 
elif op[0].isalpha(): + # This is an extension or feature name + return leafMarkup(op) + else: + raise Exception(f'invalid op: {op}') + +def dependencyLanguage(dependency, leafMarkup, opMarkup, parenthesize): + """Return an API dependency expression translated to a form suitable for + asciidoctor conditionals or header file comments. + + - dependency - the expression + - leafMarkup - function taking an extension / version name and + returning an equivalent marked up version + - opMarkup - function taking an operator ('+' / ',') name name and + returning an equivalent marked up version + - parenthesize - True if parentheses should be used in the resulting + expression, False otherwise""" + + global exprStack + exprStack = [] + results = dependencyBNF().parseString(dependency, parseAll=True) + return evalDependencyLanguage(exprStack, leafMarkup, opMarkup, parenthesize, root = True) + +# aka specmacros = False +def dependencyLanguageComment(dependency): + """Return dependency expression translated to a form suitable for + comments in headers of emitted C code, as used by the + docgenerator.""" + return dependencyLanguage(dependency, leafMarkup = markupPassthrough, opMarkup = opMarkupAsciidoc, parenthesize = True) + +# aka specmacros = True +def dependencyLanguageSpecMacros(dependency): + """Return dependency expression translated to a form suitable for + comments in headers of emitted C code, as used by the + interfacegenerator.""" + return dependencyLanguage(dependency, leafMarkup = leafMarkupAsciidoc, opMarkup = opMarkupAsciidoc, parenthesize = False) + +def dependencyLanguageC(dependency): + """Return dependency expression translated to a form suitable for + use in C expressions""" + return dependencyLanguage(dependency, leafMarkup = leafMarkupC, opMarkup = opMarkupC, parenthesize = True) + +def evalDependencyNames(stack): + """Evaluate an expression stack, returning the set of extension and + feature names used in the expression. 
+ + - stack - the stack""" + + op, num_args = stack.pop(), 0 + if isinstance(op, tuple): + op, num_args = op + if op in '+,': + # Do not evaluate the operation. We only care about the names. + return evalDependencyNames(stack) | evalDependencyNames(stack) + elif op[0].isalpha(): + return { op } + else: + raise Exception(f'invalid op: {op}') + +def dependencyNames(dependency): + """Return a set of the extension and version names in an API dependency + expression. Used when determining transitive dependencies for spec + generation with specific extensions included. + + - dependency - the expression""" + + global exprStack + exprStack = [] + results = dependencyBNF().parseString(dependency, parseAll=True) + # print(f'names(): stack = {exprStack}') + return evalDependencyNames(exprStack) + +def markupTraverse(expr, level = 0, root = True): + """Recursively process a dependency in infix form, transforming it into + asciidoctor markup with expression nesting indicated by indentation + level. + + - expr - expression to process + - level - indentation level to render expression at + - root - True only on initial call""" + + if level > 0: + prefix = '{nbsp}{nbsp}' * level * 2 + ' ' + else: + prefix = '' + str = '' + + for elem in expr: + if isinstance(elem, pp.ParseResults): + if not root: + nextlevel = level + 1 + else: + # Do not indent the outer expression + nextlevel = level + + str = str + markupTraverse(elem, level = nextlevel, root = False) + elif elem in ('+', ','): + str = str + f'{prefix}{opMarkupAsciidoc(elem)} +\n' + else: + str = str + f'{prefix}{leafMarkupAsciidoc(elem)} +\n' + + return str + +def dependencyMarkup(dependency): + """Return asciidoctor markup for a human-readable equivalent of an API + dependency expression, suitable for use in extension appendix + metadata. 
+ + - dependency - the expression""" + + parsed = dependencyExpr.parseString(dependency) + return markupTraverse(parsed) + +if __name__ == "__main__": + for str in [ 'VK_VERSION_1_0', 'cl_khr_extension_name', 'XR_VERSION_3_2', 'CL_VERSION_1_0' ]: + print(f'{str} -> {conventions.formatVersionOrExtension(str)}') + import sys + sys.exit(0) + + termdict = { + 'VK_VERSION_1_1' : True, + 'false' : False, + 'true' : True, + } + termSupported = lambda name: name in termdict and termdict[name] + + def test(dependency, expected): + val = False + try: + val = evaluateDependency(dependency, termSupported) + except ParseException as pe: + print(dependency, f'failed parse: {dependency}') + except Exception as e: + print(dependency, f'failed eval: {dependency}') + + if val == expected: + True + # print(f'{dependency} = {val} (as expected)') + else: + print(f'{dependency} ERROR: {val} != {expected}') + + # Verify expressions are evaluated left-to-right + + test('false,false+false', False) + test('false,false+true', False) + test('false,true+false', False) + test('false,true+true', True) + test('true,false+false', False) + test('true,false+true', True) + test('true,true+false', False) + test('true,true+true', True) + + test('false,(false+false)', False) + test('false,(false+true)', False) + test('false,(true+false)', False) + test('false,(true+true)', True) + test('true,(false+false)', True) + test('true,(false+true)', True) + test('true,(true+false)', True) + test('true,(true+true)', True) + + + test('false+false,false', False) + test('false+false,true', True) + test('false+true,false', False) + test('false+true,true', True) + test('true+false,false', False) + test('true+false,true', True) + test('true+true,false', True) + test('true+true,true', True) + + test('false+(false,false)', False) + test('false+(false,true)', False) + test('false+(true,false)', False) + test('false+(true,true)', False) + test('true+(false,false)', False) + test('true+(false,true)', True) + 
test('true+(true,false)', True) + test('true+(true,true)', True) + + # Check formatting + for dependency in [ + #'true', + #'true+true+false', + 'true+false', + 'true+(true+false),(false,true)', + #'true+((true+false),(false,true))', + 'VK_VERSION_1_0+VK_KHR_display', + #'VK_VERSION_1_1+(true,false)', + ]: + print(f'expr = {dependency}\n{dependencyMarkup(dependency)}') + print(f' spec language = {dependencyLanguageSpecMacros(dependency)}') + print(f' comment language = {dependencyLanguageComment(dependency)}') + print(f' C language = {dependencyLanguageC(dependency)}') + print(f' names = {dependencyNames(dependency)}') + print(f' value = {evaluateDependency(dependency, termSupported)}') diff --git a/scripts/pygenerator.py b/scripts/pygenerator.py index da8cd7d6..6656b460 100644 --- a/scripts/pygenerator.py +++ b/scripts/pygenerator.py @@ -4,118 +4,50 @@ # # SPDX-License-Identifier: Apache-2.0 -import sys -from generator import OutputGenerator, enquote, noneStr, write +from generator import OutputGenerator, enquote, write +from scriptgenerator import ScriptOutputGenerator import pprint -class PyOutputGenerator(OutputGenerator): - """PyOutputGenerator - subclass of OutputGenerator. - Generates Python data structures describing API names and relationships. - Similar to DocOutputGenerator, but writes a single file.""" - - def apiName(self, name): - """Return True if name is in the reserved API namespace. - - Delegates to the conventions object. """ - return self.genOpts.conventions.is_api_name(name) +class PyOutputGenerator(ScriptOutputGenerator): + """PyOutputGenerator - subclass of ScriptOutputGenerator. 
+ Generates Python data structures describing API names and + relationships.""" def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - # Track features being generated - self.features = [] - - # Reverse map from interface names to features requiring them - self.apimap = {} - - def beginFile(self, genOpts): - OutputGenerator.beginFile(self, genOpts) - # - # Dictionaries are keyed by the name of the entity (e.g. - # self.structs is keyed by structure names). Values are - # the names of related entities (e.g. structs contain - # a list of type names of members, enums contain a list - # of enumerants belong to the enumerated type, etc.), or - # just None if there are no directly related entities. - # - # Collect the mappings, then emit the Python script in endFile - self.basetypes = {} - self.consts = {} - self.enums = {} - self.flags = {} - self.funcpointers = {} - self.protos = {} - self.structs = {} - self.handles = {} - self.defines = {} - self.alias = {} - # Dictionary containing the type of a type name - # (e.g. the string name of the dictionary with its contents). - self.typeCategory = {} - self.mapDict = {} - - def addInterfaceMapping(self, api, feature, required): - """Add a reverse mapping in self.apimap from an API to a feature - requiring that API. - - - api - name of the API - - feature - name of the feature requiring it - - required - None, or an additional feature dependency within - 'feature' """ - - # Each entry in self.apimap contains one or more - # ( feature, required ) tuples. - deps = ( feature, required ) + def beginDict(self, name): + """String starting definition of a named dictionary""" + return f'{name} = {{' - if api in self.apimap: - self.apimap[api].append(deps) - else: - self.apimap[api] = [ deps ] + def endDict(self): + """ String ending definition of a named dictionary""" + return '}' - def mapInterfaceKeys(self, feature, key): - """Construct reverse mapping of APIs to features requiring them in - self.apimap. 
+ def writeDict(self, dict, name, printValues = True): + """Write dictionary as a Python dictionary with the given name. + If printValues is False, just output keys with None values.""" - - feature - name of the feature being generated - - key - API category - 'define', 'basetype', etc.""" - - dict = self.featureDictionary[feature][key] - - if dict: - # Not clear why handling of command vs. type APIs is different - - # see interfacedocgenerator.py, which this was based on. - if key == 'command': - for required in dict: - for api in dict[required]: - self.addInterfaceMapping(api, feature, required) + write(self.beginDict(name), file=self.outFile) + for key in sorted(dict): + if printValues: + value = enquote(dict[key]) else: - for required in dict: - for parent in dict[required]: - for api in dict[required][parent]: - self.addInterfaceMapping(api, feature, required) - - def mapInterfaces(self, feature): - """Construct reverse mapping of APIs to features requiring them in - self.apimap. + value = 'None' + write(f'{enquote(key)} : {value},', file=self.outFile) + write(self.endDict(), file=self.outFile) - - feature - name of the feature being generated""" + def writeList(self, l, name): + """Write list l as a Ruby hash with the given name""" - # Map each category of interface - self.mapInterfaceKeys(feature, 'basetype') - self.mapInterfaceKeys(feature, 'bitmask') - self.mapInterfaceKeys(feature, 'command') - self.mapInterfaceKeys(feature, 'define') - self.mapInterfaceKeys(feature, 'enum') - self.mapInterfaceKeys(feature, 'enumconstant') - self.mapInterfaceKeys(feature, 'funcpointer') - self.mapInterfaceKeys(feature, 'handle') - self.mapInterfaceKeys(feature, 'include') - self.mapInterfaceKeys(feature, 'struct') - self.mapInterfaceKeys(feature, 'union') + self.writeDict(l, name, printValues = False) def endFile(self): + # Creates the inverse mapping of nonexistent APIs to their aliases. + super().createInverseMap() + # Print out all the dictionaries as Python strings. 
- # Could just print(dict) but that's not human-readable + # Could just print(dict) but that is not human-readable dicts = ( [ self.basetypes, 'basetypes' ], [ self.consts, 'consts' ], [ self.enums, 'enums' ], @@ -126,240 +58,37 @@ def endFile(self): [ self.handles, 'handles' ], [ self.defines, 'defines' ], [ self.typeCategory, 'typeCategory' ], - [ self.alias, 'alias' ] ) - for (entry_dict, name) in dicts: - write(name + ' = {}', file=self.outFile) - for key in sorted(entry_dict.keys()): - write(name + '[' + enquote(key) + '] = ', entry_dict[key], - file=self.outFile) + [ self.alias, 'alias' ], + [ self.nonexistent, 'nonexistent' ], + ) + + for (dict, name) in dicts: + self.writeDict(dict, name) # Dictionary containing the relationships of a type # (e.g. a dictionary with each related type as keys). - write('mapDict = {}', file=self.outFile) - # Could just print(self.mapDict), but prefer something # human-readable and stable-ordered + write(self.beginDict('mapDict'), file=self.outFile) for baseType in sorted(self.mapDict.keys()): - write('mapDict[' + enquote(baseType) + '] = ', file=self.outFile, end='') - pprint.pprint(self.mapDict[baseType], self.outFile) + write('{} : {},'.format(enquote(baseType), + pprint.pformat(self.mapDict[baseType])), file=self.outFile) + write(self.endDict(), file=self.outFile) + + # List of included feature names + self.writeList(sorted(self.features), 'features') # Generate feature <-> interface mappings for feature in self.features: self.mapInterfaces(feature) # Write out the reverse map from APIs to requiring features - write('requiredBy = {}', file=self.outFile) - + write(self.beginDict('requiredBy'), file=self.outFile) for api in sorted(self.apimap): - # Construct list of requirements as Python list arguments - ##reqs = ', '.join('({}, {})'.format(enquote(dep[0]), enquote(dep[1])) for dep in self.apimap[api]) - ##write('requiredBy[{}] = ( {} )'.format(enquote(api), reqs), file=self.outFile) - - # Ideally these would be sorted by 
dep[0] as well - reqs = ', '.join('({}, {})'.format(enquote(dep[0]), enquote(dep[1])) for dep in self.apimap[api]) - write('requiredBy[{}] = {}'.format(enquote(api), pprint.saferepr(self.apimap[api])), file=self.outFile) - - OutputGenerator.endFile(self) - - def beginFeature(self, interface, emit): - # Start processing in superclass - OutputGenerator.beginFeature(self, interface, emit) - - # Add this feature to the list being tracked - self.features.append( self.featureName ) - - def endFeature(self): - # Finish processing in superclass - OutputGenerator.endFeature(self) - - def addName(self, entry_dict, name, value): - """Add a string entry to the dictionary, quoting it so it gets printed - out correctly in self.endFile().""" - entry_dict[name] = enquote(value) - - def addMapping(self, baseType, refType): - """Add a mapping between types to mapDict. - - Only include API types, so we don't end up with a lot of useless uint32_t and void types.""" - if not self.apiName(baseType) or not self.apiName(refType): - self.logMsg('diag', 'PyOutputGenerator::addMapping: IGNORE map from', baseType, '<->', refType) - return - - self.logMsg('diag', 'PyOutputGenerator::addMapping: map from', - baseType, '<->', refType) - - if baseType not in self.mapDict: - baseDict = {} - self.mapDict[baseType] = baseDict - else: - baseDict = self.mapDict[baseType] - if refType not in self.mapDict: - refDict = {} - self.mapDict[refType] = refDict - else: - refDict = self.mapDict[refType] - - baseDict[refType] = None - refDict[baseType] = None - - def genType(self, typeinfo, name, alias): - """Generate type. - - - For 'struct' or 'union' types, defer to genStruct() to - add to the dictionary. - - For 'bitmask' types, add the type name to the 'flags' dictionary, - with the value being the corresponding 'enums' name defining - the acceptable flag bits. - - For 'enum' types, add the type name to the 'enums' dictionary, - with the value being '@STOPHERE@' (because this case seems - never to happen). 
- - For 'funcpointer' types, add the type name to the 'funcpointers' - dictionary. - - For 'handle' and 'define' types, add the handle or #define name - to the 'struct' dictionary, because that's how the spec sources - tag these types even though they aren't structs.""" - OutputGenerator.genType(self, typeinfo, name, alias) - typeElem = typeinfo.elem - # If the type is a struct type, traverse the embedded tags - # generating a structure. Otherwise, emit the tag text. - category = typeElem.get('category') - - # Add a typeCategory{} entry for the category of this type. - self.addName(self.typeCategory, name, category) - - if category in ('struct', 'union'): - self.genStruct(typeinfo, name, alias) - else: - if alias: - # Add name -> alias mapping - self.addName(self.alias, name, alias) - - # Always emit an alias (?!) - count = 1 - - # May want to only emit full type definition when not an alias? - else: - # Extract the type name - # (from self.genOpts). Copy other text through unchanged. - # If the resulting text is an empty string, don't emit it. - count = len(noneStr(typeElem.text)) - for elem in typeElem: - count += len(noneStr(elem.text)) + len(noneStr(elem.tail)) - - if count > 0: - if category == 'bitmask': - requiredEnum = typeElem.get('requires') - self.addName(self.flags, name, requiredEnum) - - # This happens when the Flags type is defined, but no - # FlagBits are defined yet. - if requiredEnum is not None: - self.addMapping(name, requiredEnum) - elif category == 'enum': - # This case does not seem to come up. It nominally would - # result from - # , - # but the output generator doesn't emit them directly. 
- self.logMsg('warn', 'PyOutputGenerator::genType: invalid \'enum\' category for name:', name) - elif category == 'funcpointer': - self.funcpointers[name] = None - elif category == 'handle': - self.handles[name] = None - elif category == 'define': - self.defines[name] = None - elif category == 'basetype': - # Don't add an entry for base types that are not API types - # e.g. an API Bool type gets an entry, uint32_t does not - if self.apiName(name): - self.basetypes[name] = None - self.addName(self.typeCategory, name, 'basetype') - else: - self.logMsg('diag', 'PyOutputGenerator::genType: unprocessed type:', name, 'category:', category) - else: - self.logMsg('diag', 'PyOutputGenerator::genType: unprocessed type:', name) - - def genStruct(self, typeinfo, typeName, alias): - """Generate struct (e.g. C "struct" type). - - Add the struct name to the 'structs' dictionary, with the - value being an ordered list of the struct member names.""" - OutputGenerator.genStruct(self, typeinfo, typeName, alias) - - if alias: - # Add name -> alias mapping - self.addName(self.alias, typeName, alias) - else: - # May want to only emit definition on this branch - True - - members = [member.text for member in typeinfo.elem.findall('.//member/name')] - self.structs[typeName] = members - memberTypes = [member.text for member in typeinfo.elem.findall('.//member/type')] - for member_type in memberTypes: - self.addMapping(typeName, member_type) - - def genGroup(self, groupinfo, groupName, alias): - """Generate group (e.g. C "enum" type). - - These are concatenated together with other types. - - - Add the enum type name to the 'enums' dictionary, with - the value being an ordered list of the enumerant names. 
- - Add each enumerant name to the 'consts' dictionary, with - the value being the enum type the enumerant is part of.""" - OutputGenerator.genGroup(self, groupinfo, groupName, alias) - groupElem = groupinfo.elem - - if alias: - # Add name -> alias mapping - self.addName(self.alias, groupName, alias) - else: - # May want to only emit definition on this branch - True - - # Loop over the nested 'enum' tags. - enumerants = [elem.get('name') for elem in groupElem.findall('enum')] - for name in enumerants: - self.addName(self.consts, name, groupName) - self.enums[groupName] = enumerants - - def genEnum(self, enuminfo, name, alias): - """Generate enumerant (compile-time constants). - - - Add the constant name to the 'consts' dictionary, with the - value being None to indicate that the constant isn't - an enumeration value.""" - OutputGenerator.genEnum(self, enuminfo, name, alias) - - if name not in self.consts: - # Add a typeCategory{} entry for the category of this type. - self.addName(self.typeCategory, name, 'consts') - self.consts[name] = None - # Otherwise, don't add it to the consts dictionary because it's - # already present. This happens due to the generator 'reparentEnums' - # parameter being False, so each extension enum appears in both the - # type and in the or it originally - # came from. - - def genCmd(self, cmdinfo, name, alias): - """Generate command. - - - Add the command name to the 'protos' dictionary, with the - value being an ordered list of the parameter names.""" - OutputGenerator.genCmd(self, cmdinfo, name, alias) - - if alias: - # Add name -> alias mapping - self.addName(self.alias, name, alias) - else: - # May want to only emit definition on this branch - True - - # Add a typeCategory{} entry for the category of this type. 
- self.addName(self.typeCategory, name, 'protos') + # Sort requirements by first feature in each one + deps = sorted(self.apimap[api], key = lambda dep: dep[0]) + reqs = ', '.join('({}, {})'.format(enquote(dep[0]), enquote(dep[1])) for dep in deps) + write('{} : [{}],'.format(enquote(api), reqs), file=self.outFile) + write(self.endDict(), file=self.outFile) - params = [param.text for param in cmdinfo.elem.findall('param/name')] - self.protos[name] = params - paramTypes = [param.text for param in cmdinfo.elem.findall('param/type')] - for param_type in paramTypes: - self.addMapping(name, param_type) + super().endFile() diff --git a/scripts/reflib.py b/scripts/reflib.py index 535683ae..36db7590 100644 --- a/scripts/reflib.py +++ b/scripts/reflib.py @@ -100,26 +100,26 @@ def logErr(*args, **kwargs): if file is not None: file.write(strfile.getvalue()) - sys.exit(1) + raise UserWarning(strfile.getvalue()) def isempty(s): """Return True if s is nothing but white space, False otherwise""" return len(''.join(s.split())) == 0 class pageInfo: - """Information about a ref page relative to the file it's extracted from.""" + """Information about a ref page relative to the file it is extracted from.""" def __init__(self): self.extractPage = True """True if page should be extracted""" self.Warning = None - """string warning if page is suboptimal or can't be generated""" + """string warning if page is suboptimal or cannot be generated""" self.embed = False """False or the name of the ref page this include is embedded within""" self.type = None - """'structs', 'protos', 'funcpointers', 'flags', 'enums'""" + """refpage type attribute - 'structs', 'protos', 'freeform', etc.""" self.name = None """struct/proto/enumerant/etc. name""" @@ -236,23 +236,27 @@ def lookupPage(pageMap, name): return pi def loadFile(filename): - """Load a file into a list of strings. Return the list or None on failure""" + """Load a file into a list of strings. 
Return the (list, newline_string) or (None, None) on failure""" + newline_string = "\n" try: - fp = open(filename, 'r', encoding='utf-8') + with open(filename, 'rb') as fp: + contents = fp.read() + if contents.count(b"\r\n") > 1: + newline_string = "\r\n" + + with open(filename, 'r', encoding='utf-8') as fp: + lines = fp.readlines() except: logWarn('Cannot open file', filename, ':', sys.exc_info()[0]) - return None - - file = fp.readlines() - fp.close() + return None, None - return file + return lines, newline_string def clampToBlock(line, minline, maxline): """Clamp a line number to be in the range [minline,maxline]. If the line number is None, just return it. - If minline is None, don't clamp to that value.""" + If minline is None, do not clamp to that value.""" if line is None: return line if minline and line < minline: @@ -280,8 +284,8 @@ def fixupRefs(pageMap, specFile, file): # # line to the include line, so autogeneration can at least # # pull the include out, but mark it not to be extracted. # # Examples include the host sync table includes in - # # chapters/fundamentals.txt and the table of Vk*Flag types in - # # appendices/boilerplate.txt. + # # chapters/fundamentals.adoc and the table of Vk*Flag types in + # # appendices/boilerplate.adoc. # if pi.begin is None and pi.validity is None and pi.end is None: # pi.begin = pi.include # pi.extractPage = False @@ -289,7 +293,7 @@ def fixupRefs(pageMap, specFile, file): # continue # Using open block delimiters, ref pages must *always* have a - # defined begin and end. If either is undefined, that's fatal. + # defined begin and end. If either is undefined, that is fatal. 
if pi.begin is None: pi.extractPage = False pi.Warning = 'Can\'t identify begin of ref page open block' @@ -300,7 +304,7 @@ def fixupRefs(pageMap, specFile, file): pi.Warning = 'Can\'t identify end of ref page open block' continue - # If there's no description of the page, infer one from the type + # If there is no description of the page, infer one from the type if pi.desc is None: if pi.type is not None: # pi.desc = pi.type[0:len(pi.type)-1] + ' (no short description available)' @@ -314,6 +318,9 @@ def fixupRefs(pageMap, specFile, file): # begin. funcpointer, proto, and struct pages infer the location of # the parameter and body sections. Other pages infer the location of # the body, but have no parameter sections. + # + # Probably some other types infer this as well - refer to list of + # all page types in genRef.py:emitPage() if pi.include is not None: if pi.type in ['funcpointers', 'protos', 'structs']: pi.param = nextPara(file, pi.include) @@ -325,13 +332,13 @@ def fixupRefs(pageMap, specFile, file): else: pi.Warning = 'Page does not have an API definition include::' - # It's possible for the inferred param and body lines to run past + # It is possible for the inferred param and body lines to run past # the end of block, if, for example, there is no parameter section. pi.param = clampToBlock(pi.param, pi.include, pi.end) pi.body = clampToBlock(pi.body, pi.param, pi.end) # We can get to this point with .include, .param, and .validity - # all being None, indicating those sections weren't found. + # all being None, indicating those sections were not found. logDiag('fixupRefs: after processing,', pi.name, 'looks like:') printPageInfo(pi, file) @@ -340,7 +347,7 @@ def fixupRefs(pageMap, specFile, file): # inferences about invalid pages. 
# # If a reference without a .end is entirely inside a valid reference, - # then it's intentionally embedded - may want to create an indirect + # then it is intentionally embedded - may want to create an indirect # page that links into the embedding page. This is done by a very # inefficient double loop, but the loop depth is small. for name in sorted(pageMap.keys()): @@ -350,7 +357,7 @@ def fixupRefs(pageMap, specFile, file): for embedName in sorted(pageMap.keys()): logDiag('fixupRefs: comparing', pi.name, 'to', embedName) embed = pageMap[embedName] - # Don't check embeddings which are themselves invalid + # Do not check embeddings which are themselves invalid if not embed.extractPage: logDiag('Skipping check for embedding in:', embed.name) continue @@ -375,9 +382,20 @@ def fixupRefs(pageMap, specFile, file): 'at line', pi.include) +def compatiblePageTypes(refpage_type, pagemap_type): + """Returns whether two refpage 'types' (categories) are compatible - + this is only true for 'consts' and 'enums' types.""" + + constsEnums = [ 'consts', 'enums' ] + + if refpage_type == pagemap_type: + return True + if refpage_type in constsEnums and pagemap_type in constsEnums: + return True + return False + # Patterns used to recognize interesting lines in an asciidoc source file. # These patterns are only compiled once. -INCSVAR_DEF = re.compile(r':INCS-VAR: (?P.*)') endifPat = re.compile(r'^endif::(?P[\w_+,]+)\[\]') beginPat = re.compile(r'^\[open,(?Prefpage=.*)\]') # attribute key/value pairs of an open block @@ -387,13 +405,13 @@ def fixupRefs(pageMap, specFile, file): errorPat = re.compile(r'^// *refError') # This regex transplanted from check_spec_links -# It looks for either OpenXR or Vulkan generated file conventions, and for -# the api/validity include (generated_type), protos/struct/etc path -# (category), and API name (entity_name). It could be put into the API -# conventions object. 
+# It looks for various generated file conventions, and for the api/validity +# include (generated_type), protos/struct/etc path (category), and API name +# (entity_name). +# It could be put into the API conventions object, instead of being +# generalized for all the different specs. INCLUDE = re.compile( - r'include::(?P((../){1,4}|\{INCS-VAR\}/|\{generated\}/)(generated/)?)(?P[\w]+)/(?P\w+)/(?P[^./]+).txt[\[][\]]') - + r'include::(?P((../){1,4}|\{generated\}/)(generated/)?)(?P[\w]+)/(?P\w+)/(?P[^./]+)\.(adoc|txt)[\[][\]]') def findRefs(file, filename): """Identify reference pages in a list of strings, returning a dictionary of @@ -405,7 +423,7 @@ def findRefs(file, filename): # first detect the '[open,refpage=...]' markup delimiting the block; # skip past the '--' block delimiter on the next line; and identify the # '--' block delimiter closing the page. - # This can't be done solely with pattern matching, and requires state to + # This cannot be done solely with pattern matching, and requires state to # track 'inside/outside block'. # When looking for open blocks, possible states are: # 'outside' - outside a block @@ -422,26 +440,10 @@ def findRefs(file, filename): # Track the pageInfo object corresponding to the current open block pi = None - incsvar = None while (line < numLines): setLogLine(line) - # Look for a file-wide definition - matches = INCSVAR_DEF.match(file[line]) - if matches: - incsvar = matches.group('value') - logDiag('Matched INCS-VAR definition:', incsvar) - - line = line + 1 - continue - - # Perform INCS-VAR substitution immediately. - if incsvar and '{INCS-VAR}' in file[line]: - newLine = file[line].replace('{INCS-VAR}', incsvar) - logDiag('PERFORMING SUBSTITUTION', file[line], '->', newLine) - file[line] = newLine - # Only one of the patterns can possibly match. Add it to # the dictionary for that name. 
@@ -451,7 +453,7 @@ def findRefs(file, filename): logDiag('Matched open block pattern') attribs = matches.group('attribs') - # If the previous open block wasn't closed, raise an error + # If the previous open block was not closed, raise an error if openBlockState != 'outside': logErr('Nested open block starting at line', line, 'of', filename) @@ -553,7 +555,7 @@ def findRefs(file, filename): if gen_type == 'validity': logDiag('Matched validity pattern') if pi is not None: - if pi.type and refpage_type != pi.type: + if pi.type and not compatiblePageTypes(refpage_type, pi.type): logWarn('ERROR: pageMap[' + name + '] type:', pi.type, 'does not match type:', refpage_type) pi.type = refpage_type @@ -570,7 +572,7 @@ def findRefs(file, filename): if pi is not None: if pi.include is not None: logDiag('found multiple includes for this block') - if pi.type and refpage_type != pi.type: + if pi.type and not compatiblePageTypes(refpage_type, pi.type): logWarn('ERROR: pageMap[' + name + '] type:', pi.type, 'does not match type:', refpage_type) pi.type = refpage_type @@ -643,7 +645,7 @@ def getBranch(): """Determine current git branch Returns (branch name, ''), or (None, stderr output) if the branch name - can't be determined""" + cannot be determined""" command = [ 'git', 'symbolic-ref', '--short', 'HEAD' ] results = subprocess.run(command, diff --git a/scripts/reg.py b/scripts/reg.py index 1b1173e7..b8f8af7c 100755 --- a/scripts/reg.py +++ b/scripts/reg.py @@ -10,19 +10,25 @@ import re import sys import xml.etree.ElementTree as etree -from collections import defaultdict, namedtuple -from generator import OutputGenerator, GeneratorOptions, write -import pdb +from collections import defaultdict, deque, namedtuple + +from generator import GeneratorOptions, OutputGenerator, noneStr, write +from apiconventions import APIConventions def apiNameMatch(str, supported): """Return whether a required api name matches a pattern specified for an XML 'api' attribute or 'supported' attribute. 
- - str - api name such as 'vulkan' or 'openxr' - - supported - comma-separated list of XML API names""" + - str - API name such as 'vulkan' or 'openxr'. May be None, in which + case it never matches (this should not happen). + - supported - comma-separated list of XML API names. May be None, in + which case str always matches (this is the usual case).""" - return (str is not None and str in supported.split(',')) + if str is not None: + return supported is None or str in supported.split(',') + # Fallthrough case - either str is None or the test failed + return False def matchAPIProfile(api, profile, elem): """Return whether an API and profile @@ -52,7 +58,7 @@ def matchAPIProfile(api, profile, elem): --------- -------- None None Always matches 'string' None Always matches - None 'string' Does not match. Can't generate multiple APIs + None 'string' Does not match. Cannot generate multiple APIs or profiles, so if an API/profile constraint is present, it must be asked for explicitly. 'string' 'string' Strings must match @@ -60,7 +66,7 @@ def matchAPIProfile(api, profile, elem): ** In the future, we will allow regexes for the attributes, not just strings, so that `api="^(gl|gles2)"` will match. 
Even - this isn't really quite enough, we might prefer something + this is not really quite enough, we might prefer something like `"gl(core)|gles1(common-lite)"`.""" # Match 'api', if present elem_api = elem.get('api') @@ -69,7 +75,7 @@ def matchAPIProfile(api, profile, elem): raise UserWarning("No API requested, but 'api' attribute is present with value '" + elem_api + "'") elif api != elem_api: - # Requested API doesn't match attribute + # Requested API does not match attribute return False elem_profile = elem.get('profile') if elem_profile: @@ -77,11 +83,114 @@ def matchAPIProfile(api, profile, elem): raise UserWarning("No profile requested, but 'profile' attribute is present with value '" + elem_profile + "'") elif profile != elem_profile: - # Requested profile doesn't match attribute + # Requested profile does not match attribute return False return True +def mergeAPIs(tree, fromApiNames, toApiName): + """Merge multiple APIs using the precedence order specified in apiNames. + Also deletes elements. + + tree - Element at the root of the hierarchy to merge. 
+ apiNames - list of strings of API names.""" + + stack = deque() + stack.append(tree) + + while len(stack) > 0: + parent = stack.pop() + + for child in parent.findall('*'): + if child.tag == 'remove': + # Remove elements + parent.remove(child) + else: + stack.append(child) + + supportedList = child.get('supported') + if supportedList: + supportedList = supportedList.split(',') + for apiName in [toApiName] + fromApiNames: + if apiName in supportedList: + child.set('supported', toApiName) + + if child.get('api'): + definitionName = None + definitionVariants = [] + + # Keep only one definition with the same name if there are multiple definitions + if child.tag in ['type']: + if child.get('name') is not None: + definitionName = child.get('name') + definitionVariants = parent.findall(f"{child.tag}[@name='{definitionName}']") + else: + definitionName = child.find('name').text + definitionVariants = parent.findall(f"{child.tag}/name[.='{definitionName}']/..") + elif child.tag in ['member', 'param']: + definitionName = child.find('name').text + definitionVariants = parent.findall(f"{child.tag}/name[.='{definitionName}']/..") + elif child.tag in ['enum', 'feature']: + definitionName = child.get('name') + definitionVariants = parent.findall(f"{child.tag}[@name='{definitionName}']") + elif child.tag in ['require']: + definitionName = child.get('feature') + definitionVariants = parent.findall(f"{child.tag}[@feature='{definitionName}']") + elif child.tag in ['command']: + definitionName = child.find('proto/name').text + definitionVariants = parent.findall(f"{child.tag}/proto/name[.='{definitionName}']/../..") + + if definitionName: + bestMatchApi = None + requires = None + for apiName in [toApiName] + fromApiNames: + for variant in definitionVariants: + # Keep any requires attributes from the target API + if variant.get('requires') and variant.get('api') == apiName: + requires = variant.get('requires') + # Find the best matching definition + if apiName in 
variant.get('api').split(',') and bestMatchApi is None: + bestMatchApi = variant.get('api') + + if bestMatchApi: + for variant in definitionVariants: + if variant.get('api') != bestMatchApi: + # Only keep best matching definition + parent.remove(variant) + else: + # Add requires attribute from the target API if it is not overridden + if requires is not None and variant.get('requires') is None: + variant.set('requires', requires) + variant.set('api', toApiName) + + +def stripNonmatchingAPIs(tree, apiName, actuallyDelete = True): + """Remove tree Elements with 'api' attributes matching apiName. + + tree - Element at the root of the hierarchy to strip. Only its + children can actually be removed, not the tree itself. + apiName - string which much match a command-separated component of + the 'api' attribute. + actuallyDelete - only delete matching elements if True.""" + + stack = deque() + stack.append(tree) + + while len(stack) > 0: + parent = stack.pop() + + for child in parent.findall('*'): + api = child.get('api') + + if apiNameMatch(apiName, api): + # Add child to the queue + stack.append(child) + elif not apiNameMatch(apiName, api): + # Child does not match requested api. Remove it. + if actuallyDelete: + parent.remove(child) + + class BaseInfo: """Base class for information about a registry feature (type/group/enum/command/API/extension). 
@@ -128,12 +237,12 @@ def compareElem(self, info, infoName): if (self.compareKeys(info, 'value', required = True) or self.compareKeys(info, 'bitpos', required = True)): # If both specify the same value or bit position, - # they're equal + # they are equal return True elif (self.compareKeys(info, 'extnumber') and self.compareKeys(info, 'offset') and self.compareKeys(info, 'dir')): - # If both specify the same relative offset, they're equal + # If both specify the same relative offset, they are equal return True elif (self.compareKeys(info, 'alias')): # If both are aliases of the same value @@ -141,7 +250,7 @@ def compareElem(self, info, infoName): else: return False else: - # The same enum can't extend two different types + # The same enum cannot extend two different types return False else: # Non-s should never be redefined @@ -236,22 +345,21 @@ def __init__(self, elem): attribute of . Extensions do not have API version numbers and are assigned number 0.""" - self.number = "0" + self.number = 0 self.supported = None else: # Extract vendor portion of __ self.category = self.name.split('_', 2)[1] self.version = "0" self.versionNumber = "0" - self.number = elem.get('number') + + self.number = int(elem.get('number','0')) """extension number, used for ordering and for assigning enumerant offsets. 
features do not have extension - numbers and are assigned number 0.""" + numbers and are assigned number 0, as are extensions without + numbers, so sorting works.""" - # If there's no 'number' attribute, use 0, so sorting works - if self.number is None: - self.number = 0 - self.supported = elem.get('supported') + self.supported = elem.get('supported', 'disabled') class SpirvInfo(BaseInfo): """Registry information about an API @@ -260,6 +368,36 @@ class SpirvInfo(BaseInfo): def __init__(self, elem): BaseInfo.__init__(self, elem) +class FormatInfo(BaseInfo): + """Registry information about an API .""" + + def __init__(self, elem, condition): + BaseInfo.__init__(self, elem) + # Need to save the condition here when it is known + self.condition = condition + +class SyncStageInfo(BaseInfo): + """Registry information about .""" + + def __init__(self, elem, condition): + BaseInfo.__init__(self, elem) + # Need to save the condition here when it is known + self.condition = condition + +class SyncAccessInfo(BaseInfo): + """Registry information about .""" + + def __init__(self, elem, condition): + BaseInfo.__init__(self, elem) + # Need to save the condition here when it is known + self.condition = condition + +class SyncPipelineInfo(BaseInfo): + """Registry information about .""" + + def __init__(self, elem): + BaseInfo.__init__(self, elem) + class Registry: """Object representing an API registry, loaded from an XML file.""" @@ -272,7 +410,9 @@ def __init__(self, gen=None, genOpts=None): "Output generator used to write headers / messages" if genOpts is None: - self.genOpts = GeneratorOptions() + # If no generator is provided, we may still need the XML API name + # (for example, in genRef.py). 
+ self.genOpts = GeneratorOptions(apiname = APIConventions().xml_api_name) else: self.genOpts = genOpts "Options controlling features to write and how to format them" @@ -311,6 +451,18 @@ def __init__(self, gen=None, genOpts=None): self.spirvcapdict = {} "dictionary of FeatureInfo objects for `` elements keyed by spirv capability name" + self.formatsdict = {} + "dictionary of FeatureInfo objects for `` elements keyed by VkFormat name" + + self.syncstagedict = {} + "dictionary of Sync*Info objects for `` elements keyed by VkPipelineStageFlagBits2 name" + + self.syncaccessdict = {} + "dictionary of Sync*Info objects for `` elements keyed by VkAccessFlagBits2 name" + + self.syncpipelinedict = {} + "dictionary of Sync*Info objects for `` elements keyed by pipeline type name" + self.emitFeatures = False """True to actually emit features for a version / extension, or False to just treat them as emitted""" @@ -356,27 +508,20 @@ def addElementInfo(self, elem, info, infoName, dictionary): Intended for internal use only. - - elem - ``/``/``/``/``/``/``/`` Element - - info - corresponding {Type|Group|Enum|Cmd|Feature|Spirv}Info object - - infoName - 'type' / 'group' / 'enum' / 'command' / 'feature' / 'extension' / 'spirvextension' / 'spirvcapability' - - dictionary - self.{type|group|enum|cmd|api|ext|spirvext|spirvcap}dict + - elem - ``/``/``/``/``/``/``/``/``/``/``/`` Element + - info - corresponding {Type|Group|Enum|Cmd|Feature|Spirv|Format|SyncStage|SyncAccess|SyncPipeline}Info object + - infoName - 'type' / 'group' / 'enum' / 'command' / 'feature' / 'extension' / 'spirvextension' / 'spirvcapability' / 'format' / 'syncstage' / 'syncaccess' / 'syncpipeline' + - dictionary - self.{type|group|enum|cmd|api|ext|format|spirvext|spirvcap|sync}dict + + The dictionary key is the element 'name' attribute.""" - If the Element has an 'api' attribute, the dictionary key is the - tuple (name,api). If not, the key is the name. 
'name' is an - attribute of the Element""" # self.gen.logMsg('diag', 'Adding ElementInfo.required =', # info.required, 'name =', elem.get('name')) - api = elem.get('api') - if api: - key = (elem.get('name'), api) - else: - key = elem.get('name') + key = elem.get('name') if key in dictionary: if not dictionary[key].compareElem(info, infoName): self.gen.logMsg('warn', 'Attempt to redefine', key, '(this should not happen)') - else: - True else: dictionary[key] = info @@ -406,27 +551,48 @@ def breakOnName(self, regexp): def parseTree(self): """Parse the registry Element, once created""" # This must be the Element for the root + if self.tree is None: + raise RuntimeError("Tree not initialized!") self.reg = self.tree.getroot() + # Preprocess the tree in one of the following ways: + # - either merge a set of APIs to another API based on their 'api' attributes + # - or remove all elements with non-matching 'api' attributes + # The preprocessing happens through a breath-first tree traversal. + # This is a blunt hammer, but eliminates the need to track and test + # the apis deeper in processing to select the correct elements and + # avoid duplicates. + # Schema validation should prevent duplicate elements with + # overlapping api attributes, or where one element has an api + # attribute and the other does not. + + if self.genOpts.mergeApiNames: + mergeAPIs(self.reg, self.genOpts.mergeApiNames.split(','), self.genOpts.apiname) + else: + stripNonmatchingAPIs(self.reg, self.genOpts.apiname, actuallyDelete = True) + # Create dictionary of registry types from toplevel tags # and add 'name' attribute to each tag (where missing) # based on its element. 
# - # There's usually one block; more are OK + # There is usually one block; more are OK # Required attributes: 'name' or nested tag contents self.typedict = {} for type_elem in self.reg.findall('types/type'): - # If the doesn't already have a 'name' attribute, set + # If the does not already have a 'name' attribute, set # it from contents of its tag. if type_elem.get('name') is None: - type_elem.set('name', type_elem.find('name').text) + name_elem = type_elem.find('name') + if name_elem is None or not name_elem.text: + raise RuntimeError("Type without a name!") + type_elem.set('name', name_elem.text) self.addElementInfo(type_elem, TypeInfo(type_elem), 'type', self.typedict) # Create dictionary of registry enum groups from tags. # # Required attributes: 'name'. If no name is given, one is - # generated, but that group can't be identified and turned into an - # enum type definition - it's just a container for tags. + # generated, but that group cannot be identified and turned into an + # enum type definition - it is just a container for tags. self.groupdict = {} for group in self.reg.findall('enums'): self.addElementInfo(group, GroupInfo(group), 'group', self.groupdict) @@ -452,7 +618,7 @@ def parseTree(self): # and add 'name' attribute to each tag (where missing) # based on its element. # - # There's usually only one block; more are OK. + # There is usually only one block; more are OK. # Required attributes: 'name' or tag contents self.cmddict = {} # List of commands which alias others. Contains @@ -460,11 +626,14 @@ def parseTree(self): # for each alias cmdAlias = [] for cmd in self.reg.findall('commands/command'): - # If the doesn't already have a 'name' attribute, set + # If the does not already have a 'name' attribute, set # it from contents of its tag. 
name = cmd.get('name') if name is None: - name = cmd.set('name', cmd.find('proto/name').text) + name_elem = cmd.find('proto/name') + if name_elem is None or not name_elem.text: + raise RuntimeError("Command without a name!") + name = cmd.set('name', name_elem.text) ci = CmdInfo(cmd) self.addElementInfo(cmd, ci, 'command', self.cmddict) alias = cmd.get('alias') @@ -494,6 +663,7 @@ def parseTree(self): # Create dictionaries of API and extension interfaces # from toplevel and tags. self.apidict = {} + format_condition = dict() for feature in self.reg.findall('feature'): featureInfo = FeatureInfo(feature) self.addElementInfo(feature, featureInfo, 'feature', self.apidict) @@ -507,11 +677,11 @@ def parseTree(self): # Instead, generateRequiredInterface ignores elements # that extend enumerated types. # - # For tags which are actually just constants, if there's + # For tags which are actually just constants, if there is # no 'extends' tag but there is a 'value' or 'bitpos' tag, just # add an EnumInfo record to the dictionary. That works because # output generation of constants is purely dependency-based, and - # doesn't need to iterate through the XML tags. + # does not need to iterate through the XML tags. 
for elem in feature.findall('require'): for enum in elem.findall('enum'): addEnumInfo = False @@ -530,6 +700,11 @@ def parseTree(self): else: self.gen.logMsg('warn', 'NO matching group', groupName, 'for enum', enum.get('name'), 'found.') + if groupName == "VkFormat": + format_name = enum.get('name') + if enum.get('alias'): + format_name = enum.get('alias') + format_condition[format_name] = featureInfo.name addEnumInfo = True elif enum.get('value') or enum.get('bitpos') or enum.get('alias'): # self.gen.logMsg('diag', 'Adding extension constant "enum"', @@ -539,6 +714,9 @@ def parseTree(self): enumInfo = EnumInfo(enum) self.addElementInfo(enum, enumInfo, 'enum', self.enumdict) + sync_pipeline_stage_condition = dict() + sync_access_condition = dict() + self.extensions = self.reg.findall('extensions/extension') self.extdict = {} for feature in self.extensions: @@ -564,10 +742,10 @@ def parseTree(self): # as when redefining an enum in another extension. extnumber = enum.get('extnumber') if not extnumber: - enum.set('extnumber', featureInfo.number) + enum.set('extnumber', str(featureInfo.number)) enum.set('extname', featureInfo.name) - enum.set('supported', featureInfo.supported) + enum.set('supported', noneStr(featureInfo.supported)) # Look up the GroupInfo with matching groupName if groupName in self.groupdict: # self.gen.logMsg('diag', 'Matching group', @@ -577,6 +755,34 @@ def parseTree(self): else: self.gen.logMsg('warn', 'NO matching group', groupName, 'for enum', enum.get('name'), 'found.') + # This is Vulkan-specific + if groupName == "VkFormat": + format_name = enum.get('name') + if enum.get('alias'): + format_name = enum.get('alias') + if format_name in format_condition: + format_condition[format_name] += "," + featureInfo.name + else: + format_condition[format_name] = featureInfo.name + elif groupName == "VkPipelineStageFlagBits2": + stage_flag = enum.get('name') + if enum.get('alias'): + stage_flag = enum.get('alias') + featureName = elem.get('depends') if 
elem.get('depends') is not None else featureInfo.name + if stage_flag in sync_pipeline_stage_condition: + sync_pipeline_stage_condition[stage_flag] += "," + featureName + else: + sync_pipeline_stage_condition[stage_flag] = featureName + elif groupName == "VkAccessFlagBits2": + access_flag = enum.get('name') + if enum.get('alias'): + access_flag = enum.get('alias') + featureName = elem.get('depends') if elem.get('depends') is not None else featureInfo.name + if access_flag in sync_access_condition: + sync_access_condition[access_flag] += "," + featureName + else: + sync_access_condition[access_flag] = featureName + addEnumInfo = True elif enum.get('value') or enum.get('bitpos') or enum.get('alias'): # self.gen.logMsg('diag', 'Adding extension constant "enum"', @@ -586,23 +792,6 @@ def parseTree(self): enumInfo = EnumInfo(enum) self.addElementInfo(enum, enumInfo, 'enum', self.enumdict) - # Construct a "validextensionstructs" list for parent structures - # based on "structextends" tags in child structures - disabled_types = [] - for disabled_ext in self.reg.findall('extensions/extension[@supported="disabled"]'): - for type_elem in disabled_ext.findall("*/type"): - disabled_types.append(type_elem.get('name')) - for type_elem in self.reg.findall('types/type'): - if type_elem.get('name') not in disabled_types: - parentStructs = type_elem.get('structextends') - if parentStructs is not None: - for parent in parentStructs.split(','): - # self.gen.logMsg('diag', type.get('name'), 'extends', parent) - self.validextensionstructs[parent].append(type_elem.get('name')) - # Sort the lists so they don't depend on the XML order - for parent in self.validextensionstructs: - self.validextensionstructs[parent].sort() - # Parse out all spirv tags in dictionaries # Use addElementInfo to catch duplicates for spirv in self.reg.findall('spirvextensions/spirvextension'): @@ -612,6 +801,34 @@ def parseTree(self): spirvInfo = SpirvInfo(spirv) self.addElementInfo(spirv, spirvInfo, 
'spirvcapability', self.spirvcapdict) + for format in self.reg.findall('formats/format'): + condition = None + format_name = format.get('name') + if format_name in format_condition: + condition = format_condition[format_name] + formatInfo = FormatInfo(format, condition) + self.addElementInfo(format, formatInfo, 'format', self.formatsdict) + + for stage in self.reg.findall('sync/syncstage'): + condition = None + stage_flag = stage.get('name') + if stage_flag in sync_pipeline_stage_condition: + condition = sync_pipeline_stage_condition[stage_flag] + syncInfo = SyncStageInfo(stage, condition) + self.addElementInfo(stage, syncInfo, 'syncstage', self.syncstagedict) + + for access in self.reg.findall('sync/syncaccess'): + condition = None + access_flag = access.get('name') + if access_flag in sync_access_condition: + condition = sync_access_condition[access_flag] + syncInfo = SyncAccessInfo(access, condition) + self.addElementInfo(access, syncInfo, 'syncaccess', self.syncaccessdict) + + for pipeline in self.reg.findall('sync/syncpipeline'): + syncInfo = SyncPipelineInfo(pipeline) + self.addElementInfo(pipeline, syncInfo, 'syncpipeline', self.syncpipelinedict) + def dumpReg(self, maxlen=120, filehandle=sys.stdout): """Dump all the dictionaries constructed from the Registry object. @@ -651,6 +868,10 @@ def dumpReg(self, maxlen=120, filehandle=sys.stdout): for key in self.spirvcapdict: write(' SPIR-V Capability', key, '->', etree.tostring(self.spirvcapdict[key].elem)[0:maxlen], file=filehandle) + write('// VkFormat', file=filehandle) + for key in self.formatsdict: + write(' VkFormat', key, '->', + etree.tostring(self.formatsdict[key].elem)[0:maxlen], file=filehandle) def markTypeRequired(self, typename, required): """Require (along with its dependencies) or remove (but not its dependencies) a type. 
@@ -659,6 +880,7 @@ def markTypeRequired(self, typename, required): - required - boolean (to tag features as required or not) """ self.gen.logMsg('diag', 'tagging type:', typename, '-> required =', required) + # Get TypeInfo object for tag corresponding to typename typeinfo = self.lookupElementInfo(typename, self.typedict) if typeinfo is not None: @@ -671,7 +893,7 @@ def markTypeRequired(self, typename, required): if depname: self.gen.logMsg('diag', 'Generating dependent type', depname, 'for', attrib_name, 'type', typename) - # Don't recurse on self-referential structures. + # Do not recurse on self-referential structures. if typename != depname: self.markTypeRequired(depname, required) else: @@ -713,20 +935,18 @@ def markEnumRequired(self, enumname, required): - enumname - name of enum - required - boolean (to tag features as required or not)""" - self.gen.logMsg('diag', 'tagging enum:', enumname, '-> required =', required) + self.gen.logMsg('diag', 'markEnumRequired: tagging enum:', enumname, '-> required =', required) enum = self.lookupElementInfo(enumname, self.enumdict) if enum is not None: # If the enum is part of a group, and is being removed, then - # look it up in that tag and remove it there, so that it - # isn't visible to generators (which traverse the tag - # elements themselves). - # This isn't the most robust way of doing this, since a removed - # enum that's later required again will no longer have a group - # element, but it makes the change non-intrusive on generator - # code. - if required is False: + # look it up in that tag and remove the Element there, + # so that it is not visible to generators (which traverse the + # tag elements rather than using the dictionaries). 
+ if not required: groupName = enum.elem.get('extends') if groupName is not None: + self.gen.logMsg('diag', f'markEnumRequired: Removing extending enum {enum.elem.get("name")}') + # Look up the Info with matching groupName if groupName in self.groupdict: gi = self.groupdict[groupName] @@ -735,23 +955,42 @@ def markEnumRequired(self, enumname, required): # Remove copy of this enum from the group gi.elem.remove(gienum) else: - self.gen.logMsg('warn', 'Cannot remove enum', + self.gen.logMsg('warn', 'markEnumRequired: Cannot remove enum', enumname, 'not found in group', groupName) else: - self.gen.logMsg('warn', 'Cannot remove enum', + self.gen.logMsg('warn', 'markEnumRequired: Cannot remove enum', enumname, 'from nonexistent group', groupName) + else: + # This enum is not an extending enum. + # The XML tree must be searched for all that + # might have it, so we know the parent to delete from. + + enumName = enum.elem.get('name') + + self.gen.logMsg('diag', f'markEnumRequired: Removing non-extending enum {enumName}') + + count = 0 + for enums in self.reg.findall('enums'): + for thisEnum in enums.findall('enum'): + if thisEnum.get('name') == enumName: + # Actually remove it + count = count + 1 + enums.remove(thisEnum) + + if count == 0: + self.gen.logMsg('warn', f'markEnumRequired: {enumName}) not found in any tag') enum.required = required # Tag enum dependencies in 'alias' attribute as required depname = enum.elem.get('alias') if depname: - self.gen.logMsg('diag', 'Generating dependent enum', + self.gen.logMsg('diag', 'markEnumRequired: Generating dependent enum', depname, 'for alias', enumname, 'required =', enum.required) self.markEnumRequired(depname, required) else: - self.gen.logMsg('warn', 'enum:', enumname, 'IS NOT DEFINED') + self.gen.logMsg('warn', f'markEnumRequired: {enumname} IS NOT DEFINED') def markCmdRequired(self, cmdname, required): """Mark a command as required or not. 
@@ -762,14 +1001,25 @@ def markCmdRequired(self, cmdname, required): cmd = self.lookupElementInfo(cmdname, self.cmddict) if cmd is not None: cmd.required = required + # Tag command dependencies in 'alias' attribute as required - depname = cmd.elem.get('alias') - if depname: - self.gen.logMsg('diag', 'Generating dependent command', - depname, 'for alias', cmdname) - self.markCmdRequired(depname, required) + # + # This is usually not done, because command 'aliases' are not + # actual C language aliases like type and enum aliases. Instead + # they are just duplicates of the function signature of the + # alias. This means that there is no dependency of a command + # alias on what it aliases. One exception is validity includes, + # where the spec markup needs the promoted-to validity include + # even if only the promoted-from command is being built. + if self.genOpts.requireCommandAliases: + depname = cmd.elem.get('alias') + if depname: + self.gen.logMsg('diag', 'Generating dependent command', + depname, 'for alias', cmdname) + self.markCmdRequired(depname, required) + # Tag all parameter types of this command as required. - # This DOES NOT remove types of commands in a + # This does not remove types of commands in a # tag, because many other commands may use the same type. # We could be more clever and reference count types, # instead of using a boolean. @@ -792,11 +1042,12 @@ def markRequired(self, featurename, feature, required): # Loop over types, enums, and commands in the tag # @@ It would be possible to respect 'api' and 'profile' attributes - # in individual features, but that's not done yet. + # in individual features, but that is not done yet. 
for typeElem in feature.findall('type'): self.markTypeRequired(typeElem.get('name'), required) for enumElem in feature.findall('enum'): self.markEnumRequired(enumElem.get('name'), required) + for cmdElem in feature.findall('command'): self.markCmdRequired(cmdElem.get('name'), required) @@ -831,6 +1082,8 @@ def getAlias(self, elem, dict): if alias is None: name = elem.get('name') typeinfo = self.lookupElementInfo(name, dict) + if not typeinfo: + self.gen.logMsg('error', name, 'is not a known name') alias = typeinfo.elem.get('alias') return alias @@ -842,8 +1095,13 @@ def checkForCorrectionAliases(self, alias, require, tag): - require - `` block from the registry - tag - tag to look for in the require block""" - if alias and require.findall(tag + "[@name='" + alias + "']"): - return True + # For the time being, the code below is bypassed. It has the effect + # of excluding "spelling aliases" created to comply with the style + # guide, but this leaves references out of the specification and + # causes broken internal links. + # + # if alias and require.findall(tag + "[@name='" + alias + "']"): + # return True return False @@ -877,9 +1135,12 @@ def fillFeatureDictionary(self, interface, featurename, api, profile): # Determine the required extension or version needed for a require block # Assumes that only one of these is specified - required_key = require.get('feature') - if required_key is None: - required_key = require.get('extension') + # 'extension', and therefore 'required_key', may be a boolean + # expression of extension names. + # 'required_key' is used only as a dictionary key at + # present, and passed through to the script generators, so + # they must be prepared to parse that boolean expression. 
+ required_key = require.get('depends') # Loop over types, enums, and commands in the tag for typeElem in require.findall('type'): @@ -887,7 +1148,7 @@ def fillFeatureDictionary(self, interface, featurename, api, profile): typeinfo = self.lookupElementInfo(typename, self.typedict) if typeinfo: - # Remove aliases in the same extension/feature; these are always added as a correction. Don't need the original to be visible. + # Remove aliases in the same extension/feature; these are always added as a correction. Do not need the original to be visible. alias = self.getAlias(typeElem, self.typedict) if not self.checkForCorrectionAliases(alias, require, 'type'): # Resolve the type info to the actual type, so we get an accurate read for 'structextends' @@ -902,12 +1163,15 @@ def fillFeatureDictionary(self, interface, featurename, api, profile): if not typeextends in self.gen.featureDictionary[featurename][typecat][required_key]: self.gen.featureDictionary[featurename][typecat][required_key][typeextends] = [] self.gen.featureDictionary[featurename][typecat][required_key][typeextends].append(typename) + else: + self.gen.logMsg('warn', 'fillFeatureDictionary: NOT filling for {}'.format(typename)) + for enumElem in require.findall('enum'): enumname = enumElem.get('name') typeinfo = self.lookupElementInfo(enumname, self.enumdict) - # Remove aliases in the same extension/feature; these are always added as a correction. Don't need the original to be visible. + # Remove aliases in the same extension/feature; these are always added as a correction. Do not need the original to be visible. 
alias = self.getAlias(enumElem, self.enumdict) if not self.checkForCorrectionAliases(alias, require, 'enum'): enumextends = enumElem.get('extends') @@ -916,29 +1180,42 @@ def fillFeatureDictionary(self, interface, featurename, api, profile): if not enumextends in self.gen.featureDictionary[featurename]['enumconstant'][required_key]: self.gen.featureDictionary[featurename]['enumconstant'][required_key][enumextends] = [] self.gen.featureDictionary[featurename]['enumconstant'][required_key][enumextends].append(enumname) + else: + self.gen.logMsg('warn', 'fillFeatureDictionary: NOT filling for {}'.format(typename)) for cmdElem in require.findall('command'): - - # Remove aliases in the same extension/feature; these are always added as a correction. Don't need the original to be visible. + # Remove aliases in the same extension/feature; these are always added as a correction. Do not need the original to be visible. alias = self.getAlias(cmdElem, self.cmddict) if not self.checkForCorrectionAliases(alias, require, 'command'): if not required_key in self.gen.featureDictionary[featurename]['command']: self.gen.featureDictionary[featurename]['command'][required_key] = [] self.gen.featureDictionary[featurename]['command'][required_key].append(cmdElem.get('name')) + else: + self.gen.logMsg('warn', 'fillFeatureDictionary: NOT filling for {}'.format(typename)) - - def requireAndRemoveFeatures(self, interface, featurename, api, profile): - """Process `` and `` tags for a `` or ``. + def requireFeatures(self, interface, featurename, api, profile): + """Process `` tags for a `` or ``. 
- interface - Element for `` or ``, containing - `` and `` tags + `` tags - featurename - name of the feature - api - string specifying API name being generated - profile - string specifying API profile being generated""" + # marks things that are required by this version/profile for feature in interface.findall('require'): if matchAPIProfile(api, profile, feature): self.markRequired(featurename, feature, True) + + def removeFeatures(self, interface, featurename, api, profile): + """Process `` tags for a `` or ``. + + - interface - Element for `` or ``, containing + `` tags + - featurename - name of the feature + - api - string specifying API name being generated + - profile - string specifying API profile being generated""" + # marks things that are removed by this version/profile for feature in interface.findall('remove'): if matchAPIProfile(api, profile, feature): @@ -954,6 +1231,7 @@ def assignAdditionalValidity(self, interface, api, profile): if v.get('struct'): self.typedict[v.get('struct')].additionalValidity.append(copy.deepcopy(v)) + def removeAdditionalValidity(self, interface, api, profile): # Loop over all usage inside all tags. for feature in interface.findall('remove'): if matchAPIProfile(api, profile, feature): @@ -963,15 +1241,23 @@ def assignAdditionalValidity(self, interface, api, profile): if v.get('struct'): self.typedict[v.get('struct')].removedValidity.append(copy.deepcopy(v)) - def generateFeature(self, fname, ftype, dictionary): + def generateFeature(self, fname, ftype, dictionary, explicit=False): """Generate a single type / enum group / enum / command, and all its dependencies as needed. 
- fname - name of feature (``/``/``) - ftype - type of feature, 'type' | 'enum' | 'command' - - dictionary - of *Info objects - self.{type|enum|cmd}dict""" + - dictionary - of *Info objects - self.{type|enum|cmd}dict + - explicit - True if this is explicitly required by the top-level + XML tag, False if it is a dependency of an explicit + requirement.""" self.gen.logMsg('diag', 'generateFeature: generating', ftype, fname) + + if not (explicit or self.genOpts.requireDepends): + self.gen.logMsg('diag', 'generateFeature: NOT generating', ftype, fname, 'because generator does not require dependencies') + return + f = self.lookupElementInfo(fname, dictionary) if f is None: # No such feature. This is an error, but reported earlier @@ -979,7 +1265,7 @@ def generateFeature(self, fname, ftype, dictionary): 'returning!') return - # If feature isn't required, or has already been declared, return + # If feature is not required, or has already been declared, return if not f.required: self.gen.logMsg('diag', 'Skipping', ftype, fname, '(not required)') return @@ -1056,7 +1342,7 @@ def generateFeature(self, fname, ftype, dictionary): # @ The enum group is not ready for generation. At this # @ point, it contains all tags injected by # @ tags without any verification of whether - # @ they're required or not. It may also contain + # @ they are required or not. It may also contain # @ duplicates injected by multiple consistent # @ definitions of an . 
@@ -1081,7 +1367,8 @@ def generateFeature(self, fname, ftype, dictionary): if extname is not None: # 'supported' attribute was injected when the element was # moved into the group in Registry.parseTree() - if self.genOpts.defaultExtensions == elem.get('supported'): + supported_list = elem.get('supported').split(",") + if self.genOpts.defaultExtensions in supported_list: required = True elif re.match(self.genOpts.addExtensions, extname) is not None: required = True @@ -1094,7 +1381,7 @@ def generateFeature(self, fname, ftype, dictionary): if required: # Mark this element as required (in the element, not the EnumInfo) elem.set('required', 'true') - # If it's an alias, track that for later use + # If it is an alias, track that for later use enumAlias = elem.get('alias') if enumAlias: enumAliases.append(enumAlias) @@ -1103,6 +1390,8 @@ def generateFeature(self, fname, ftype, dictionary): if name in enumAliases: elem.set('required', 'true') self.gen.logMsg('diag', '* also need to require alias', name) + if f is None: + raise RuntimeError("Should not get here") if f.elem.get('category') == 'bitmask': followupFeature = f.elem.get('bitvalues') elif ftype == 'command': @@ -1125,6 +1414,8 @@ def generateFeature(self, fname, ftype, dictionary): # Actually generate the type only if emitting declarations if self.emitFeatures: self.gen.logMsg('diag', 'Emitting', ftype, 'decl for', fname) + if genProc is None: + raise RuntimeError("genProc is None when we should be emitting") genProc(f, fname, alias) else: self.gen.logMsg('diag', 'Skipping', ftype, fname, @@ -1143,16 +1434,16 @@ def generateRequiredInterface(self, interface): # Loop over all features inside all tags. 
for features in interface.findall('require'): for t in features.findall('type'): - self.generateFeature(t.get('name'), 'type', self.typedict) + self.generateFeature(t.get('name'), 'type', self.typedict, explicit=True) for e in features.findall('enum'): - # If this is an enum extending an enumerated type, don't + # If this is an enum extending an enumerated type, do not # generate it - this has already been done in reg.parseTree, # by copying this element into the enumerated type. enumextends = e.get('extends') if not enumextends: - self.generateFeature(e.get('name'), 'enum', self.enumdict) + self.generateFeature(e.get('name'), 'enum', self.enumdict, explicit=True) for c in features.findall('command'): - self.generateFeature(c.get('name'), 'command', self.cmddict) + self.generateFeature(c.get('name'), 'command', self.cmddict, explicit=True) def generateSpirv(self, spirv, dictionary): if spirv is None: @@ -1167,6 +1458,99 @@ def generateSpirv(self, spirv, dictionary): genProc = self.gen.genSpirv genProc(spirv, name, alias) + def stripUnsupportedAPIs(self, dictionary, attribute, supportedDictionary): + """Strip unsupported APIs from attributes of APIs. + dictionary - *Info dictionary of APIs to be updated + attribute - attribute name to look for in each API + supportedDictionary - dictionary in which to look for supported + API elements in the attribute""" + + for key in dictionary: + eleminfo = dictionary[key] + attribstring = eleminfo.elem.get(attribute) + if attribstring is not None: + apis = [] + stripped = False + for api in attribstring.split(','): + ##print('Checking API {} referenced by {}'.format(api, key)) + if api in supportedDictionary and supportedDictionary[api].required: + apis.append(api) + else: + stripped = True + ##print('\t**STRIPPING API {} from {}'.format(api, key)) + + # Update the attribute after stripping stuff. 
+ # Could sort apis before joining, but it is not a clear win + if stripped: + eleminfo.elem.set(attribute, ','.join(apis)) + + def stripUnsupportedAPIsFromList(self, dictionary, supportedDictionary): + """Strip unsupported APIs from attributes of APIs. + dictionary - dictionary of list of structure name strings + supportedDictionary - dictionary in which to look for supported + API elements in the attribute""" + + for key in dictionary: + attribstring = dictionary[key] + if attribstring is not None: + apis = [] + stripped = False + for api in attribstring: + ##print('Checking API {} referenced by {}'.format(api, key)) + if supportedDictionary[api].required: + apis.append(api) + else: + stripped = True + ##print('\t**STRIPPING API {} from {}'.format(api, key)) + + # Update the attribute after stripping stuff. + # Could sort apis before joining, but it is not a clear win + if stripped: + dictionary[key] = apis + + def generateFormat(self, format, dictionary): + if format is None: + self.gen.logMsg('diag', 'No entry found for format element', + 'returning!') + return + + name = format.elem.get('name') + # No known alias for VkFormat elements + alias = None + if format.emit: + genProc = self.gen.genFormat + genProc(format, name, alias) + + def generateSyncStage(self, sync): + genProc = self.gen.genSyncStage + genProc(sync) + + def generateSyncAccess(self, sync): + genProc = self.gen.genSyncAccess + genProc(sync) + + def generateSyncPipeline(self, sync): + genProc = self.gen.genSyncPipeline + genProc(sync) + + def tagValidExtensionStructs(self): + """Construct a "validextensionstructs" list for parent structures + based on "structextends" tags in child structures. 
+ Only do this for structures tagged as required.""" + + for typeinfo in self.typedict.values(): + type_elem = typeinfo.elem + if typeinfo.required and type_elem.get('category') == 'struct': + struct_extends = type_elem.get('structextends') + if struct_extends is not None: + for parent in struct_extends.split(','): + # self.gen.logMsg('diag', type_elem.get('name'), 'extends', parent) + self.validextensionstructs[parent].append(type_elem.get('name')) + + # Sort the lists so they do not depend on the XML order + for parent in self.validextensionstructs: + self.validextensionstructs[parent].sort() + def apiGen(self): """Generate interface for specified versions using the current generator and generator options""" @@ -1177,8 +1561,13 @@ def apiGen(self): 'profile:', self.genOpts.profile) self.gen.logMsg('diag', '*******************************************') - # Reset required/declared flags for all features - self.apiReset() + # Could reset required/declared flags for all features here. + # This has been removed as never used. The initial motivation was + # the idea of calling apiGen() repeatedly for different targets, but + # this has never been done. The 20% or so build-time speedup that + # might result is not worth the effort to make it actually work. + # + # self.apiReset() # Compile regexps used to select versions & extensions regVersions = re.compile(self.genOpts.versions) @@ -1187,6 +1576,7 @@ def apiGen(self): regRemoveExtensions = re.compile(self.genOpts.removeExtensions) regEmitExtensions = re.compile(self.genOpts.emitExtensions) regEmitSpirv = re.compile(self.genOpts.emitSpirv) + regEmitFormats = re.compile(self.genOpts.emitFormats) # Get all matching API feature names & add to list of FeatureInfo # Note we used to select on feature version attributes, not names. @@ -1224,7 +1614,7 @@ def apiGen(self): # Get all matching extensions, in order by their extension number, # and add to the list of features. 
- # Start with extensions tagged with 'api' pattern matching the API + # Start with extensions whose 'supported' attributes match the API # being generated. Add extensions matching the pattern specified in # regExtensions, then remove extensions matching the pattern # specified in regRemoveExtensions @@ -1242,9 +1632,9 @@ def apiGen(self): # Include additional extensions if the extension name matches # the regexp specified in the generator options. This allows - # forcing extensions into an interface even if they're not + # forcing extensions into an interface even if they are not # tagged appropriately in the registry. - # However we still respect the 'supported' attribute. + # However, we still respect the 'supported' attribute. if regAddExtensions.match(extName) is not None: if not apiNameMatch(self.genOpts.apiname, ei.elem.get('supported')): self.gen.logMsg('diag', 'NOT including extension', @@ -1256,7 +1646,7 @@ def apiGen(self): include = True # Remove extensions if the name matches the regexp specified # in generator options. This allows forcing removal of - # extensions from an interface even if they're tagged that + # extensions from an interface even if they are tagged that # way in the registry. if regRemoveExtensions.match(extName) is not None: self.gen.logMsg('diag', 'Removing extension', @@ -1274,8 +1664,8 @@ def apiGen(self): 'for emission (does not match emitextensions pattern)') # Hack - can be removed when validity generator goes away - # (Jon) I'm not sure what this does, or if it should respect - # the ei.emit flag above. + # (Jon) I am not sure what this does, or if it should + # respect the ei.emit flag above. 
self.requiredextensions.append(extName) else: self.gen.logMsg('diag', 'NOT including extension', @@ -1295,41 +1685,68 @@ def apiGen(self): si.emit = (regEmitSpirv.match(key) is not None) spirvcaps.append(si) + formats = [] + for key in self.formatsdict: + si = self.formatsdict[key] + si.emit = (regEmitFormats.match(key) is not None) + formats.append(si) + # Sort the features list, if a sort procedure is defined if self.genOpts.sortProcedure: self.genOpts.sortProcedure(features) - # print('sortProcedure ->', [f.name for f in features]) - # Pass 1: loop over requested API versions and extensions tagging + # Passes 1+2: loop over requested API versions and extensions tagging # types/commands/features as required (in an block) or no - # longer required (in an block). It is possible to remove - # a feature in one version and restore it later by requiring it in - # a later version. + # longer required (in an block). s are processed + # after all s, so removals win. # If a profile other than 'None' is being generated, it must # match the profile attribute (if any) of the and # tags. 
self.gen.logMsg('diag', 'PASS 1: TAG FEATURES') for f in features: - self.gen.logMsg('diag', 'PASS 1: Tagging required and removed features for', - f.name) + self.gen.logMsg('diag', 'PASS 1: Tagging required and features for', f.name) self.fillFeatureDictionary(f.elem, f.name, self.genOpts.apiname, self.genOpts.profile) - self.requireAndRemoveFeatures(f.elem, f.name, self.genOpts.apiname, self.genOpts.profile) + self.requireFeatures(f.elem, f.name, self.genOpts.apiname, self.genOpts.profile) self.assignAdditionalValidity(f.elem, self.genOpts.apiname, self.genOpts.profile) - # Pass 2: loop over specified API versions and extensions printing - # declarations for required things which haven't already been + for f in features: + self.gen.logMsg('diag', 'PASS 2: Tagging removed features for', f.name) + self.removeFeatures(f.elem, f.name, self.genOpts.apiname, self.genOpts.profile) + self.removeAdditionalValidity(f.elem, self.genOpts.apiname, self.genOpts.profile) + + # Now, strip references to APIs that are not required. + # At present such references may occur in: + # Structs in 'structextends' attributes + # Enums in 'successcodes' and 'errorcodes' attributes + self.stripUnsupportedAPIs(self.typedict, 'structextends', self.typedict) + self.stripUnsupportedAPIs(self.cmddict, 'successcodes', self.enumdict) + self.stripUnsupportedAPIs(self.cmddict, 'errorcodes', self.enumdict) + self.stripUnsupportedAPIsFromList(self.validextensionstructs, self.typedict) + + # Construct lists of valid extension structures + self.tagValidExtensionStructs() + + # @@May need to strip / + # tags of these forms: + # + # + # + # + + # Pass 3: loop over specified API versions and extensions printing + # declarations for required things which have not already been # generated. 
- self.gen.logMsg('diag', 'PASS 2: GENERATE INTERFACES FOR FEATURES') + self.gen.logMsg('diag', 'PASS 3: GENERATE INTERFACES FOR FEATURES') self.gen.beginFile(self.genOpts) for f in features: - self.gen.logMsg('diag', 'PASS 2: Generating interface for', + self.gen.logMsg('diag', 'PASS 3: Generating interface for', f.name) emit = self.emitFeatures = f.emit if not emit: - self.gen.logMsg('diag', 'PASS 2: NOT declaring feature', + self.gen.logMsg('diag', 'PASS 3: NOT declaring feature', f.elem.get('name'), 'because it is not tagged for emission') # Generate the interface (or just tag its elements as having been - # emitted, if they haven't been). + # emitted, if they have not been). self.gen.beginFeature(f.elem, emit) self.generateRequiredInterface(f.elem) self.gen.endFeature() @@ -1338,6 +1755,14 @@ def apiGen(self): self.generateSpirv(s, self.spirvextdict) for s in spirvcaps: self.generateSpirv(s, self.spirvcapdict) + for s in formats: + self.generateFormat(s, self.formatsdict) + for s in self.syncstagedict: + self.generateSyncStage(self.syncstagedict[s]) + for s in self.syncaccessdict: + self.generateSyncAccess(self.syncaccessdict[s]) + for s in self.syncpipelinedict: + self.generateSyncPipeline(self.syncpipelinedict[s]) self.gen.endFile() def apiReset(self): @@ -1352,46 +1777,3 @@ def apiReset(self): self.cmddict[cmd].resetState() for cmd in self.apidict: self.apidict[cmd].resetState() - - def __validateStructLimittypes(self, struct): - """Validate 'limittype' attributes for a single struct.""" - limittypeDiags = namedtuple('limittypeDiags', ['missing', 'invalid']) - badFields = defaultdict(lambda : limittypeDiags(missing=[], invalid=[])) - validLimittypes = { 'min', 'max', 'bitmask', 'range', 'struct', 'noauto' } - for member in struct.getMembers(): - memberName = member.findtext('name') - if memberName in ['sType', 'pNext']: - continue - limittype = member.get('limittype') - if not limittype: - badFields[struct.elem.get('name')].missing.append(memberName) - elif 
limittype == 'struct': - typeName = member.findtext('type') - memberType = self.typedict[typeName] - badFields.update(self.__validateStructLimittypes(memberType)) - elif limittype not in validLimittypes: - badFields[struct.elem.get('name')].invalid.append(memberName) - return badFields - - def __validateLimittype(self): - """Validate 'limittype' attributes.""" - self.gen.logMsg('diag', 'VALIDATING LIMITTYPE ATTRIBUTES') - badFields = self.__validateStructLimittypes(self.typedict['VkPhysicalDeviceProperties2']) - for featStructName in self.validextensionstructs['VkPhysicalDeviceProperties2']: - featStruct = self.typedict[featStructName] - badFields.update(self.__validateStructLimittypes(featStruct)) - - if badFields: - self.gen.logMsg('diag', 'SUMMARY OF FIELDS WITH INCORRECT LIMITTYPES') - for key in sorted(badFields.keys()): - diags = badFields[key] - if diags.missing: - self.gen.logMsg('diag', ' ', key, 'missing limittype:', ', '.join(badFields[key].missing)) - if diags.invalid: - self.gen.logMsg('diag', ' ', key, 'invalid limittype:', ', '.join(badFields[key].invalid)) - return False - return True - - def validateRegistry(self): - """Validate properties of the registry.""" - return self.__validateLimittype() diff --git a/scripts/scriptgenerator.py b/scripts/scriptgenerator.py new file mode 100644 index 00000000..f5ed14d0 --- /dev/null +++ b/scripts/scriptgenerator.py @@ -0,0 +1,390 @@ +#!/usr/bin/python3 -i +# +# Copyright 2013-2024 The Khronos Group Inc. +# +# SPDX-License-Identifier: Apache-2.0 + +from generator import OutputGenerator, enquote, noneStr + +def mostOfficial(api, newapi): + """Return the 'most official' of two related names, api and newapi. + KHR is more official than EXT is more official than everything else. + If there is ambiguity, return api. 
+ Accommodate APIs using lower-case vendor suffixes.""" + + apicat = api[-3:].upper() + newapicat = newapi[-3:].upper() + + if apicat == 'KHR': + return api + if newapicat == 'KHR': + return newapi; + if apicat == 'EXT': + return api + if newapicat == 'EXT': + return newapi; + return api + +class ScriptOutputGenerator(OutputGenerator): + """ScriptOutputGenerator - subclass of OutputGenerator. + Base class to Generate script (Python/Ruby/JS/etc.) data structures + describing API names and relationships. + Similar to DocOutputGenerator, but writes a single file.""" + + def apiName(self, name): + """Return True if name is in the reserved API namespace. + + Delegates to the conventions object. """ + return self.genOpts.conventions.is_api_name(name) + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + # Track features being generated + self.features = [] + + # Reverse map from interface names to features requiring them + self.apimap = {} + + # Reverse map from unsupported APIs in this build to aliases which + # are supported + self.nonexistent = {} + + def beginFile(self, genOpts): + OutputGenerator.beginFile(self, genOpts) + # + # Dictionaries are keyed by the name of the entity (e.g. + # self.structs is keyed by structure names). Values are + # the names of related entities (e.g. structs contain + # a list of type names of members, enums contain a list + # of enumerants belong to the enumerated type, etc.), or + # just None if there are no directly related entities. + # + # Collect the mappings, then emit the Python script in endFile + self.basetypes = {} + self.consts = {} + self.enums = {} + self.flags = {} + self.funcpointers = {} + self.protos = {} + self.structs = {} + self.handles = {} + self.defines = {} + self.alias = {} + # Dictionary containing the type of a type name + # (e.g. the string name of the dictionary with its contents). 
+ self.typeCategory = {} + self.mapDict = {} + + def addInterfaceMapping(self, api, feature, required): + """Add a reverse mapping in self.apimap from an API to a feature + requiring that API. + + - api - name of the API + - feature - name of the feature requiring it + - required - None, or an additional feature dependency within + 'feature'. The additional dependency is a boolean expression of + one or more extension and/or core version names, which is passed + through to the output script intact.""" + + # Each entry in self.apimap contains one or more + # ( feature, required ) tuples. + deps = ( feature, required ) + + if api in self.apimap: + self.apimap[api].append(deps) + else: + self.apimap[api] = [ deps ] + + def mapInterfaceKeys(self, feature, key): + """Construct reverse mapping of APIs to features requiring them in + self.apimap. + + - feature - name of the feature being generated + - key - API category - 'define', 'basetype', etc.""" + + dict = self.featureDictionary[feature][key] + + if dict: + # Not clear why handling of command vs. type APIs is different - + # see interfacedocgenerator.py, which this was based on. + if key == 'command': + for required in dict: + for api in dict[required]: + self.addInterfaceMapping(api, feature, required) + else: + for required in dict: + for parent in dict[required]: + for api in dict[required][parent]: + self.addInterfaceMapping(api, feature, required) + + def mapInterfaces(self, feature): + """Construct reverse mapping of APIs to features requiring them in + self.apimap. 
+ + - feature - name of the feature being generated""" + + # Map each category of interface + self.mapInterfaceKeys(feature, 'basetype') + self.mapInterfaceKeys(feature, 'bitmask') + self.mapInterfaceKeys(feature, 'command') + self.mapInterfaceKeys(feature, 'define') + self.mapInterfaceKeys(feature, 'enum') + self.mapInterfaceKeys(feature, 'enumconstant') + self.mapInterfaceKeys(feature, 'funcpointer') + self.mapInterfaceKeys(feature, 'handle') + self.mapInterfaceKeys(feature, 'include') + self.mapInterfaceKeys(feature, 'struct') + self.mapInterfaceKeys(feature, 'union') + + def endFile(self): + super().endFile() + + def beginFeature(self, interface, emit): + # Start processing in superclass + OutputGenerator.beginFeature(self, interface, emit) + + # Add this feature to the list being tracked + self.features.append( self.featureName ) + + def endFeature(self): + # Finish processing in superclass + OutputGenerator.endFeature(self) + + def addName(self, dict, name, value): + """Add a string entry to the dictionary, quoting it so it gets + printed out correctly in self.endFile().""" + dict[name] = value + + def addMapping(self, baseType, refType): + """Add a mapping between types to mapDict. 
+ + Only include API types, so we do not end up with a lot of useless + uint32_t and void types.""" + if not self.apiName(baseType) or not self.apiName(refType): + self.logMsg('diag', 'ScriptOutputGenerator::addMapping: IGNORE map from', baseType, '<->', refType) + return + + self.logMsg('diag', 'ScriptOutputGenerator::addMapping: map from', + baseType, '<->', refType) + + if baseType not in self.mapDict: + baseDict = {} + self.mapDict[baseType] = baseDict + else: + baseDict = self.mapDict[baseType] + if refType not in self.mapDict: + refDict = {} + self.mapDict[refType] = refDict + else: + refDict = self.mapDict[refType] + + baseDict[refType] = None + refDict[baseType] = None + + def breakCheck(self, procname, name): + """Debugging aid - call from procname to break on API 'name' if it + matches logic in this call.""" + + pat = 'VkExternalFenceFeatureFlagBits' + if name[0:len(pat)] == pat: + print('{}(name = {}) matches {}'.format(procname, name, pat)) + import pdb + pdb.set_trace() + + def genType(self, typeinfo, name, alias): + """Generate type. + + - For 'struct' or 'union' types, defer to genStruct() to + add to the dictionary. + - For 'bitmask' types, add the type name to the 'flags' dictionary, + with the value being the corresponding 'enums' name defining + the acceptable flag bits. + - For 'enum' types, add the type name to the 'enums' dictionary, + with the value being '@STOPHERE@' (because this case seems + never to happen). + - For 'funcpointer' types, add the type name to the 'funcpointers' + dictionary. + - For 'handle' and 'define' types, add the handle or #define name + to the 'struct' dictionary, because that is how the spec sources + tag these types even though they are not structs.""" + OutputGenerator.genType(self, typeinfo, name, alias) + + typeElem = typeinfo.elem + # If the type is a struct type, traverse the embedded tags + # generating a structure. Otherwise, emit the tag text. 
+ category = typeElem.get('category') + + # Add a typeCategory{} entry for the category of this type. + self.addName(self.typeCategory, name, category) + + if category in ('struct', 'union'): + self.genStruct(typeinfo, name, alias) + else: + if alias: + # Add name -> alias mapping + self.addName(self.alias, name, alias) + + # Always emit an alias (?!) + count = 1 + + # May want to only emit full type definition when not an alias? + else: + # Extract the type name + # (from self.genOpts). Copy other text through unchanged. + # If the resulting text is an empty string, do not emit it. + count = len(noneStr(typeElem.text)) + for elem in typeElem: + count += len(noneStr(elem.text)) + len(noneStr(elem.tail)) + + if count > 0: + if category == 'bitmask': + requiredEnum = typeElem.get('requires') + self.addName(self.flags, name, requiredEnum) + + # This happens when the Flags type is defined, but no + # FlagBits are defined yet. + if requiredEnum is not None: + self.addMapping(name, requiredEnum) + elif category == 'enum': + # This case does not seem to come up. It nominally would + # result from + # , + # but the output generator does not emit them directly. + self.logMsg('warn', 'ScriptOutputGenerator::genType: invalid \'enum\' category for name:', name) + elif category == 'funcpointer': + self.funcpointers[name] = None + elif category == 'handle': + self.handles[name] = None + elif category == 'define': + self.defines[name] = None + elif category == 'basetype': + self.basetypes[name] = None + self.addName(self.typeCategory, name, 'basetype') + else: + self.logMsg('diag', 'ScriptOutputGenerator::genType: unprocessed type:', name) + + def genStruct(self, typeinfo, typeName, alias): + """Generate struct (e.g. C "struct" type). 
+ + Add the struct name to the 'structs' dictionary, with the + value being an ordered list of the struct member names.""" + OutputGenerator.genStruct(self, typeinfo, typeName, alias) + + if alias: + # Add name -> alias mapping + self.addName(self.alias, typeName, alias) + else: + # May want to only emit definition on this branch + True + + members = [member.text for member in typeinfo.elem.findall('.//member/name')] + self.structs[typeName] = members + memberTypes = [member.text for member in typeinfo.elem.findall('.//member/type')] + for member_type in memberTypes: + self.addMapping(typeName, member_type) + + def genGroup(self, groupinfo, groupName, alias): + """Generate group (e.g. C "enum" type). + + These are concatenated together with other types. + + - Add the enum type name to the 'enums' dictionary, with + the value being an ordered list of the enumerant names. + - Add each enumerant name to the 'consts' dictionary, with + the value being the enum type the enumerant is part of.""" + OutputGenerator.genGroup(self, groupinfo, groupName, alias) + groupElem = groupinfo.elem + + # Add a typeCategory{} entry for the category of this type. + self.addName(self.typeCategory, groupName, 'group') + + if alias: + # Add name -> alias mapping + self.addName(self.alias, groupName, alias) + else: + # May want to only emit definition on this branch + True + + # Add each nested 'enum' tag + enumerants = [elem.get('name') for elem in groupElem.findall('enum')] + for name in enumerants: + self.addName(self.consts, name, groupName) + + # Sort enums for output stability, since their order is irrelevant + self.enums[groupName] = sorted(enumerants) + + def genEnum(self, enuminfo, name, alias): + """Generate enumerant (compile time constant). 
+ + - Add the constant name to the 'consts' dictionary, with the + value being None to indicate that the constant is not + an enumeration value.""" + OutputGenerator.genEnum(self, enuminfo, name, alias) + + if name not in self.consts: + # Add a typeCategory{} entry for the category of this type. + self.addName(self.typeCategory, name, 'consts') + self.consts[name] = None + + if alias: + # Add name -> alias mapping + self.addName(self.alias, name, alias) + else: + # May want to only emit definition on this branch + True + + # Otherwise, do not add it to the consts dictionary because it is + # already present. This happens due to the generator 'reparentEnums' + # parameter being False, so each extension enum appears in both the + # type and in the or it originally + # came from. + + def genCmd(self, cmdinfo, name, alias): + """Generate command. + + - Add the command name to the 'protos' dictionary, with the + value being an ordered list of the parameter names.""" + OutputGenerator.genCmd(self, cmdinfo, name, alias) + + # Add a typeCategory{} entry for the category of this type. + self.addName(self.typeCategory, name, 'protos') + + if alias: + # Add name -> alias mapping + self.addName(self.alias, name, alias) + else: + # May want to only emit definition on this branch + True + + params = [param.text for param in cmdinfo.elem.findall('param/name')] + self.protos[name] = params + paramTypes = [param.text for param in cmdinfo.elem.findall('param/type')] + for param_type in paramTypes: + self.addMapping(name, param_type) + + def createInverseMap(self): + """This creates the inverse mapping of nonexistent APIs in this + build to their aliases which are supported. Must be called by + language-specific subclasses before emitting that mapping.""" + + # Map from APIs not supported in this build to aliases that are. + # When there are multiple valid choices for remapping, choose the + # most-official suffixed one (KHR > EXT > vendor). 
+ for key in self.alias: + # If the API key is aliased to something which does not exist, + # then add the thing that does not exist to the nonexistent map. + # This is used in spec macros to make promoted extension links + # in specs built without the promoted interface refer to the + # older interface instead. + + invkey = self.alias[key] + + if invkey not in self.typeCategory: + if invkey in self.nonexistent: + # Potentially remap existing mapping to a more official + # alias. + self.nonexistent[invkey] = mostOfficial(self.nonexistent[invkey], key) + else: + # Create remapping to an alias + self.nonexistent[invkey] = key diff --git a/scripts/conventions.py b/scripts/spec_tools/conventions.py similarity index 62% rename from scripts/conventions.py rename to scripts/spec_tools/conventions.py index 34fa2ea5..5b9f6dd4 100644 --- a/scripts/conventions.py +++ b/scripts/spec_tools/conventions.py @@ -8,6 +8,8 @@ # used in generation. from enum import Enum +import abc +import re # Type categories that respond "False" to isStructAlwaysValid # basetype is home to typedefs like ..Bool32 @@ -21,13 +23,21 @@ TYPES_KNOWN_ALWAYS_VALID = set(('char', 'float', 'int8_t', 'uint8_t', + 'int16_t', 'uint16_t', 'int32_t', 'uint32_t', 'int64_t', 'uint64_t', 'size_t', - 'uintptr_t', + 'intptr_t', 'uintptr_t', 'int', )) +# Split an extension name into vendor ID and name portions +EXT_NAME_DECOMPOSE_RE = re.compile(r'(?P[A-Za-z]+)_(?P[A-Za-z]+)_(?P[\w_]+)') + +# Match an API version name. +# Match object includes API prefix, major, and minor version numbers. +# This could be refined further for specific APIs. 
+API_VERSION_NAME_RE = re.compile(r'(?P[A-Za-z]+)_VERSION_(?P[0-9]+)_(?P[0-9]+)') class ProseListFormats(Enum): """A connective, possibly with a quantifier.""" @@ -42,7 +52,7 @@ def from_string(cls, s): return cls.OR if s == 'and': return cls.AND - return None + raise RuntimeError("Unrecognized string connective: " + s) @property def connective(self): @@ -63,18 +73,37 @@ def quantifier(self, n): return '' -class ConventionsBase: +class ConventionsBase(abc.ABC): """WG-specific conventions.""" def __init__(self): self._command_prefix = None self._type_prefix = None + def formatVersionOrExtension(self, name): + """Mark up an API version or extension name as a link in the spec.""" + + # Is this a version name? + match = API_VERSION_NAME_RE.match(name) + if match is not None: + return self.formatVersion(name, + match.group('apivariant'), + match.group('major'), + match.group('minor')) + else: + # If not, assumed to be an extension name. Might be worth checking. + return self.formatExtension(name) + + def formatVersion(self, name, apivariant, major, minor): + """Mark up an API version name as a link in the spec.""" + return '`<<{}>>`'.format(name) + def formatExtension(self, name): - """Mark up an extension name as a link the spec.""" - return '`apiext:{}`'.format(name) + """Mark up an extension name as a link in the spec.""" + return '`<<{}>>`'.format(name) @property + @abc.abstractmethod def null(self): """Preferred spelling of NULL.""" raise NotImplementedError @@ -112,6 +141,38 @@ def external_macro(self): """ return 'code:' + @property + @abc.abstractmethod + def structtype_member_name(self): + """Return name of the structure type member. + + Must implement. + """ + raise NotImplementedError() + + @property + @abc.abstractmethod + def nextpointer_member_name(self): + """Return name of the structure pointer chain member. + + Must implement. 
+ """ + raise NotImplementedError() + + @property + @abc.abstractmethod + def xml_api_name(self): + """Return the name used in the default API XML registry for the default API""" + raise NotImplementedError() + + @abc.abstractmethod + def generate_structure_type_from_name(self, structname): + """Generate a structure type name, like XR_TYPE_CREATE_INSTANCE_INFO. + + Must implement. + """ + raise NotImplementedError() + def makeStructName(self, name): """Prepend the appropriate format macro for a structure to a structure type name. @@ -139,9 +200,9 @@ def _implMakeProseList(self, elements, fmt, with_verb, comma_for_two_elts=False, Optionally adds a quantifier (like 'any') before a list of 2 or more, if specified by fmt. - Don't edit these defaults, override self.makeProseList(). + Do not edit these defaults, override self.makeProseList(). """ - assert(serial_comma) # didn't implement what we didn't need + assert(serial_comma) # did not implement what we did not need if isinstance(fmt, str): fmt = ProseListFormats.from_string(fmt) @@ -166,10 +227,12 @@ def _implMakeProseList(self, elements, fmt, with_verb, comma_for_two_elts=False, return ''.join(parts) @property + @abc.abstractmethod def file_suffix(self): """Return suffix of generated Asciidoctor files""" raise NotImplementedError + @abc.abstractmethod def api_name(self, spectype=None): """Return API or specification name for citations in ref pages. @@ -206,6 +269,7 @@ def type_prefix(self): return self._type_prefix @property + @abc.abstractmethod def api_prefix(self): """Return API token prefix. @@ -214,6 +278,56 @@ def api_prefix(self): Must implement.""" raise NotImplementedError + @property + def extension_name_prefix(self): + """Return extension name prefix. + + Typically two uppercase letters followed by an underscore. 
+ + Assumed to be the same as api_prefix, but some APIs use different + case convntions.""" + + return self.api_prefix + + def extension_short_description(self, elem): + """Return a short description of an extension for use in refpages. + + elem is an ElementTree for the tag in the XML. + The default behavior is to use the 'type' field of this tag, but not + all APIs support this field.""" + + ext_type = elem.get('type') + + if ext_type is not None: + return f'{ext_type} extension' + else: + return '' + + @property + def write_contacts(self): + """Return whether contact list should be written to extension appendices""" + return False + + @property + def write_extension_type(self): + """Return whether extension type should be written to extension appendices""" + return True + + @property + def write_extension_number(self): + """Return whether extension number should be written to extension appendices""" + return True + + @property + def write_extension_revision(self): + """Return whether extension revision number should be written to extension appendices""" + return True + + @property + def write_refpage_include(self): + """Return whether refpage include should be written to extension appendices""" + return True + @property def api_version_prefix(self): """Return API core version token prefix. @@ -329,24 +443,43 @@ def generate_max_enum_in_docs(self): documentation includes.""" return False + @abc.abstractmethod + def extension_file_path(self, name): + """Return file path to an extension appendix relative to a directory + containing all such appendices. + - name - extension name + + Must implement.""" + raise NotImplementedError - def extension_include_string(self, ext): + def extension_include_string(self, name): """Return format string for include:: line for an extension appendix - file. ext is an object with the following members: - - name - extension string string - - vendor - vendor portion of name - - barename - remainder of name + file. 
+ - name - extension name""" - Must implement.""" - raise NotImplementedError + return 'include::{{appendices}}/{}[]'.format( + self.extension_file_path(name)) @property - def refpage_generated_include_path(self): + def provisional_extension_warning(self): + """Return True if a warning should be included in extension + appendices for provisional extensions.""" + return True + + @property + def generated_include_path(self): """Return path relative to the generated reference pages, to the - generated API include files. + generated API include files.""" - Must implement.""" - raise NotImplementedError + return '{generated}' + + @property + def include_extension_appendix_in_refpage(self): + """Return True if generating extension refpages by embedding + extension appendix content (default), False otherwise + (OpenXR).""" + + return True def valid_flag_bit(self, bitpos): """Return True if bitpos is an allowed numeric bit position for @@ -356,3 +489,41 @@ def valid_flag_bit(self, bitpos): or 64 bits), and may depend on assumptions about compiler handling of sign bits in enumerated types, as well.""" return True + + @property + def duplicate_aliased_structs(self): + """ + Should aliased structs have the original struct definition listed in the + generated docs snippet? 
+ """ + return False + + @property + def protectProtoComment(self): + """Return True if generated #endif should have a comment matching + the protection symbol used in the opening #ifdef/#ifndef.""" + return False + + @property + def extra_refpage_headers(self): + """Return any extra headers (preceding the title) for generated + reference pages.""" + return '' + + @property + def extra_refpage_body(self): + """Return any extra text (following the title) for generated + reference pages.""" + return '' + + def is_api_version_name(self, name): + """Return True if name is an API version name.""" + + return API_VERSION_NAME_RE.match(name) is not None + + @property + def docgen_language(self): + """Return the language to be used in docgenerator [source] + blocks.""" + + return 'c++' diff --git a/scripts/spec_tools/util.py b/scripts/spec_tools/util.py index 3dde0bd4..e67038a5 100644 --- a/scripts/spec_tools/util.py +++ b/scripts/spec_tools/util.py @@ -1,18 +1,7 @@ """Utility functions not closely tied to other spec_tools types.""" # Copyright (c) 2018-2019 Collabora, Ltd. -# Copyright (c) 2013-2024 The Khronos Group Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. +# Copyright 2013-2024 The Khronos Group Inc. +# SPDX-License-Identifier: Apache-2.0 def getElemName(elem, default=None): diff --git a/xml/cl.xml b/xml/cl.xml index 44aac610..fa6b2997 100644 --- a/xml/cl.xml +++ b/xml/cl.xml @@ -305,11 +305,11 @@ server's OpenCL/api-docs repository. 
cl_version version - charname[CL_NAME_VERSION_MAX_NAME_SIZE] + char name[CL_NAME_VERSION_MAX_NAME_SIZE] cl_version_khr version - charname[CL_NAME_VERSION_MAX_NAME_SIZE_KHR] + char name[CL_NAME_VERSION_MAX_NAME_SIZE_KHR] cl_uint pci_domain @@ -321,7 +321,7 @@ server's OpenCL/api-docs repository. cl_command_queue_properties properties cl_command_queue_capabilities_intel capabilities cl_uint count - charname[CL_QUEUE_FAMILY_MAX_NAME_SIZE_INTEL] + char name[CL_QUEUE_FAMILY_MAX_NAME_SIZE_INTEL] #define CL_VERSION_MAJOR_MASK_KHR ((1 << CL_VERSION_MAJOR_BITS_KHR) - 1) #define CL_VERSION_MINOR_MASK_KHR ((1 << CL_VERSION_MINOR_BITS_KHR) - 1) @@ -336,12 +336,12 @@ server's OpenCL/api-docs repository. (((minor) & CL_VERSION_MINOR_MASK_KHR) << CL_VERSION_PATCH_BITS_KHR) | \ ((patch) & CL_VERSION_PATCH_MASK_KHR)) - cl_boolsigned_accelerated - cl_boolunsigned_accelerated - cl_boolmixed_signedness_accelerated - cl_boolaccumulating_saturating_signed_accelerated - cl_boolaccumulating_saturating_unsigned_accelerated - cl_boolaccumulating_saturating_mixed_signedness_accelerated + cl_bool signed_accelerated + cl_bool unsigned_accelerated + cl_bool mixed_signedness_accelerated + cl_bool accumulating_saturating_signed_accelerated + cl_bool accumulating_saturating_unsigned_accelerated + cl_bool accumulating_saturating_mixed_signedness_accelerated cl_uint arg_index @@ -2098,7 +2098,7 @@ server's OpenCL/api-docs repository. - + @@ -5359,7 +5359,7 @@ server's OpenCL/api-docs repository. - + @@ -5406,7 +5406,7 @@ server's OpenCL/api-docs repository. - + @@ -5453,7 +5453,7 @@ server's OpenCL/api-docs repository. - + @@ -5502,7 +5502,7 @@ server's OpenCL/api-docs repository. - + @@ -5530,7 +5530,7 @@ server's OpenCL/api-docs repository. - + @@ -5544,7 +5544,7 @@ server's OpenCL/api-docs repository. - + @@ -5552,7 +5552,7 @@ server's OpenCL/api-docs repository. - + @@ -5578,7 +5578,7 @@ server's OpenCL/api-docs repository. - + @@ -5629,7 +5629,7 @@ server's OpenCL/api-docs repository. 
- + @@ -5643,13 +5643,13 @@ server's OpenCL/api-docs repository. - + - + @@ -5664,7 +5664,7 @@ server's OpenCL/api-docs repository. - + @@ -5687,7 +5687,7 @@ server's OpenCL/api-docs repository. - + @@ -5698,7 +5698,7 @@ server's OpenCL/api-docs repository. - + @@ -5900,7 +5900,7 @@ server's OpenCL/api-docs repository. - + @@ -5930,7 +5930,7 @@ server's OpenCL/api-docs repository. - + @@ -5945,7 +5945,7 @@ server's OpenCL/api-docs repository. - + @@ -5961,7 +5961,7 @@ server's OpenCL/api-docs repository. - + @@ -5977,7 +5977,7 @@ server's OpenCL/api-docs repository. - + @@ -6413,7 +6413,7 @@ server's OpenCL/api-docs repository. - + @@ -6511,7 +6511,7 @@ server's OpenCL/api-docs repository. - + @@ -6522,7 +6522,7 @@ server's OpenCL/api-docs repository. - + @@ -6532,7 +6532,7 @@ server's OpenCL/api-docs repository. - + @@ -6666,7 +6666,7 @@ server's OpenCL/api-docs repository. - + @@ -6747,7 +6747,7 @@ server's OpenCL/api-docs repository. - + @@ -6755,7 +6755,7 @@ server's OpenCL/api-docs repository. - + @@ -6821,7 +6821,7 @@ server's OpenCL/api-docs repository. - + @@ -6892,7 +6892,7 @@ server's OpenCL/api-docs repository. - + @@ -6903,7 +6903,7 @@ server's OpenCL/api-docs repository. - + @@ -6932,7 +6932,7 @@ server's OpenCL/api-docs repository. - + @@ -6950,7 +6950,7 @@ server's OpenCL/api-docs repository. - + @@ -6997,7 +6997,7 @@ server's OpenCL/api-docs repository. - + @@ -7024,7 +7024,7 @@ server's OpenCL/api-docs repository. - + @@ -7032,7 +7032,7 @@ server's OpenCL/api-docs repository. - + @@ -7040,7 +7040,7 @@ server's OpenCL/api-docs repository. - + @@ -7054,7 +7054,7 @@ server's OpenCL/api-docs repository. - + @@ -7063,7 +7063,7 @@ server's OpenCL/api-docs repository. - + @@ -7090,7 +7090,7 @@ server's OpenCL/api-docs repository. - + @@ -7098,7 +7098,7 @@ server's OpenCL/api-docs repository. - + @@ -7109,7 +7109,7 @@ server's OpenCL/api-docs repository. - + @@ -7117,7 +7117,7 @@ server's OpenCL/api-docs repository. 
- + @@ -7161,7 +7161,7 @@ server's OpenCL/api-docs repository. - + @@ -7228,9 +7228,11 @@ server's OpenCL/api-docs repository. + + + - @@ -7306,7 +7308,7 @@ server's OpenCL/api-docs repository. - + @@ -7380,7 +7382,7 @@ server's OpenCL/api-docs repository. - + @@ -7415,6 +7417,31 @@ server's OpenCL/api-docs repository. + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/xml/registry.rnc b/xml/registry.rnc index f29ba8d5..210073c2 100644 --- a/xml/registry.rnc +++ b/xml/registry.rnc @@ -1,20 +1,7 @@ -# Copyright (c) 2013-2024 The Khronos Group Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. +# Copyright 2013-2024 The Khronos Group Inc. +# SPDX-License-Identifier: Apache-2.0 -# Relax NG schema for Khronos Vulkan API Registry XML -# -# See https://www.khronos.org/vulkan/ +# Relax NG schema for Khronos API Registry XML # # This definition is subject to change (mostly in the form of additions) @@ -25,13 +12,13 @@ namespace xsd = "http://www.w3.org/2001/XMLSchema-datatypes" start = element registry { ( element comment { text } ? | - Platforms * | - Tags * | - Types * | - Enums * | - Commands * | - Feature * | - Extensions * + Platforms * | + Tags * | + Types * | + Enums * | + Commands * | + Feature * | + Extensions * ) * } @@ -80,15 +67,19 @@ Types = element types { # may contain arbitrary C code. 
# name - name of this type, if not present in the tag # api - matches a api attribute, if present +# alias - name of a type this type aliases # requires - name of another type definition required by this one +# bitvalues - for a *Flags type, name of an enum definition that +# defines the valid values for parameters of that type # category - if present, 'enum' indicates a matching # block to generate an enumerated type for, and 'struct' # causes special interpretation of the contents of the type # tag including ... TBD ... # Other allowed values are 'include', 'define', 'handle' and 'bitmask', -# which don't change syntactic interpretation but allow organization in -# the generated header. -# comment - unused +# which do not change syntactic interpretation but allow organization +# in the generated header. +# deprecated - denotes that this type is deprecated, and why. +# Valid values: 'aliased', 'true'. # parent - only applicable if category is 'handle'. Notes another type with # the 'handle' category that acts as a parent object for this type. # returnedonly - only applicable if category is 'struct'. Notes that this @@ -100,6 +91,11 @@ Types = element types { # When present it suppresses generation of automatic validity for the # pNext member of that structure, and instead the structure is added # to pNext chain validity for the parent structures it extends. +# allowduplicate - only applicable if category is 'struct'. pNext can include +# multiple structures of this type. +# objtypeenum - name of VK_OBJECT_TYPE_* API enumerant which corresponds +# to this type. Currently only specified for category="handle" types. 
+# comment - descriptive text with no semantic meaning # For types without a category, contents include # - substitutes for an APIENTRY-style macro on output # - contains name of the type being defined @@ -109,21 +105,26 @@ Types = element types { # For types with category 'struct', contents should be one or more # - like for a struct or union member # len - if the member is an array, len may be one or more of the following -# things, separated by commas (one for each array indirection): another -# member of that struct, 'null-terminated' for a string, '1' to indicate it's -# just a pointer (used for nested pointers), or a latex equation (prefixed with -# 'latexmath:') +# things, separated by commas (one for each array indirection): +# another member of that struct, 'null-terminated' for a string, +# '1' to indicate it is just a pointer (used for nested pointers), +# or a latex equation (prefixed with 'latexmath:') # altlen - if len has latexmath equations, this contains equivalent C99 # expressions separated by commas. +# deprecated - denotes that this member is deprecated, and why. +# Valid values: 'ignored', 'true'. 
# externsync - denotes that the member should be externally synchronized # when accessed by Vulkan # optional - whether this value can be omitted by providing NULL (for # pointers), VK_NULL_HANDLE (for handles) or 0 (for bitmasks/values) +# selector - for a union member, identifies a separate enum member that +# selects which of the union's members are valid +# selection - for a member of a union, identifies an enum value indicating the member is valid # noautovalidity - tag stating that no automatic validity language should be generated # values - comma-separated list of legal values, usually used only for sType enums # - containing arbitrary text (unused) # -# *** There's a problem here: I'm not sure how to represent the +# *** There is a problem here: I am not sure how to represent the # syntax where it may contain arbitrarily interleaved text, , and # child tags. This allows only the syntax # text name text name text @@ -133,11 +134,15 @@ Type = element type { attribute api { text } ? , attribute alias { text } ? , attribute requires { text } ? , + attribute bitvalues { text } ? , attribute name { TypeName } ? , attribute category { text } ? , + attribute deprecated { text } ? , attribute parent { TypeName } ? , attribute returnedonly { text } ? , attribute structextends { text } ? , + attribute allowduplicate { text } ? , + attribute objtypeenum { text } ? , Comment ? , ( ( @@ -155,18 +160,22 @@ Type = element type { ) | ( element member { + attribute api { text } ? , attribute len { text } ? , attribute altlen { text } ? , attribute externsync { text } ? , attribute optional { text } ? , + attribute selector { text } ? , + attribute selection { EnumName } ? , attribute noautovalidity { text } ? , attribute values { text } ? , + attribute deprecated { text } ? , mixed { element type { TypeName } ? , element name { text } ? , element enum { EnumName } ? , element comment { text } ? 
- } + } + } | element comment { text } ) * @@ -179,10 +188,12 @@ Type = element type { # start, end - beginning and end of a numeric range # vendor - owner of the numeric range # type - 'enum' or 'bitmask', if present +# bitwidth - bit width of the enum value type. # comment - unused Enums = element enums { attribute name { text } ? , attribute type { text } ? , + attribute bitwidth { Integer } ? , attribute start { Integer } ? , attribute end { Integer } ? , Vendor ? , @@ -216,10 +227,16 @@ Enums = element enums { # # Other attributes: # api - matches a api attribute, if present -# type - 'u' (unsigned), 'ull' (uint64), or integer if not present +# type - 'uint32_t', 'uint64_t', or 'float', if present. There are +# certain conditions under which the tag must be present, or absent, +# but they are context-dependent and difficult to express in the +# RNC syntax. # name - enumerant name # alias - another enumerant this is semantically identical to -# comment - unused +# protect - additional #ifdef symbol to place around the enum +# comment - descriptive text with no semantic meaning +# deprecated - denotes that this enum is deprecated, and why. +# Valid values: 'aliased', 'ignored', 'true'. Enum = element enum { ( ( @@ -242,9 +259,11 @@ Enum = element enum { attribute alias { TypeName } ) ) ? & + attribute protect { text } ? & attribute api { text } ? & attribute type { TypeSuffix } ? & attribute name { text } & + attribute deprecated { text } ? & Comment ? ) } @@ -252,7 +271,7 @@ Enum = element enum { # defines a range of enumerants not currently being used # start, end - beginning and end of an unused numeric range # vendor - unused -# comment - unused +# comment - descriptive text with no semantic meaning Unused = element unused { attribute start { Integer } , attribute end { Integer } ? , @@ -270,28 +289,46 @@ Commands = element commands { # # There are two forms of the tag. # -# The first only has 'name' and 'alias' attributes, and no contents. 
+# Either form may have an 'api' attribute +# api - matches a api attribute, if present +# +# The first form only has 'name' and 'alias' attributes, and no contents. # It defines a command alias. # -# The second fully defines a command, and has the following structure: +# The second form fully defines a command, and has the following structure: # The possible attributes are not described in this comment block yet, but -# are in readme.pdf. The "prefix" and "suffix" attributes are currently +# are in registry.html. The "prefix" and "suffix" attributes are currently # present only in the OpenCL XML registry, where they are currently unused. # # is the C function prototype, including the return type # are function parameters, in order # len - if the member is an array, len may be one or more of the following -# things, separated by commas (one for each array indirection): another -# member of that struct, 'null-terminated' for a string, '1' to indicate it's -# just a pointer (used for nested pointers), or a latex equation (prefixed with -# 'latexmath:') +# things, separated by commas (one for each array indirection): +# another member of that struct, 'null-terminated' for a string, +# '1' to indicate it is just a pointer (used for nested pointers), +# or a latex equation (prefixed with 'latexmath:') # altlen - if len has latexmath equations, this contains equivalent C99 # expressions separated by commas. 
# externsync - denotes that the member should be externally synchronized # when accessed by Vulkan # optional - whether this value can be omitted by providing NULL (for # pointers), VK_NULL_HANDLE (for handles) or 0 (for bitmasks/values) -# noautovalidity - tag stating that no automatic validity language should be generated +# selector - for a union parameter, identifies a separate enum parameter that +# selects which of the union's members are valid +# noautovalidity - tag stating that no automatic validity language should be +# generated +# objecttype - only applicable for parameters representing a handle as +# a uint64_t value. Specifies the name of another parameter which is +# a VkObjectType or VkDebugReportObjectTypeEXT value specifying +# the type of object the handle references. +# validstructs - only applicable for parameters which are pointers to +# VkBaseInStructure or VkBaseOutStructure types, used as abstract +# placeholders. Specifies a comma-separated list of structures which +# may be passed in place of the parameter, or anywhere in the pNext +# chain of the parameter. +# stride - if the member is an array, stride specifies the name of +# another member containing the byte stride between consecutive +# elements in the array. Is assumed tightly packed if omitted. # is a name, if present # is the function / parameter name, if present (normally should # be, except for void parameters). @@ -305,16 +342,20 @@ Commands = element commands { # are related to them and also require external synchronization. Command = element command { ( attribute name { text } , - attribute alias { text } ) | + attribute alias { text } , + attribute api { text } ? + ) | ( + attribute tasks { text } ? , attribute queues { text } ? , attribute successcodes { text } ? , attribute errorcodes { text } ? , attribute renderpass { text } ? , + attribute videocoding { text } ? , attribute cmdbufferlevel { text } ? , - attribute pipeline { text } ? , attribute prefix { text } ? 
, attribute suffix { text } ? , + attribute api { text } ? , Comment ? , element proto { mixed { @@ -323,11 +364,16 @@ Command = element command { } } , element param { + attribute api { text } ? , attribute len { text } ? , attribute altlen { text } ? , attribute externsync { text } ? , attribute optional { text } ? , + attribute selector { text } ? , attribute noautovalidity { text } ? , + attribute objecttype { text } ? , + attribute validstructs { text } ? , + attribute stride { text } ? , mixed { element type { TypeName } ? , element name { text } ? @@ -357,7 +403,7 @@ Command = element command { # / contains features to require or remove in # this version # profile - only require/remove when generated profile matches -# comment - unused +# comment - descriptive text with no semantic meaning Feature = element feature { attribute api { text } , Name , @@ -368,7 +414,7 @@ Feature = element feature { ( element require { ProfileName ? , - ExtensionName ? , + Depends ? , Comment ? , ( InterfaceElement | @@ -391,14 +437,15 @@ Extensions = element extensions { Extension * } -# Defines the interface of an API . Like a -# tag, but with slightly different attributes: +# Each defines the interface of an API . +# Like a tag, but with slightly different attributes: # api - regexp pattern matching one or more API tags, indicating # which APIs the extension is known to work with. The only # syntax supported is {|}* and each name must # exactly match an API being generated (implicit ^$ surrounding). 
# name - extension name string # number - extension number (positive integer, should be unique) +# revision - extension spec revision (text, usually numeric major.minor.patch) # sortorder - order relative to other extensions, default 0 # protect - C preprocessor symbol to conditionally define the interface # platform - should be one of the platform names defined in the @@ -406,50 +453,56 @@ Extensions = element extensions { # author - name of the author (usually a company or project name) # contact - contact responsible for the tag (name and contact information) # type - 'device' or 'instance', if present -# requires - commas-separated list of extension names required by this -# extension -# requiresCore - core version of Vulkan required by the extension, e.g. -# "1.1". Defaults to "1.0". -# supported - profile name(s) supporting this extension, e.g. 'vulkan' -# or 'disabled' to never generate output. -# promotedto - Vulkan version or a name of an extension that this -# extension was promoted to; e.g. 'VK_VERSION_1_1', or -# 'VK_KHR_draw_indirect_county' -# deprecatedby - Vulkan version or a name of an extension that deprecates -# this extension. It may be empty string. -# e.g. 'VK_VERSION_1_1', or 'VK_EXT_debug_utils', or '' -# obsoletedby - Vulkan version or a name of an extension that obsoletes -# this extension. It may be empty string. -# e.g. 'VK_VERSION_1_1', or 'VK_EXT_debug_utils', or '' +# condition - C preprocessor expression (**TBD**) +# depends - boolean expression of API and/or extension names +# upon which this extension depends. +# supported - comma-separated list of API name(s) supporting this extension, +# e.g. 'opencl', or 'disabled' to never generate output. +# ratified - comma-separated list of API name(s) for which this extension +# has been ratified by Khronos. Defaults to "" if not specified. +# promotedto - API version or name of an extension that this +# extension was promoted to; e.g. 
'CL_VERSION_1_1', or +# 'cl_khr_semaphore' +# deprecatedby - API version or name of an extension that deprecates +# this extension. It may be an empty string. +# e.g. 'CL_VERSION_1_1', or 'cl_khr_semaphore', or '' +# obsoletedby - API version or a name of an extension that obsoletes +# this extension. It may be an empty string. +# e.g. 'CL_VERSION_1_1', or 'cl_khr_semaphore', or '' # provisional - 'true' if this extension is released provisionally +# specialuse - contains one or more tokens separated by commas, indicating +# a special purpose of the extension. Tokens may include 'cadsupport', +# 'd3demulation', 'devtools', 'debugging', and 'glemulation'. Others +# may be added in the future. # In addition, / tags also support an api attribute: # api - only require/remove these features for the matching API. # Not a regular expression. Extension = element extension { Name , attribute number { Integer } ? , + attribute revision { text } ? , attribute sortorder { xsd:integer } ?, attribute protect { text } ? , attribute platform { text } ? , attribute author { text } ? , attribute contact { text } ? , attribute type { text } ? , - attribute requires { text } ? , - attribute requiresCore { text } ? , attribute condition { text } ? , + attribute depends { text } ?, attribute supported { StringGroup } ? , + attribute ratified { text } ? , attribute promotedto { text } ? , attribute deprecatedby { text } ? , attribute obsoletedby { text } ? , attribute provisional { text } ? , + attribute specialuse { text } ? , Comment ? , ( element require { attribute api { text } ? , attribute condition { text } ? , ProfileName ? , - ExtensionName ? , - FeatureName ? , + Depends ? , Comment ? , ( InterfaceElement | @@ -485,7 +538,7 @@ InterfaceElement = } # Integers are allowed to be either decimal or C-hex (0x[0-9A-F]+), but -# XML Schema types don't seem to support hex notation, so we use this +# XML Schema types do not seem to support hex notation, so we use this # as a placeholder. 
Integer = text @@ -505,7 +558,8 @@ StringGroup = text # Repeatedly used attributes ProfileName = attribute profile { text } ExtensionName = attribute extension { text } -FeatureName = attribute feature { text } +# Boolean expression of core version and extension names using (),+ operators +Depends = attribute depends { text } Vendor = attribute vendor { text } Comment = attribute comment { text } Name = attribute name { text }