Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

sys/linux: improve iouring interface #5646

Merged
merged 1 commit into from
Dec 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 33 additions & 7 deletions sys/linux/io_uring.txt
Original file line number Diff line number Diff line change
Expand Up @@ -64,8 +64,10 @@ io_uring_register$IORING_REGISTER_NAPI(fd fd_io_uring, opcode const[IORING_REGIS
io_uring_register$IORING_UNREGISTER_NAPI(fd fd_io_uring, opcode const[IORING_UNREGISTER_NAPI], arg ptr[out, io_uring_napi], nr_args const[1])
io_uring_register$IORING_REGISTER_CLOCK(fd fd_io_uring, opcode const[IORING_REGISTER_CLOCK], arg ptr[in, io_uring_clock_register], nr_args const[0])
io_uring_register$IORING_REGISTER_CLONE_BUFFERS(fd fd_io_uring, opcode const[IORING_REGISTER_CLONE_BUFFERS], arg ptr[in, io_uring_clone_buffers], nr_args const[1])
# Register opcodes that each take a pointer to a single struct (nr_args is fixed at 1).
# NOTE(review): the RESIZE_RINGS argument is declared ptr[in, io_uring_params]; if the kernel
# writes the resulting ring parameters back to this struct it should be ptr[inout, ...] -- confirm.
io_uring_register$IORING_REGISTER_RESIZE_RINGS(fd fd_io_uring, opcode const[IORING_REGISTER_RESIZE_RINGS], arg ptr[in, io_uring_params], nr_args const[1])
io_uring_register$IORING_REGISTER_MEM_REGION(fd fd_io_uring, opcode const[IORING_REGISTER_MEM_REGION], arg ptr[in, io_uring_mem_region_reg], nr_args const[1])

io_uring_register_opcodes = IORING_REGISTER_BUFFERS, IORING_UNREGISTER_BUFFERS, IORING_REGISTER_FILES, IORING_UNREGISTER_FILES, IORING_REGISTER_EVENTFD, IORING_UNREGISTER_EVENTFD, IORING_REGISTER_FILES_UPDATE, IORING_REGISTER_EVENTFD_ASYNC, IORING_REGISTER_PROBE, IORING_REGISTER_PERSONALITY, IORING_UNREGISTER_PERSONALITY, IORING_REGISTER_RESTRICTIONS, IORING_REGISTER_ENABLE_RINGS, IORING_REGISTER_FILES2, IORING_REGISTER_FILES_UPDATE2, IORING_REGISTER_BUFFERS2, IORING_REGISTER_BUFFERS_UPDATE, IORING_REGISTER_IOWQ_AFF, IORING_UNREGISTER_IOWQ_AFF, IORING_REGISTER_IOWQ_MAX_WORKERS, IORING_REGISTER_RING_FDS, IORING_UNREGISTER_RING_FDS, IORING_REGISTER_PBUF_RING, IORING_UNREGISTER_PBUF_RING, IORING_REGISTER_SYNC_CANCEL, IORING_REGISTER_FILE_ALLOC_RANGE, IORING_REGISTER_PBUF_STATUS, IORING_REGISTER_NAPI, IORING_UNREGISTER_NAPI, IORING_REGISTER_CLOCK, IORING_REGISTER_CLONE_BUFFERS
io_uring_register_opcodes = IORING_REGISTER_BUFFERS, IORING_UNREGISTER_BUFFERS, IORING_REGISTER_FILES, IORING_UNREGISTER_FILES, IORING_REGISTER_EVENTFD, IORING_UNREGISTER_EVENTFD, IORING_REGISTER_FILES_UPDATE, IORING_REGISTER_EVENTFD_ASYNC, IORING_REGISTER_PROBE, IORING_REGISTER_PERSONALITY, IORING_UNREGISTER_PERSONALITY, IORING_REGISTER_RESTRICTIONS, IORING_REGISTER_ENABLE_RINGS, IORING_REGISTER_FILES2, IORING_REGISTER_FILES_UPDATE2, IORING_REGISTER_BUFFERS2, IORING_REGISTER_BUFFERS_UPDATE, IORING_REGISTER_IOWQ_AFF, IORING_UNREGISTER_IOWQ_AFF, IORING_REGISTER_IOWQ_MAX_WORKERS, IORING_REGISTER_RING_FDS, IORING_UNREGISTER_RING_FDS, IORING_REGISTER_PBUF_RING, IORING_UNREGISTER_PBUF_RING, IORING_REGISTER_SYNC_CANCEL, IORING_REGISTER_FILE_ALLOC_RANGE, IORING_REGISTER_PBUF_STATUS, IORING_REGISTER_NAPI, IORING_UNREGISTER_NAPI, IORING_REGISTER_CLOCK, IORING_REGISTER_CLONE_BUFFERS, IORING_REGISTER_SEND_MSG_RING, IORING_REGISTER_RESIZE_RINGS, IORING_REGISTER_MEM_REGION

# The mmap'ed areas for the SQ and CQ rings are really the same -- the difference is
# accounted for by the offsets used.
Expand All @@ -74,7 +76,7 @@ mmap$IORING_OFF_CQ_RING(addr vma, len len[addr], prot flags[mmap_prot], flags fl
mmap$IORING_OFF_SQES(addr vma, len len[addr], prot flags[mmap_prot], flags flags[mmap_flags], fd fd_io_uring, offset const[IORING_OFF_SQES]) sqes_ptr

# If no flags are specified (0), the io_uring instance is set up for interrupt-driven I/O.
io_uring_setup_flags = 0, IORING_SETUP_IOPOLL, IORING_SETUP_SQPOLL, IORING_SETUP_SQ_AFF, IORING_SETUP_CQSIZE, IORING_SETUP_CLAMP, IORING_SETUP_ATTACH_WQ, IORING_FEAT_NODROP, IORING_FEAT_SUBMIT_STABLE, IORING_FEAT_RW_CUR_POS, IORING_FEAT_FAST_POLL, IORING_FEAT_POLL_32BITS, IORING_SETUP_R_DISABLED, IORING_FEAT_SQPOLL_NONFIXED, IORING_FEAT_NATIVE_WORKERS, IORING_FEAT_RSRC_TAGS, IORING_FEAT_CQE_SKIP, IORING_SETUP_SUBMIT_ALL, IORING_SETUP_COOP_TASKRUN, IORING_SETUP_TASKRUN_FLAG, IORING_SETUP_SQE128, IORING_SETUP_CQE32, IORING_SETUP_SINGLE_ISSUER, IORING_SETUP_DEFER_TASKRUN
io_uring_setup_flags = 0, IORING_SETUP_IOPOLL, IORING_SETUP_SQPOLL, IORING_SETUP_SQ_AFF, IORING_SETUP_CQSIZE, IORING_SETUP_CLAMP, IORING_SETUP_ATTACH_WQ, IORING_FEAT_SINGLE_MMAP, IORING_FEAT_NODROP, IORING_FEAT_SUBMIT_STABLE, IORING_FEAT_RW_CUR_POS, IORING_FEAT_FAST_POLL, IORING_FEAT_POLL_32BITS, IORING_SETUP_R_DISABLED, IORING_FEAT_SQPOLL_NONFIXED, IORING_FEAT_NATIVE_WORKERS, IORING_FEAT_RSRC_TAGS, IORING_FEAT_CQE_SKIP, IORING_FEAT_LINKED_FILE, IORING_FEAT_REG_REG_RING, IORING_FEAT_RECVSEND_BUNDLE, IORING_FEAT_MIN_TIMEOUT, IORING_SETUP_SUBMIT_ALL, IORING_SETUP_COOP_TASKRUN, IORING_SETUP_TASKRUN_FLAG, IORING_SETUP_SQE128, IORING_SETUP_CQE32, IORING_SETUP_SINGLE_ISSUER, IORING_SETUP_DEFER_TASKRUN, IORING_SETUP_NO_MMAP, IORING_SETUP_REGISTERED_FD_ONLY, IORING_SETUP_NO_SQARRAY, IORING_SETUP_HYBRID_IOPOLL
# Watch out for the kernel version under test; each flag below requires at least the listed version:
# IORING_FEAT_SINGLE_MMAP >= 5.4
# IORING_FEAT_NODROP, IORING_FEAT_SUBMIT_STABLE >= 5.5
Expand All @@ -91,7 +93,7 @@ io_uring_setup_flags = 0, IORING_SETUP_IOPOLL, IORING_SETUP_SQPOLL, IORING_SETUP
# IORING_SETUP_SINGLE_ISSUER >= 6.0
# IORING_SETUP_DEFER_TASKRUN >= 6.1

io_uring_enter_flags = IORING_ENTER_GETEVENTS, IORING_ENTER_SQ_WAKEUP, IORING_ENTER_SQ_WAIT, IORING_ENTER_EXT_ARG, IORING_ENTER_REGISTERED_RING
io_uring_enter_flags = IORING_ENTER_GETEVENTS, IORING_ENTER_SQ_WAKEUP, IORING_ENTER_SQ_WAIT, IORING_ENTER_EXT_ARG, IORING_ENTER_REGISTERED_RING, IORING_ENTER_ABS_TIMER, IORING_ENTER_EXT_ARG_REG
# IORING_ENTER_EXT_ARG >= 5.11
_ = __NR_mmap2

Expand Down Expand Up @@ -335,7 +337,7 @@ type io_uring_sqe$recvmsg io_uring_sqe[IORING_OP_RECVMSG, flags[iouring_recv_iop
type io_uring_sqe$timeout io_uring_sqe[IORING_OP_TIMEOUT, const[0, int16], const[0, int32], io_uring_timeout_completion_event_count, ptr[in, timespec], const[1, int32], flags[io_uring_timeout_flags, int32], sqe_user_data_not_openat, personality_only_misc]
# IORING_OP_TIMEOUT >= 5.4
type io_uring_sqe$timeout_remove io_uring_sqe[IORING_OP_TIMEOUT_REMOVE, const[0, int16], const[0, int32], const[0, int64], flags[sqe_user_data, int64], const[0, int32], const[0, int32], sqe_user_data_not_openat, personality_only_misc]
type io_uring_sqe$accept io_uring_sqe[IORING_OP_ACCEPT, const[0, int16], sock, ptr[inout, len[addr, int32]], ptr[out, sockaddr_storage, opt], const[0, int32], flags[accept_flags, int32], sqe_user_data_not_openat, personality_only_misc]
type io_uring_sqe$accept io_uring_sqe[IORING_OP_ACCEPT, flags[iouring_accept_flags, int16], sock, ptr[inout, len[addr, int32]], ptr[out, sockaddr_storage, opt], const[0, int32], flags[accept_flags, int32], sqe_user_data_not_openat, personality_only_misc]
type io_uring_sqe$async_cancel io_uring_sqe[IORING_OP_ASYNC_CANCEL, const[0, int16], const[0, int32], const[0, int64], flags[sqe_user_data, int64], const[0, int32], const[0, int32], sqe_user_data_not_openat, personality_only_misc]
type io_uring_sqe$link_timeout io_uring_sqe[IORING_OP_LINK_TIMEOUT, const[0, int16], const[0, int32], const[0, int64], ptr[in, timespec], const[1, int32], flags[io_uring_timeout_flags, int32], sqe_user_data_not_openat, personality_only_misc]
type io_uring_sqe$connect io_uring_sqe[IORING_OP_CONNECT, const[0, int16], sock, len[addr, int32], ptr[in, sockaddr_storage], const[0, int32], const[0, int32], sqe_user_data_not_openat, personality_only_misc]
Expand All @@ -346,6 +348,8 @@ type io_uring_sqe$close io_uring_sqe[IORING_OP_CLOSE, const[0, int16], fd, const
type io_uring_sqe$files_update io_uring_sqe[IORING_OP_FILES_UPDATE, const[0, int16], const[0, int32], fileoff[int64], ptr[in, array[fd]], len[addr, int32], const[0, int32], sqe_user_data_not_openat, personality_only_misc]
type io_uring_sqe$statx io_uring_sqe[IORING_OP_STATX, const[0, int16], fd_dir[opt], ptr[out, statx], ptr64[in, filename], flags[statx_mask, int32], flags[statx_flags, int32], sqe_user_data_not_openat, personality_only_misc]

# Flags for IORING_OP_ACCEPT, passed in the int16 slot that directly follows the opcode
# in io_uring_sqe$accept (the same slot other variants fill with ioprio values).
iouring_accept_flags = IORING_ACCEPT_MULTISHOT, IORING_ACCEPT_DONTWAIT, IORING_ACCEPT_POLL_FIRST

io_uring_sqe_read [
pass_buffer io_uring_sqe[IORING_OP_READ, flags[ioprio_priorities, int16], fd_or_fixed_fd_index, fileoff[int64], buffer[out], bytesize[addr, int32], flags[rwf_flags, int32], sqe_user_data_not_openat, personality_only_misc]
use_registered_buffer io_uring_sqe[IORING_OP_READ, flags[ioprio_priorities, int16], fd_or_fixed_fd_index, fileoff[int64], const[0, int64], const[0, int32], flags[rwf_flags, int32], sqe_user_data_not_openat, buf_index_personality_misc]
Expand All @@ -356,7 +360,7 @@ type io_uring_sqe$fadvise io_uring_sqe[IORING_OP_FADVISE, const[0, int16], fd_or
type io_uring_sqe$madvise io_uring_sqe[IORING_OP_MADVISE, const[0, int16], const[0, int32], const[0, int64], vma, len[addr, int32], flags[madvise_flags, int32], sqe_user_data_not_openat, personality_only_misc]
type io_uring_sqe$send io_uring_sqe[IORING_OP_SEND, const[0, int16], sock, const[0, int64], buffer[in], len[addr, int32], flags[send_flags, int32], sqe_user_data_not_openat, personality_only_misc]

iouring_recv_ioprio = IORING_RECVSEND_POLL_FIRST, IORING_RECV_MULTISHOT, IORING_RECVSEND_FIXED_BUF
iouring_recv_ioprio = IORING_RECVSEND_POLL_FIRST, IORING_RECV_MULTISHOT, IORING_RECVSEND_FIXED_BUF, IORING_RECVSEND_BUNDLE

io_uring_sqe_recv [
pass_buffer io_uring_sqe[IORING_OP_RECV, const[0, int16], sock, const[0, int64], buffer[inout], len[addr, int32], flags[recv_flags, int32], sqe_user_data_not_openat, personality_only_misc]
Expand Down Expand Up @@ -392,7 +396,10 @@ type io_uring_sqe$symlinkat io_uring_sqe[IORING_OP_SYMLINKAT, const[0, int16], f
type io_uring_sqe$linkat io_uring_sqe[IORING_OP_LINKAT, const[0, int16], fd_dir, ptr64[in, filename], ptr64[in, filename], fd_dir, flags[linkat_flags, int32], sqe_user_data_not_openat, personality_only_misc]
# IORING_OP_MKDIRAT, IORING_OP_SYMLINKAT, IORING_OP_LINKAT >= 5.15

type io_uring_sqe$msg_ring io_uring_sqe[IORING_OP_MSG_RING, const[0, int16], fd_io_uring, int64, buffer[in], len[addr, int32], const[0, int32], sqe_user_data_not_openat, personality_only_misc]
# Command selector values for IORING_OP_MSG_RING.
iouring_msg_ring_cmd_flags = IORING_MSG_DATA, IORING_MSG_SEND_FD
# Per-request flags for IORING_OP_MSG_RING, passed in the int32 slot where other
# variants pass their opcode-specific flags (e.g. send_flags in io_uring_sqe$send).
msg_ring_flags = IORING_MSG_RING_CQE_SKIP, IORING_MSG_RING_FLAGS_PASS

# The target ring is given as fd_io_uring in the fd slot.
# NOTE(review): the command selector occupies the 4th template argument, which other
# variants use as the file offset (e.g. fileoff[int64] in io_uring_sqe$files_update), while the
# 5th argument stays buffer[in]. Confirm the kernel reads the msg_ring command from this
# field and not from the address field.
type io_uring_sqe$msg_ring io_uring_sqe[IORING_OP_MSG_RING, const[0, int16], fd_io_uring, flags[iouring_msg_ring_cmd_flags, int64], buffer[in], len[addr, int32], flags[msg_ring_flags, int32], sqe_user_data_not_openat, personality_only_misc]
# IORING_OP_MSG_RING >= 5.18

# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
Expand Down Expand Up @@ -422,7 +429,7 @@ fd_or_fixed_fd_index [
io_uring_fsync_flags = 0, IORING_FSYNC_DATASYNC

# 0 for relative, IORING_TIMEOUT_ABS for absolute timeout value
io_uring_timeout_flags = 0, IORING_TIMEOUT_ABS
io_uring_timeout_flags = 0, IORING_TIMEOUT_ABS, IORING_TIMEOUT_UPDATE, IORING_TIMEOUT_BOOTTIME, IORING_TIMEOUT_REALTIME, IORING_LINK_TIMEOUT_UPDATE, IORING_TIMEOUT_ETIME_SUCCESS, IORING_TIMEOUT_MULTISHOT, IORING_TIMEOUT_CLOCK_MASK, IORING_TIMEOUT_UPDATE_MASK

# The timeout condition is met when either the specified timeout expires, or the
# specified number of events have completed. If not set, defaults to 1. Use a
Expand Down Expand Up @@ -543,3 +550,22 @@ io_uring_clone_buffers {
flags const[1, int32]
pad array[const[0, int32], 6]
}

# Argument of io_uring_register$IORING_REGISTER_MEM_REGION.
#   region_uptr: 64-bit pointer to the io_uring_region_desc describing the region (inout).
#   flags:       values from io_uring_region_flags, stored as int64.
#   resv:        two int64 words, always generated as zero.
io_uring_mem_region_reg {
region_uptr ptr64[inout, io_uring_region_desc]
flags flags[io_uring_region_flags, int64]
resv array[const[0, int64], 2]
}

# Flag values accepted in io_uring_mem_region_reg.flags.
io_uring_region_flags = IORING_MEM_REGION_REG_WAIT_ARG

# Region descriptor referenced by io_uring_mem_region_reg.region_uptr.
#   user_addr:   64-bit pointer to a byte array (inout).
#   size:        length of the user_addr array; elements are int8, so this equals its byte size.
#   flags:       values from io_uring_region_desc_flags, stored as int32.
#   id:          unconstrained int32.
#   mmap_offset: unconstrained int64.
#   resv:        four int64 words, always generated as zero.
io_uring_region_desc {
user_addr ptr64[inout, array[int8]]
size len[user_addr, int64]
flags flags[io_uring_region_desc_flags, int32]
id int32
mmap_offset int64
resv array[const[0, int64], 4]
}

# Flag values accepted in io_uring_region_desc.flags.
io_uring_region_desc_flags = IORING_MEM_REGION_TYPE_USER
32 changes: 32 additions & 0 deletions sys/linux/io_uring.txt.const
Original file line number Diff line number Diff line change
Expand Up @@ -9,29 +9,46 @@ CQ_TAIL_OFFSET = 192
EPOLL_CTL_ADD = 1
EPOLL_CTL_DEL = 2
EPOLL_CTL_MOD = 3
IORING_ACCEPT_DONTWAIT = 2
IORING_ACCEPT_MULTISHOT = 1
IORING_ACCEPT_POLL_FIRST = 4
IORING_ASYNC_CANCEL_ALL = 1
IORING_ASYNC_CANCEL_ANY = 4
IORING_ASYNC_CANCEL_FD = 2
IORING_ASYNC_CANCEL_FD_FIXED = 8
IORING_ASYNC_CANCEL_OP = 32
IORING_ASYNC_CANCEL_USERDATA = 16
IORING_ENTER_ABS_TIMER = 32
IORING_ENTER_EXT_ARG = 8
IORING_ENTER_EXT_ARG_REG = 64
IORING_ENTER_GETEVENTS = 1
IORING_ENTER_REGISTERED_RING = 16
IORING_ENTER_SQ_WAIT = 4
IORING_ENTER_SQ_WAKEUP = 2
IORING_FEAT_CQE_SKIP = 2048
IORING_FEAT_FAST_POLL = 32
IORING_FEAT_LINKED_FILE = 4096
IORING_FEAT_MIN_TIMEOUT = 32768
IORING_FEAT_NATIVE_WORKERS = 512
IORING_FEAT_NODROP = 2
IORING_FEAT_POLL_32BITS = 64
IORING_FEAT_RECVSEND_BUNDLE = 16384
IORING_FEAT_REG_REG_RING = 8192
IORING_FEAT_RSRC_TAGS = 1024
IORING_FEAT_RW_CUR_POS = 8
IORING_FEAT_SINGLE_MMAP = 1
IORING_FEAT_SQPOLL_NONFIXED = 128
IORING_FEAT_SUBMIT_STABLE = 4
IORING_FSYNC_DATASYNC = 1
IORING_LINK_TIMEOUT_UPDATE = 16
IORING_MAX_CQ_ENTRIES = 65536
IORING_MAX_ENTRIES = 32768
IORING_MEM_REGION_REG_WAIT_ARG = 1
IORING_MEM_REGION_TYPE_USER = 1
IORING_MSG_DATA = 0
IORING_MSG_RING_CQE_SKIP = 1
IORING_MSG_RING_FLAGS_PASS = 2
IORING_MSG_SEND_FD = 1
IORING_OFF_CQ_RING = 134217728
IORING_OFF_SQES = 268435456
IORING_OFF_SQ_RING = 0
Expand Down Expand Up @@ -76,6 +93,7 @@ IORING_OP_UNLINKAT = 36
IORING_OP_WRITE = 23
IORING_OP_WRITEV = 2
IORING_OP_WRITE_FIXED = 5
IORING_RECVSEND_BUNDLE = 16
IORING_RECVSEND_FIXED_BUF = 4
IORING_RECVSEND_POLL_FIRST = 1
IORING_RECV_MULTISHOT = 2
Expand All @@ -94,13 +112,16 @@ IORING_REGISTER_FILES_UPDATE2 = 14
IORING_REGISTER_FILE_ALLOC_RANGE = 25
IORING_REGISTER_IOWQ_AFF = 17
IORING_REGISTER_IOWQ_MAX_WORKERS = 19
IORING_REGISTER_MEM_REGION = 34
IORING_REGISTER_NAPI = 27
IORING_REGISTER_PBUF_RING = 22
IORING_REGISTER_PBUF_STATUS = 26
IORING_REGISTER_PERSONALITY = 9
IORING_REGISTER_PROBE = 8
IORING_REGISTER_RESIZE_RINGS = 33
IORING_REGISTER_RESTRICTIONS = 11
IORING_REGISTER_RING_FDS = 20
IORING_REGISTER_SEND_MSG_RING = 31
IORING_REGISTER_SYNC_CANCEL = 24
IORING_RESTRICTION_REGISTER_OP = 0
IORING_RESTRICTION_SQE_FLAGS_ALLOWED = 2
Expand All @@ -113,7 +134,11 @@ IORING_SETUP_COOP_TASKRUN = 256
IORING_SETUP_CQE32 = 2048
IORING_SETUP_CQSIZE = 8
IORING_SETUP_DEFER_TASKRUN = 8192
IORING_SETUP_HYBRID_IOPOLL = 131072
IORING_SETUP_IOPOLL = 1
IORING_SETUP_NO_MMAP = 16384
IORING_SETUP_NO_SQARRAY = 65536
IORING_SETUP_REGISTERED_FD_ONLY = 32768
IORING_SETUP_R_DISABLED = 64
IORING_SETUP_SINGLE_ISSUER = 4096
IORING_SETUP_SQE128 = 1024
Expand All @@ -122,6 +147,13 @@ IORING_SETUP_SQ_AFF = 4
IORING_SETUP_SUBMIT_ALL = 128
IORING_SETUP_TASKRUN_FLAG = 512
IORING_TIMEOUT_ABS = 1
IORING_TIMEOUT_BOOTTIME = 4
IORING_TIMEOUT_CLOCK_MASK = 12
IORING_TIMEOUT_ETIME_SUCCESS = 32
IORING_TIMEOUT_MULTISHOT = 64
IORING_TIMEOUT_REALTIME = 8
IORING_TIMEOUT_UPDATE = 2
IORING_TIMEOUT_UPDATE_MASK = 18
IORING_UNREGISTER_BUFFERS = 1
IORING_UNREGISTER_EVENTFD = 5
IORING_UNREGISTER_FILES = 3
Expand Down
Loading