Skip to content

Commit

Permalink
sys/linux: improve iouring interface
Browse files Browse the repository at this point in the history
  • Loading branch information
Sablin Viacheslav committed Dec 24, 2024
1 parent b4fbdbd commit 8eece32
Show file tree
Hide file tree
Showing 2 changed files with 65 additions and 7 deletions.
40 changes: 33 additions & 7 deletions sys/linux/io_uring.txt
Original file line number Diff line number Diff line change
Expand Up @@ -64,8 +64,10 @@ io_uring_register$IORING_REGISTER_NAPI(fd fd_io_uring, opcode const[IORING_REGIS
io_uring_register$IORING_UNREGISTER_NAPI(fd fd_io_uring, opcode const[IORING_UNREGISTER_NAPI], arg ptr[out, io_uring_napi], nr_args const[1])
io_uring_register$IORING_REGISTER_CLOCK(fd fd_io_uring, opcode const[IORING_REGISTER_CLOCK], arg ptr[in, io_uring_clock_register], nr_args const[0])
io_uring_register$IORING_REGISTER_CLONE_BUFFERS(fd fd_io_uring, opcode const[IORING_REGISTER_CLONE_BUFFERS], arg ptr[in, io_uring_clone_buffers], nr_args const[1])
# IORING_REGISTER_RESIZE_RINGS: arg points to an io_uring_params (direction in), nr_args is fixed to 1.
io_uring_register$IORING_REGISTER_RESIZE_RINGS(fd fd_io_uring, opcode const[IORING_REGISTER_RESIZE_RINGS], arg ptr[in, io_uring_params], nr_args const[1])
# IORING_REGISTER_MEM_REGION: arg points to an io_uring_mem_region_reg (direction in), nr_args is fixed to 1.
io_uring_register$IORING_REGISTER_MEM_REGION(fd fd_io_uring, opcode const[IORING_REGISTER_MEM_REGION], arg ptr[in, io_uring_mem_region_reg], nr_args const[1])

io_uring_register_opcodes = IORING_REGISTER_BUFFERS, IORING_UNREGISTER_BUFFERS, IORING_REGISTER_FILES, IORING_UNREGISTER_FILES, IORING_REGISTER_EVENTFD, IORING_UNREGISTER_EVENTFD, IORING_REGISTER_FILES_UPDATE, IORING_REGISTER_EVENTFD_ASYNC, IORING_REGISTER_PROBE, IORING_REGISTER_PERSONALITY, IORING_UNREGISTER_PERSONALITY, IORING_REGISTER_RESTRICTIONS, IORING_REGISTER_ENABLE_RINGS, IORING_REGISTER_FILES2, IORING_REGISTER_FILES_UPDATE2, IORING_REGISTER_BUFFERS2, IORING_REGISTER_BUFFERS_UPDATE, IORING_REGISTER_IOWQ_AFF, IORING_UNREGISTER_IOWQ_AFF, IORING_REGISTER_IOWQ_MAX_WORKERS, IORING_REGISTER_RING_FDS, IORING_UNREGISTER_RING_FDS, IORING_REGISTER_PBUF_RING, IORING_UNREGISTER_PBUF_RING, IORING_REGISTER_SYNC_CANCEL, IORING_REGISTER_FILE_ALLOC_RANGE, IORING_REGISTER_PBUF_STATUS, IORING_REGISTER_NAPI, IORING_UNREGISTER_NAPI, IORING_REGISTER_CLOCK, IORING_REGISTER_CLONE_BUFFERS
io_uring_register_opcodes = IORING_REGISTER_BUFFERS, IORING_UNREGISTER_BUFFERS, IORING_REGISTER_FILES, IORING_UNREGISTER_FILES, IORING_REGISTER_EVENTFD, IORING_UNREGISTER_EVENTFD, IORING_REGISTER_FILES_UPDATE, IORING_REGISTER_EVENTFD_ASYNC, IORING_REGISTER_PROBE, IORING_REGISTER_PERSONALITY, IORING_UNREGISTER_PERSONALITY, IORING_REGISTER_RESTRICTIONS, IORING_REGISTER_ENABLE_RINGS, IORING_REGISTER_FILES2, IORING_REGISTER_FILES_UPDATE2, IORING_REGISTER_BUFFERS2, IORING_REGISTER_BUFFERS_UPDATE, IORING_REGISTER_IOWQ_AFF, IORING_UNREGISTER_IOWQ_AFF, IORING_REGISTER_IOWQ_MAX_WORKERS, IORING_REGISTER_RING_FDS, IORING_UNREGISTER_RING_FDS, IORING_REGISTER_PBUF_RING, IORING_UNREGISTER_PBUF_RING, IORING_REGISTER_SYNC_CANCEL, IORING_REGISTER_FILE_ALLOC_RANGE, IORING_REGISTER_PBUF_STATUS, IORING_REGISTER_NAPI, IORING_UNREGISTER_NAPI, IORING_REGISTER_CLOCK, IORING_REGISTER_CLONE_BUFFERS, IORING_REGISTER_SEND_MSG_RING, IORING_REGISTER_RESIZE_RINGS, IORING_REGISTER_MEM_REGION

# The mmap'ed areas for the SQ and CQ rings are really the same -- the difference is
# accounted for with the usage of offsets.
Expand All @@ -74,7 +76,7 @@ mmap$IORING_OFF_CQ_RING(addr vma, len len[addr], prot flags[mmap_prot], flags fl
mmap$IORING_OFF_SQES(addr vma, len len[addr], prot flags[mmap_prot], flags flags[mmap_flags], fd fd_io_uring, offset const[IORING_OFF_SQES]) sqes_ptr

# If no flags are specified (0), the io_uring instance is set up for interrupt-driven I/O.
io_uring_setup_flags = 0, IORING_SETUP_IOPOLL, IORING_SETUP_SQPOLL, IORING_SETUP_SQ_AFF, IORING_SETUP_CQSIZE, IORING_SETUP_CLAMP, IORING_SETUP_ATTACH_WQ, IORING_FEAT_NODROP, IORING_FEAT_SUBMIT_STABLE, IORING_FEAT_RW_CUR_POS, IORING_FEAT_FAST_POLL, IORING_FEAT_POLL_32BITS, IORING_SETUP_R_DISABLED, IORING_FEAT_SQPOLL_NONFIXED, IORING_FEAT_NATIVE_WORKERS, IORING_FEAT_RSRC_TAGS, IORING_FEAT_CQE_SKIP, IORING_SETUP_SUBMIT_ALL, IORING_SETUP_COOP_TASKRUN, IORING_SETUP_TASKRUN_FLAG, IORING_SETUP_SQE128, IORING_SETUP_CQE32, IORING_SETUP_SINGLE_ISSUER, IORING_SETUP_DEFER_TASKRUN
io_uring_setup_flags = 0, IORING_SETUP_IOPOLL, IORING_SETUP_SQPOLL, IORING_SETUP_SQ_AFF, IORING_SETUP_CQSIZE, IORING_SETUP_CLAMP, IORING_SETUP_ATTACH_WQ, IORING_FEAT_SINGLE_MMAP, IORING_FEAT_NODROP, IORING_FEAT_SUBMIT_STABLE, IORING_FEAT_RW_CUR_POS, IORING_FEAT_FAST_POLL, IORING_FEAT_POLL_32BITS, IORING_SETUP_R_DISABLED, IORING_FEAT_SQPOLL_NONFIXED, IORING_FEAT_NATIVE_WORKERS, IORING_FEAT_RSRC_TAGS, IORING_FEAT_CQE_SKIP, IORING_FEAT_LINKED_FILE, IORING_FEAT_REG_REG_RING, IORING_FEAT_RECVSEND_BUNDLE, IORING_FEAT_MIN_TIMEOUT, IORING_SETUP_SUBMIT_ALL, IORING_SETUP_COOP_TASKRUN, IORING_SETUP_TASKRUN_FLAG, IORING_SETUP_SQE128, IORING_SETUP_CQE32, IORING_SETUP_SINGLE_ISSUER, IORING_SETUP_DEFER_TASKRUN, IORING_SETUP_NO_MMAP, IORING_SETUP_REGISTERED_FD_ONLY, IORING_SETUP_NO_SQARRAY, IORING_SETUP_HYBRID_IOPOLL
# Watch out for the version of the kernel being tested:
# IORING_FEAT_SINGLE_MMAP >= 5.4
# IORING_FEAT_NODROP, IORING_FEAT_SUBMIT_STABLE >= 5.5
Expand All @@ -91,7 +93,7 @@ io_uring_setup_flags = 0, IORING_SETUP_IOPOLL, IORING_SETUP_SQPOLL, IORING_SETUP
# IORING_SETUP_SINGLE_ISSUER >= 6.0
# IORING_SETUP_DEFER_TASKRUN >= 6.1

io_uring_enter_flags = IORING_ENTER_GETEVENTS, IORING_ENTER_SQ_WAKEUP, IORING_ENTER_SQ_WAIT, IORING_ENTER_EXT_ARG, IORING_ENTER_REGISTERED_RING
io_uring_enter_flags = IORING_ENTER_GETEVENTS, IORING_ENTER_SQ_WAKEUP, IORING_ENTER_SQ_WAIT, IORING_ENTER_EXT_ARG, IORING_ENTER_REGISTERED_RING, IORING_ENTER_ABS_TIMER, IORING_ENTER_EXT_ARG_REG
# IORING_ENTER_EXT_ARG >= 5.11
_ = __NR_mmap2

Expand Down Expand Up @@ -335,7 +337,7 @@ type io_uring_sqe$recvmsg io_uring_sqe[IORING_OP_RECVMSG, flags[iouring_recv_iop
type io_uring_sqe$timeout io_uring_sqe[IORING_OP_TIMEOUT, const[0, int16], const[0, int32], io_uring_timeout_completion_event_count, ptr[in, timespec], const[1, int32], flags[io_uring_timeout_flags, int32], sqe_user_data_not_openat, personality_only_misc]
# IORING_OP_TIMEOUT >= 5.4
type io_uring_sqe$timeout_remove io_uring_sqe[IORING_OP_TIMEOUT_REMOVE, const[0, int16], const[0, int32], const[0, int64], flags[sqe_user_data, int64], const[0, int32], const[0, int32], sqe_user_data_not_openat, personality_only_misc]
type io_uring_sqe$accept io_uring_sqe[IORING_OP_ACCEPT, const[0, int16], sock, ptr[inout, len[addr, int32]], ptr[out, sockaddr_storage, opt], const[0, int32], flags[accept_flags, int32], sqe_user_data_not_openat, personality_only_misc]
type io_uring_sqe$accept io_uring_sqe[IORING_OP_ACCEPT, flags[iouring_accept_flags, int16], sock, ptr[inout, len[addr, int32]], ptr[out, sockaddr_storage, opt], const[0, int32], flags[accept_flags, int32], sqe_user_data_not_openat, personality_only_misc]
type io_uring_sqe$async_cancel io_uring_sqe[IORING_OP_ASYNC_CANCEL, const[0, int16], const[0, int32], const[0, int64], flags[sqe_user_data, int64], const[0, int32], const[0, int32], sqe_user_data_not_openat, personality_only_misc]
type io_uring_sqe$link_timeout io_uring_sqe[IORING_OP_LINK_TIMEOUT, const[0, int16], const[0, int32], const[0, int64], ptr[in, timespec], const[1, int32], flags[io_uring_timeout_flags, int32], sqe_user_data_not_openat, personality_only_misc]
type io_uring_sqe$connect io_uring_sqe[IORING_OP_CONNECT, const[0, int16], sock, len[addr, int32], ptr[in, sockaddr_storage], const[0, int32], const[0, int32], sqe_user_data_not_openat, personality_only_misc]
Expand All @@ -346,6 +348,8 @@ type io_uring_sqe$close io_uring_sqe[IORING_OP_CLOSE, const[0, int16], fd, const
type io_uring_sqe$files_update io_uring_sqe[IORING_OP_FILES_UPDATE, const[0, int16], const[0, int32], fileoff[int64], ptr[in, array[fd]], len[addr, int32], const[0, int32], sqe_user_data_not_openat, personality_only_misc]
type io_uring_sqe$statx io_uring_sqe[IORING_OP_STATX, const[0, int16], fd_dir[opt], ptr[out, statx], ptr64[in, filename], flags[statx_mask, int32], flags[statx_flags, int32], sqe_user_data_not_openat, personality_only_misc]

# Flag values passed through the int16 template argument that directly follows the opcode in io_uring_sqe$accept.
iouring_accept_flags = IORING_ACCEPT_MULTISHOT, IORING_ACCEPT_DONTWAIT, IORING_ACCEPT_POLL_FIRST

io_uring_sqe_read [
pass_buffer io_uring_sqe[IORING_OP_READ, flags[ioprio_priorities, int16], fd_or_fixed_fd_index, fileoff[int64], buffer[out], bytesize[addr, int32], flags[rwf_flags, int32], sqe_user_data_not_openat, personality_only_misc]
use_registered_buffer io_uring_sqe[IORING_OP_READ, flags[ioprio_priorities, int16], fd_or_fixed_fd_index, fileoff[int64], const[0, int64], const[0, int32], flags[rwf_flags, int32], sqe_user_data_not_openat, buf_index_personality_misc]
Expand All @@ -356,7 +360,7 @@ type io_uring_sqe$fadvise io_uring_sqe[IORING_OP_FADVISE, const[0, int16], fd_or
type io_uring_sqe$madvise io_uring_sqe[IORING_OP_MADVISE, const[0, int16], const[0, int32], const[0, int64], vma, len[addr, int32], flags[madvise_flags, int32], sqe_user_data_not_openat, personality_only_misc]
type io_uring_sqe$send io_uring_sqe[IORING_OP_SEND, const[0, int16], sock, const[0, int64], buffer[in], len[addr, int32], flags[send_flags, int32], sqe_user_data_not_openat, personality_only_misc]

iouring_recv_ioprio = IORING_RECVSEND_POLL_FIRST, IORING_RECV_MULTISHOT, IORING_RECVSEND_FIXED_BUF
iouring_recv_ioprio = IORING_RECVSEND_POLL_FIRST, IORING_RECV_MULTISHOT, IORING_RECVSEND_FIXED_BUF, IORING_RECVSEND_BUNDLE

io_uring_sqe_recv [
pass_buffer io_uring_sqe[IORING_OP_RECV, const[0, int16], sock, const[0, int64], buffer[inout], len[addr, int32], flags[recv_flags, int32], sqe_user_data_not_openat, personality_only_misc]
Expand Down Expand Up @@ -392,7 +396,10 @@ type io_uring_sqe$symlinkat io_uring_sqe[IORING_OP_SYMLINKAT, const[0, int16], f
type io_uring_sqe$linkat io_uring_sqe[IORING_OP_LINKAT, const[0, int16], fd_dir, ptr64[in, filename], ptr64[in, filename], fd_dir, flags[linkat_flags, int32], sqe_user_data_not_openat, personality_only_misc]
# IORING_OP_MKDIRAT, IORING_OP_SYMLINKAT, IORING_OP_LINKAT >= 5.15

type io_uring_sqe$msg_ring io_uring_sqe[IORING_OP_MSG_RING, const[0, int16], fd_io_uring, int64, buffer[in], len[addr, int32], const[0, int32], sqe_user_data_not_openat, personality_only_misc]
# Commands understood by IORING_OP_MSG_RING. The kernel reads the command from sqe->addr.
iouring_msg_ring_cmd_flags = IORING_MSG_DATA, IORING_MSG_SEND_FD
# Values for the op-specific 32-bit flags word of IORING_OP_MSG_RING.
msg_ring_flags = IORING_MSG_RING_CQE_SKIP, IORING_MSG_RING_FLAGS_PASS

# Template arguments after the target ring fd, in order:
#   off  - arbitrary 64-bit value posted as user_data in the target ring's CQE
#   addr - the command (iouring_msg_ring_cmd_flags), not a buffer pointer
#   len  - arbitrary 32-bit value posted as the result in the target ring's CQE
type io_uring_sqe$msg_ring io_uring_sqe[IORING_OP_MSG_RING, const[0, int16], fd_io_uring, int64, flags[iouring_msg_ring_cmd_flags, int64], int32, flags[msg_ring_flags, int32], sqe_user_data_not_openat, personality_only_misc]
# IORING_OP_MSG_RING >= 5.18

# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
Expand Down Expand Up @@ -422,7 +429,7 @@ fd_or_fixed_fd_index [
io_uring_fsync_flags = 0, IORING_FSYNC_DATASYNC

# 0 for relative, IORING_TIMEOUT_ABS for absolute timeout value
io_uring_timeout_flags = 0, IORING_TIMEOUT_ABS
io_uring_timeout_flags = 0, IORING_TIMEOUT_ABS, IORING_TIMEOUT_UPDATE, IORING_TIMEOUT_BOOTTIME, IORING_TIMEOUT_REALTIME, IORING_LINK_TIMEOUT_UPDATE, IORING_TIMEOUT_ETIME_SUCCESS, IORING_TIMEOUT_MULTISHOT, IORING_TIMEOUT_CLOCK_MASK, IORING_TIMEOUT_UPDATE_MASK

# The timeout condition is met when either the specific timeout expires, or the
# specified number of events have completed. If not set, defaults to 1. Use a
Expand Down Expand Up @@ -543,3 +550,22 @@ io_uring_clone_buffers {
flags const[1, int32]
pad array[const[0, int32], 6]
}

# Argument of io_uring_register$IORING_REGISTER_MEM_REGION.
# region_uptr is a 64-bit pointer to the io_uring_region_desc describing the region,
# flags takes values from io_uring_region_flags, and resv is always generated as zeroes.
io_uring_mem_region_reg {
region_uptr ptr64[inout, io_uring_region_desc]
flags flags[io_uring_region_flags, int64]
resv array[const[0, int64], 2]
}

# Values for the flags field of io_uring_mem_region_reg.
io_uring_region_flags = IORING_MEM_REGION_REG_WAIT_ARG

# Region descriptor referenced by io_uring_mem_region_reg.region_uptr.
# user_addr points to a byte array and size is generated as that array's length.
# flags takes values from io_uring_region_desc_flags; id and mmap_offset are unconstrained
# integers, and resv is always generated as zeroes.
io_uring_region_desc {
user_addr ptr64[inout, array[int8]]
size len[user_addr, int64]
flags flags[io_uring_region_desc_flags, int32]
id int32
mmap_offset int64
resv array[const[0, int64], 4]
}

# Values for the flags field of io_uring_region_desc.
io_uring_region_desc_flags = IORING_MEM_REGION_TYPE_USER
32 changes: 32 additions & 0 deletions sys/linux/io_uring.txt.const
Original file line number Diff line number Diff line change
Expand Up @@ -9,29 +9,46 @@ CQ_TAIL_OFFSET = 192
EPOLL_CTL_ADD = 1
EPOLL_CTL_DEL = 2
EPOLL_CTL_MOD = 3
IORING_ACCEPT_DONTWAIT = 2
IORING_ACCEPT_MULTISHOT = 1
IORING_ACCEPT_POLL_FIRST = 4
IORING_ASYNC_CANCEL_ALL = 1
IORING_ASYNC_CANCEL_ANY = 4
IORING_ASYNC_CANCEL_FD = 2
IORING_ASYNC_CANCEL_FD_FIXED = 8
IORING_ASYNC_CANCEL_OP = 32
IORING_ASYNC_CANCEL_USERDATA = 16
IORING_ENTER_ABS_TIMER = 32
IORING_ENTER_EXT_ARG = 8
IORING_ENTER_EXT_ARG_REG = 64
IORING_ENTER_GETEVENTS = 1
IORING_ENTER_REGISTERED_RING = 16
IORING_ENTER_SQ_WAIT = 4
IORING_ENTER_SQ_WAKEUP = 2
IORING_FEAT_CQE_SKIP = 2048
IORING_FEAT_FAST_POLL = 32
IORING_FEAT_LINKED_FILE = 4096
IORING_FEAT_MIN_TIMEOUT = 32768
IORING_FEAT_NATIVE_WORKERS = 512
IORING_FEAT_NODROP = 2
IORING_FEAT_POLL_32BITS = 64
IORING_FEAT_RECVSEND_BUNDLE = 16384
IORING_FEAT_REG_REG_RING = 8192
IORING_FEAT_RSRC_TAGS = 1024
IORING_FEAT_RW_CUR_POS = 8
IORING_FEAT_SINGLE_MMAP = 1
IORING_FEAT_SQPOLL_NONFIXED = 128
IORING_FEAT_SUBMIT_STABLE = 4
IORING_FSYNC_DATASYNC = 1
IORING_LINK_TIMEOUT_UPDATE = 16
IORING_MAX_CQ_ENTRIES = 65536
IORING_MAX_ENTRIES = 32768
IORING_MEM_REGION_REG_WAIT_ARG = 1
IORING_MEM_REGION_TYPE_USER = 1
IORING_MSG_DATA = 0
IORING_MSG_RING_CQE_SKIP = 1
IORING_MSG_RING_FLAGS_PASS = 2
IORING_MSG_SEND_FD = 1
IORING_OFF_CQ_RING = 134217728
IORING_OFF_SQES = 268435456
IORING_OFF_SQ_RING = 0
Expand Down Expand Up @@ -76,6 +93,7 @@ IORING_OP_UNLINKAT = 36
IORING_OP_WRITE = 23
IORING_OP_WRITEV = 2
IORING_OP_WRITE_FIXED = 5
IORING_RECVSEND_BUNDLE = 16
IORING_RECVSEND_FIXED_BUF = 4
IORING_RECVSEND_POLL_FIRST = 1
IORING_RECV_MULTISHOT = 2
Expand All @@ -94,13 +112,16 @@ IORING_REGISTER_FILES_UPDATE2 = 14
IORING_REGISTER_FILE_ALLOC_RANGE = 25
IORING_REGISTER_IOWQ_AFF = 17
IORING_REGISTER_IOWQ_MAX_WORKERS = 19
IORING_REGISTER_MEM_REGION = 34
IORING_REGISTER_NAPI = 27
IORING_REGISTER_PBUF_RING = 22
IORING_REGISTER_PBUF_STATUS = 26
IORING_REGISTER_PERSONALITY = 9
IORING_REGISTER_PROBE = 8
IORING_REGISTER_RESIZE_RINGS = 33
IORING_REGISTER_RESTRICTIONS = 11
IORING_REGISTER_RING_FDS = 20
IORING_REGISTER_SEND_MSG_RING = 31
IORING_REGISTER_SYNC_CANCEL = 24
IORING_RESTRICTION_REGISTER_OP = 0
IORING_RESTRICTION_SQE_FLAGS_ALLOWED = 2
Expand All @@ -113,7 +134,11 @@ IORING_SETUP_COOP_TASKRUN = 256
IORING_SETUP_CQE32 = 2048
IORING_SETUP_CQSIZE = 8
IORING_SETUP_DEFER_TASKRUN = 8192
IORING_SETUP_HYBRID_IOPOLL = 131072
IORING_SETUP_IOPOLL = 1
IORING_SETUP_NO_MMAP = 16384
IORING_SETUP_NO_SQARRAY = 65536
IORING_SETUP_REGISTERED_FD_ONLY = 32768
IORING_SETUP_R_DISABLED = 64
IORING_SETUP_SINGLE_ISSUER = 4096
IORING_SETUP_SQE128 = 1024
Expand All @@ -122,6 +147,13 @@ IORING_SETUP_SQ_AFF = 4
IORING_SETUP_SUBMIT_ALL = 128
IORING_SETUP_TASKRUN_FLAG = 512
IORING_TIMEOUT_ABS = 1
IORING_TIMEOUT_BOOTTIME = 4
IORING_TIMEOUT_CLOCK_MASK = 12
IORING_TIMEOUT_ETIME_SUCCESS = 32
IORING_TIMEOUT_MULTISHOT = 64
IORING_TIMEOUT_REALTIME = 8
IORING_TIMEOUT_UPDATE = 2
IORING_TIMEOUT_UPDATE_MASK = 18
IORING_UNREGISTER_BUFFERS = 1
IORING_UNREGISTER_EVENTFD = 5
IORING_UNREGISTER_FILES = 3
Expand Down

0 comments on commit 8eece32

Please sign in to comment.