From 6306ccc803e895899df0ada12404ca054a5a461b Mon Sep 17 00:00:00 2001
From: Sablin Viacheslav
Date: Tue, 24 Dec 2024 00:24:14 +0300
Subject: [PATCH] sys/linux: improve iouring interface

Extend the io_uring descriptions:
- add io_uring_register$IORING_REGISTER_RESIZE_RINGS and
  io_uring_register$IORING_REGISTER_MEM_REGION together with the
  io_uring_mem_region_reg and io_uring_region_desc structs;
- extend the register opcode, setup, enter and timeout flag lists;
- describe the accept ioprio flags, the recv/send bundle flag and the
  IORING_OP_MSG_RING command and flag values;
- add the matching values to io_uring.txt.const.

IORING_REGISTER_RESIZE_RINGS takes io_uring_params as an inout pointer
because the kernel copies the resulting parameters back to user space.

---
 sys/linux/io_uring.txt       | 40 +++++++++++++++++++++++++++++-------
 sys/linux/io_uring.txt.const | 32 +++++++++++++++++++++++++++++
 2 files changed, 65 insertions(+), 7 deletions(-)

diff --git a/sys/linux/io_uring.txt b/sys/linux/io_uring.txt
index 0d63c0f502f6..414138ef9562 100644
--- a/sys/linux/io_uring.txt
+++ b/sys/linux/io_uring.txt
@@ -64,8 +64,10 @@ io_uring_register$IORING_REGISTER_NAPI(fd fd_io_uring, opcode const[IORING_REGIS
 io_uring_register$IORING_UNREGISTER_NAPI(fd fd_io_uring, opcode const[IORING_UNREGISTER_NAPI], arg ptr[out, io_uring_napi], nr_args const[1])
 io_uring_register$IORING_REGISTER_CLOCK(fd fd_io_uring, opcode const[IORING_REGISTER_CLOCK], arg ptr[in, io_uring_clock_register], nr_args const[0])
 io_uring_register$IORING_REGISTER_CLONE_BUFFERS(fd fd_io_uring, opcode const[IORING_REGISTER_CLONE_BUFFERS], arg ptr[in, io_uring_clone_buffers], nr_args const[1])
+io_uring_register$IORING_REGISTER_RESIZE_RINGS(fd fd_io_uring, opcode const[IORING_REGISTER_RESIZE_RINGS], arg ptr[inout, io_uring_params], nr_args const[1])
+io_uring_register$IORING_REGISTER_MEM_REGION(fd fd_io_uring, opcode const[IORING_REGISTER_MEM_REGION], arg ptr[in, io_uring_mem_region_reg], nr_args const[1])
 
-io_uring_register_opcodes = IORING_REGISTER_BUFFERS, IORING_UNREGISTER_BUFFERS, IORING_REGISTER_FILES, IORING_UNREGISTER_FILES, IORING_REGISTER_EVENTFD, IORING_UNREGISTER_EVENTFD, IORING_REGISTER_FILES_UPDATE, IORING_REGISTER_EVENTFD_ASYNC, IORING_REGISTER_PROBE, IORING_REGISTER_PERSONALITY, IORING_UNREGISTER_PERSONALITY, IORING_REGISTER_RESTRICTIONS, IORING_REGISTER_ENABLE_RINGS, IORING_REGISTER_FILES2, IORING_REGISTER_FILES_UPDATE2, IORING_REGISTER_BUFFERS2, IORING_REGISTER_BUFFERS_UPDATE, IORING_REGISTER_IOWQ_AFF, IORING_UNREGISTER_IOWQ_AFF, IORING_REGISTER_IOWQ_MAX_WORKERS, IORING_REGISTER_RING_FDS, IORING_UNREGISTER_RING_FDS, IORING_REGISTER_PBUF_RING, IORING_UNREGISTER_PBUF_RING, IORING_REGISTER_SYNC_CANCEL, IORING_REGISTER_FILE_ALLOC_RANGE, IORING_REGISTER_PBUF_STATUS, IORING_REGISTER_NAPI, IORING_UNREGISTER_NAPI, IORING_REGISTER_CLOCK, IORING_REGISTER_CLONE_BUFFERS
+io_uring_register_opcodes = IORING_REGISTER_BUFFERS, IORING_UNREGISTER_BUFFERS, IORING_REGISTER_FILES, IORING_UNREGISTER_FILES, IORING_REGISTER_EVENTFD, IORING_UNREGISTER_EVENTFD, IORING_REGISTER_FILES_UPDATE, IORING_REGISTER_EVENTFD_ASYNC, IORING_REGISTER_PROBE, IORING_REGISTER_PERSONALITY, IORING_UNREGISTER_PERSONALITY, IORING_REGISTER_RESTRICTIONS, IORING_REGISTER_ENABLE_RINGS, IORING_REGISTER_FILES2, IORING_REGISTER_FILES_UPDATE2, IORING_REGISTER_BUFFERS2, IORING_REGISTER_BUFFERS_UPDATE, IORING_REGISTER_IOWQ_AFF, IORING_UNREGISTER_IOWQ_AFF, IORING_REGISTER_IOWQ_MAX_WORKERS, IORING_REGISTER_RING_FDS, IORING_UNREGISTER_RING_FDS, IORING_REGISTER_PBUF_RING, IORING_UNREGISTER_PBUF_RING, IORING_REGISTER_SYNC_CANCEL, IORING_REGISTER_FILE_ALLOC_RANGE, IORING_REGISTER_PBUF_STATUS, IORING_REGISTER_NAPI, IORING_UNREGISTER_NAPI, IORING_REGISTER_CLOCK, IORING_REGISTER_CLONE_BUFFERS, IORING_REGISTER_SEND_MSG_RING, IORING_REGISTER_RESIZE_RINGS, IORING_REGISTER_MEM_REGION
 
 # The mmap'ed area for SQ and CQ rings are really the same -- the difference is
 # accounted for with the usage of offsets.
@@ -74,7 +76,7 @@ mmap$IORING_OFF_CQ_RING(addr vma, len len[addr], prot flags[mmap_prot], flags fl
 mmap$IORING_OFF_SQES(addr vma, len len[addr], prot flags[mmap_prot], flags flags[mmap_flags], fd fd_io_uring, offset const[IORING_OFF_SQES]) sqes_ptr
 
 # If no flags are specified(0), the io_uring instance is setup for interrupt driven IO.
-io_uring_setup_flags = 0, IORING_SETUP_IOPOLL, IORING_SETUP_SQPOLL, IORING_SETUP_SQ_AFF, IORING_SETUP_CQSIZE, IORING_SETUP_CLAMP, IORING_SETUP_ATTACH_WQ, IORING_FEAT_NODROP, IORING_FEAT_SUBMIT_STABLE, IORING_FEAT_RW_CUR_POS, IORING_FEAT_FAST_POLL, IORING_FEAT_POLL_32BITS, IORING_SETUP_R_DISABLED, IORING_FEAT_SQPOLL_NONFIXED, IORING_FEAT_NATIVE_WORKERS, IORING_FEAT_RSRC_TAGS, IORING_FEAT_CQE_SKIP, IORING_SETUP_SUBMIT_ALL, IORING_SETUP_COOP_TASKRUN, IORING_SETUP_TASKRUN_FLAG, IORING_SETUP_SQE128, IORING_SETUP_CQE32, IORING_SETUP_SINGLE_ISSUER, IORING_SETUP_DEFER_TASKRUN
+io_uring_setup_flags = 0, IORING_SETUP_IOPOLL, IORING_SETUP_SQPOLL, IORING_SETUP_SQ_AFF, IORING_SETUP_CQSIZE, IORING_SETUP_CLAMP, IORING_SETUP_ATTACH_WQ, IORING_FEAT_SINGLE_MMAP, IORING_FEAT_NODROP, IORING_FEAT_SUBMIT_STABLE, IORING_FEAT_RW_CUR_POS, IORING_FEAT_FAST_POLL, IORING_FEAT_POLL_32BITS, IORING_SETUP_R_DISABLED, IORING_FEAT_SQPOLL_NONFIXED, IORING_FEAT_NATIVE_WORKERS, IORING_FEAT_RSRC_TAGS, IORING_FEAT_CQE_SKIP, IORING_FEAT_LINKED_FILE, IORING_FEAT_REG_REG_RING, IORING_FEAT_RECVSEND_BUNDLE, IORING_FEAT_MIN_TIMEOUT, IORING_SETUP_SUBMIT_ALL, IORING_SETUP_COOP_TASKRUN, IORING_SETUP_TASKRUN_FLAG, IORING_SETUP_SQE128, IORING_SETUP_CQE32, IORING_SETUP_SINGLE_ISSUER, IORING_SETUP_DEFER_TASKRUN, IORING_SETUP_NO_MMAP, IORING_SETUP_REGISTERED_FD_ONLY, IORING_SETUP_NO_SQARRAY, IORING_SETUP_HYBRID_IOPOLL
 # watch out the being tested kernel version
 # IORING_FEAT_SINGLE_MMAP >= 5.4
 # IORING_FEAT_NODROP, IORING_FEAT_SUBMIT_STABLE >= 5.5
@@ -91,7 +93,7 @@ io_uring_setup_flags = 0, IORING_SETUP_IOPOLL, IORING_SETUP_SQPOLL, IORING_SETUP
 # IORING_SETUP_SINGLE_ISSUER >= 6.0
 # IORING_SETUP_DEFER_TASKRUN >= 6.1
 
-io_uring_enter_flags = IORING_ENTER_GETEVENTS, IORING_ENTER_SQ_WAKEUP, IORING_ENTER_SQ_WAIT, IORING_ENTER_EXT_ARG, IORING_ENTER_REGISTERED_RING
+io_uring_enter_flags = IORING_ENTER_GETEVENTS, IORING_ENTER_SQ_WAKEUP, IORING_ENTER_SQ_WAIT, IORING_ENTER_EXT_ARG, IORING_ENTER_REGISTERED_RING, IORING_ENTER_ABS_TIMER, IORING_ENTER_EXT_ARG_REG
 # IORING_ENTER_EXT_ARG >= 5.11
 
 _ = __NR_mmap2
@@ -335,7 +337,7 @@ type io_uring_sqe$recvmsg io_uring_sqe[IORING_OP_RECVMSG, flags[iouring_recv_iop
 type io_uring_sqe$timeout io_uring_sqe[IORING_OP_TIMEOUT, const[0, int16], const[0, int32], io_uring_timeout_completion_event_count, ptr[in, timespec], const[1, int32], flags[io_uring_timeout_flags, int32], sqe_user_data_not_openat, personality_only_misc]
 # IORING_OP_TIMEOUT >= 5.4
 type io_uring_sqe$timeout_remove io_uring_sqe[IORING_OP_TIMEOUT_REMOVE, const[0, int16], const[0, int32], const[0, int64], flags[sqe_user_data, int64], const[0, int32], const[0, int32], sqe_user_data_not_openat, personality_only_misc]
-type io_uring_sqe$accept io_uring_sqe[IORING_OP_ACCEPT, const[0, int16], sock, ptr[inout, len[addr, int32]], ptr[out, sockaddr_storage, opt], const[0, int32], flags[accept_flags, int32], sqe_user_data_not_openat, personality_only_misc]
+type io_uring_sqe$accept io_uring_sqe[IORING_OP_ACCEPT, flags[iouring_accept_flags, int16], sock, ptr[inout, len[addr, int32]], ptr[out, sockaddr_storage, opt], const[0, int32], flags[accept_flags, int32], sqe_user_data_not_openat, personality_only_misc]
 type io_uring_sqe$async_cancel io_uring_sqe[IORING_OP_ASYNC_CANCEL, const[0, int16], const[0, int32], const[0, int64], flags[sqe_user_data, int64], const[0, int32], const[0, int32], sqe_user_data_not_openat, personality_only_misc]
 type io_uring_sqe$link_timeout io_uring_sqe[IORING_OP_LINK_TIMEOUT, const[0, int16], const[0, int32], const[0, int64], ptr[in, timespec], const[1, int32], flags[io_uring_timeout_flags, int32], sqe_user_data_not_openat, personality_only_misc]
 type io_uring_sqe$connect io_uring_sqe[IORING_OP_CONNECT, const[0, int16], sock, len[addr, int32], ptr[in, sockaddr_storage], const[0, int32], const[0, int32], sqe_user_data_not_openat, personality_only_misc]
@@ -346,6 +348,8 @@ type io_uring_sqe$close io_uring_sqe[IORING_OP_CLOSE, const[0, int16], fd, const
 type io_uring_sqe$files_update io_uring_sqe[IORING_OP_FILES_UPDATE, const[0, int16], const[0, int32], fileoff[int64], ptr[in, array[fd]], len[addr, int32], const[0, int32], sqe_user_data_not_openat, personality_only_misc]
 type io_uring_sqe$statx io_uring_sqe[IORING_OP_STATX, const[0, int16], fd_dir[opt], ptr[out, statx], ptr64[in, filename], flags[statx_mask, int32], flags[statx_flags, int32], sqe_user_data_not_openat, personality_only_misc]
 
+iouring_accept_flags = IORING_ACCEPT_MULTISHOT, IORING_ACCEPT_DONTWAIT, IORING_ACCEPT_POLL_FIRST
+
 io_uring_sqe_read [
 	pass_buffer		io_uring_sqe[IORING_OP_READ, flags[ioprio_priorities, int16], fd_or_fixed_fd_index, fileoff[int64], buffer[out], bytesize[addr, int32], flags[rwf_flags, int32], sqe_user_data_not_openat, personality_only_misc]
 	use_registered_buffer	io_uring_sqe[IORING_OP_READ, flags[ioprio_priorities, int16], fd_or_fixed_fd_index, fileoff[int64], const[0, int64], const[0, int32], flags[rwf_flags, int32], sqe_user_data_not_openat, buf_index_personality_misc]
@@ -356,7 +360,7 @@ type io_uring_sqe$fadvise io_uring_sqe[IORING_OP_FADVISE, const[0, int16], fd_or
 type io_uring_sqe$madvise io_uring_sqe[IORING_OP_MADVISE, const[0, int16], const[0, int32], const[0, int64], vma, len[addr, int32], flags[madvise_flags, int32], sqe_user_data_not_openat, personality_only_misc]
 type io_uring_sqe$send io_uring_sqe[IORING_OP_SEND, const[0, int16], sock, const[0, int64], buffer[in], len[addr, int32], flags[send_flags, int32], sqe_user_data_not_openat, personality_only_misc]
 
-iouring_recv_ioprio = IORING_RECVSEND_POLL_FIRST, IORING_RECV_MULTISHOT, IORING_RECVSEND_FIXED_BUF
+iouring_recv_ioprio = IORING_RECVSEND_POLL_FIRST, IORING_RECV_MULTISHOT, IORING_RECVSEND_FIXED_BUF, IORING_RECVSEND_BUNDLE
 
 io_uring_sqe_recv [
 	pass_buffer		io_uring_sqe[IORING_OP_RECV, const[0, int16], sock, const[0, int64], buffer[inout], len[addr, int32], flags[recv_flags, int32], sqe_user_data_not_openat, personality_only_misc]
@@ -392,7 +396,10 @@ type io_uring_sqe$symlinkat io_uring_sqe[IORING_OP_SYMLINKAT, const[0, int16], f
 type io_uring_sqe$linkat io_uring_sqe[IORING_OP_LINKAT, const[0, int16], fd_dir, ptr64[in, filename], ptr64[in, filename], fd_dir, flags[linkat_flags, int32], sqe_user_data_not_openat, personality_only_misc]
 # IORING_OP_MKDIRAT, IORING_OP_SYMLINKAT, IORING_OP_LINKAT >= 5.15
 
-type io_uring_sqe$msg_ring io_uring_sqe[IORING_OP_MSG_RING, const[0, int16], fd_io_uring, int64, buffer[in], len[addr, int32], const[0, int32], sqe_user_data_not_openat, personality_only_misc]
+iouring_msg_ring_cmd_flags = IORING_MSG_DATA, IORING_MSG_SEND_FD
+msg_ring_flags = IORING_MSG_RING_CQE_SKIP, IORING_MSG_RING_FLAGS_PASS
+# The kernel reads the MSG_RING command from sqe->addr; sqe->off and sqe->len carry the user_data and res of the posted CQE.
+type io_uring_sqe$msg_ring io_uring_sqe[IORING_OP_MSG_RING, const[0, int16], fd_io_uring, int64, flags[iouring_msg_ring_cmd_flags, int64], int32, flags[msg_ring_flags, int32], sqe_user_data_not_openat, personality_only_misc]
 # IORING_OP_MSG_RING >= 5.18
 
 # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
@@ -422,7 +429,7 @@ fd_or_fixed_fd_index [
 io_uring_fsync_flags = 0, IORING_FSYNC_DATASYNC
 
 # 0 for relative, IORING_TIMEOUT_ABS for absolute timeout value
-io_uring_timeout_flags = 0, IORING_TIMEOUT_ABS
+io_uring_timeout_flags = 0, IORING_TIMEOUT_ABS, IORING_TIMEOUT_UPDATE, IORING_TIMEOUT_BOOTTIME, IORING_TIMEOUT_REALTIME, IORING_LINK_TIMEOUT_UPDATE, IORING_TIMEOUT_ETIME_SUCCESS, IORING_TIMEOUT_MULTISHOT, IORING_TIMEOUT_CLOCK_MASK, IORING_TIMEOUT_UPDATE_MASK
 
 # The timeout condition is met when either the specific timeout expries, or the
 # specified number of events have completed. If not set, defaults to 1. Use a
@@ -543,3 +550,22 @@ io_uring_clone_buffers {
 	flags	const[1, int32]
 	pad	array[const[0, int32], 6]
 }
+
+io_uring_mem_region_reg {
+	region_uptr	ptr64[inout, io_uring_region_desc]
+	flags		flags[io_uring_region_flags, int64]
+	resv		array[const[0, int64], 2]
+}
+
+io_uring_region_flags = IORING_MEM_REGION_REG_WAIT_ARG
+
+io_uring_region_desc {
+	user_addr	ptr64[inout, array[int8]]
+	size		len[user_addr, int64]
+	flags		flags[io_uring_region_desc_flags, int32]
+	id		int32
+	mmap_offset	int64
+	resv		array[const[0, int64], 4]
+}
+
+io_uring_region_desc_flags = IORING_MEM_REGION_TYPE_USER
diff --git a/sys/linux/io_uring.txt.const b/sys/linux/io_uring.txt.const
index 6cbf37efdd6a..3f4039559198 100644
--- a/sys/linux/io_uring.txt.const
+++ b/sys/linux/io_uring.txt.const
@@ -9,29 +9,46 @@ CQ_TAIL_OFFSET = 192
 EPOLL_CTL_ADD = 1
 EPOLL_CTL_DEL = 2
 EPOLL_CTL_MOD = 3
+IORING_ACCEPT_DONTWAIT = 2
+IORING_ACCEPT_MULTISHOT = 1
+IORING_ACCEPT_POLL_FIRST = 4
 IORING_ASYNC_CANCEL_ALL = 1
 IORING_ASYNC_CANCEL_ANY = 4
 IORING_ASYNC_CANCEL_FD = 2
 IORING_ASYNC_CANCEL_FD_FIXED = 8
 IORING_ASYNC_CANCEL_OP = 32
 IORING_ASYNC_CANCEL_USERDATA = 16
+IORING_ENTER_ABS_TIMER = 32
 IORING_ENTER_EXT_ARG = 8
+IORING_ENTER_EXT_ARG_REG = 64
 IORING_ENTER_GETEVENTS = 1
 IORING_ENTER_REGISTERED_RING = 16
 IORING_ENTER_SQ_WAIT = 4
 IORING_ENTER_SQ_WAKEUP = 2
 IORING_FEAT_CQE_SKIP = 2048
 IORING_FEAT_FAST_POLL = 32
+IORING_FEAT_LINKED_FILE = 4096
+IORING_FEAT_MIN_TIMEOUT = 32768
 IORING_FEAT_NATIVE_WORKERS = 512
 IORING_FEAT_NODROP = 2
 IORING_FEAT_POLL_32BITS = 64
+IORING_FEAT_RECVSEND_BUNDLE = 16384
+IORING_FEAT_REG_REG_RING = 8192
 IORING_FEAT_RSRC_TAGS = 1024
 IORING_FEAT_RW_CUR_POS = 8
+IORING_FEAT_SINGLE_MMAP = 1
 IORING_FEAT_SQPOLL_NONFIXED = 128
 IORING_FEAT_SUBMIT_STABLE = 4
 IORING_FSYNC_DATASYNC = 1
+IORING_LINK_TIMEOUT_UPDATE = 16
 IORING_MAX_CQ_ENTRIES = 65536
 IORING_MAX_ENTRIES = 32768
+IORING_MEM_REGION_REG_WAIT_ARG = 1
+IORING_MEM_REGION_TYPE_USER = 1
+IORING_MSG_DATA = 0
+IORING_MSG_RING_CQE_SKIP = 1
+IORING_MSG_RING_FLAGS_PASS = 2
+IORING_MSG_SEND_FD = 1
 IORING_OFF_CQ_RING = 134217728
 IORING_OFF_SQES = 268435456
 IORING_OFF_SQ_RING = 0
@@ -76,6 +93,7 @@ IORING_OP_UNLINKAT = 36
 IORING_OP_WRITE = 23
 IORING_OP_WRITEV = 2
 IORING_OP_WRITE_FIXED = 5
+IORING_RECVSEND_BUNDLE = 16
 IORING_RECVSEND_FIXED_BUF = 4
 IORING_RECVSEND_POLL_FIRST = 1
 IORING_RECV_MULTISHOT = 2
@@ -94,13 +112,16 @@ IORING_REGISTER_FILES_UPDATE2 = 14
 IORING_REGISTER_FILE_ALLOC_RANGE = 25
 IORING_REGISTER_IOWQ_AFF = 17
 IORING_REGISTER_IOWQ_MAX_WORKERS = 19
+IORING_REGISTER_MEM_REGION = 34
 IORING_REGISTER_NAPI = 27
 IORING_REGISTER_PBUF_RING = 22
 IORING_REGISTER_PBUF_STATUS = 26
 IORING_REGISTER_PERSONALITY = 9
 IORING_REGISTER_PROBE = 8
+IORING_REGISTER_RESIZE_RINGS = 33
 IORING_REGISTER_RESTRICTIONS = 11
 IORING_REGISTER_RING_FDS = 20
+IORING_REGISTER_SEND_MSG_RING = 31
 IORING_REGISTER_SYNC_CANCEL = 24
 IORING_RESTRICTION_REGISTER_OP = 0
 IORING_RESTRICTION_SQE_FLAGS_ALLOWED = 2
@@ -113,7 +134,11 @@ IORING_SETUP_COOP_TASKRUN = 256
 IORING_SETUP_CQE32 = 2048
 IORING_SETUP_CQSIZE = 8
 IORING_SETUP_DEFER_TASKRUN = 8192
+IORING_SETUP_HYBRID_IOPOLL = 131072
 IORING_SETUP_IOPOLL = 1
+IORING_SETUP_NO_MMAP = 16384
+IORING_SETUP_NO_SQARRAY = 65536
+IORING_SETUP_REGISTERED_FD_ONLY = 32768
 IORING_SETUP_R_DISABLED = 64
 IORING_SETUP_SINGLE_ISSUER = 4096
 IORING_SETUP_SQE128 = 1024
@@ -122,6 +147,13 @@ IORING_SETUP_SQ_AFF = 4
 IORING_SETUP_SUBMIT_ALL = 128
 IORING_SETUP_TASKRUN_FLAG = 512
 IORING_TIMEOUT_ABS = 1
+IORING_TIMEOUT_BOOTTIME = 4
+IORING_TIMEOUT_CLOCK_MASK = 12
+IORING_TIMEOUT_ETIME_SUCCESS = 32
+IORING_TIMEOUT_MULTISHOT = 64
+IORING_TIMEOUT_REALTIME = 8
+IORING_TIMEOUT_UPDATE = 2
+IORING_TIMEOUT_UPDATE_MASK = 18
 IORING_UNREGISTER_BUFFERS = 1
 IORING_UNREGISTER_EVENTFD = 5
 IORING_UNREGISTER_FILES = 3