// Initialize io_uring
ret = io_uring_queue_init(8, &ring, 0);
if (ret < 0) {
    perror("io_uring_queue_init failed");
    return 1;
}
// Map a fixed region for the data buffer (3 = PROT_READ | PROT_WRITE, 34 = MAP_PRIVATE | MAP_ANONYMOUS)
buffer = (void *)0x20240100;
mmap((void *)0x20240000, 0x3000, PROT_READ | PROT_WRITE,
     MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

// Prepare open request
sqe = io_uring_get_sqe(&ring);
io_uring_prep_openat(sqe, AT_FDCWD, FILE_PATH, O_RDONLY, 0);
sqe->flags |= IOSQE_IO_LINK; // Link this request to the next one
// Prepare read request
sqe = io_uring_get_sqe(&ring);
io_uring_prep_read(sqe, -1, buffer, BUFFER_SIZE, 0); // fd -1 will be filled by the open result
sqe->flags |= IOSQE_IO_LINK; // Link this request to the next one
// Prepare write request
sqe = io_uring_get_sqe(&ring);
io_uring_prep_write(sqe, STDOUT_FILENO, buffer, BUFFER_SIZE, 0);
sqe->user_data = 1; // Use user_data to identify this request
// Submit all requests at once
ret = io_uring_submit(&ring);
if (ret <= 0) {
    perror("io_uring_submit failed");
    return 1;
}
// Process completion events
int completion_count = 0;
while (completion_count < 3) {
    ret = io_uring_wait_cqe(&ring, &cqe);
    if (ret < 0) {
        perror("io_uring_wait_cqe failed");
        break;
    }
    if (cqe->user_data == 1) { // Check if it's the write completion
        if (cqe->res < 0) {
            fprintf(stderr, "Write failed: %s\n", strerror(-cqe->res));
        } else {
            printf("Successfully written %d bytes\n", cqe->res);
        }
    }

    // Mark the CQE as consumed and count it so the loop terminates
    io_uring_cqe_seen(&ring, cqe);
    completion_count++;
}
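For completeness, the snippet above assumes roughly the following surrounding declarations. FILE_PATH and BUFFER_SIZE are placeholders here (the actual challenge values are not shown), so treat this as a minimal sketch rather than the original harness:

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>
#include <liburing.h>

#define FILE_PATH   "/etc/hostname"  /* hypothetical target file */
#define BUFFER_SIZE 0x100            /* hypothetical read size */

static struct io_uring ring;
static struct io_uring_sqe *sqe;
static struct io_uring_cqe *cqe;
static void *buffer;
static int ret;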
int __io_uring_queue_init_params(unsigned entries, struct io_uring *ring,
				 struct io_uring_params *p, void *buf,
				 size_t buf_size)
{
	int fd, ret = 0;
	unsigned *sq_array;
	unsigned sq_entries, index;
	memset(ring, 0, sizeof(*ring));
	/*
	 * The kernel does this check already, but checking it here allows us
	 * to avoid handling it below.
	 */
	if (p->flags & IORING_SETUP_REGISTERED_FD_ONLY &&
	    !(p->flags & IORING_SETUP_NO_MMAP))
		return -EINVAL;
	if (p->flags & IORING_SETUP_NO_MMAP) {
		ret = io_uring_alloc_huge(entries, p, &ring->sq, &ring->cq,
					  buf, buf_size);
		if (ret < 0)
			return ret;
		if (buf)
			ring->int_flags |= INT_FLAG_APP_MEM;
	}
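This is the path behind liburing's io_uring_queue_init_mem() helper, where the application hands in the memory backing the rings instead of letting the kernel allocate and mmap() it. A minimal sketch, with the buffer size chosen arbitrarily (note that kernels may insist on physically contiguous backing for this region, which is why liburing prefers huge pages when allocating it itself):

struct io_uring_params p = { .flags = IORING_SETUP_NO_MMAP };
struct io_uring app_ring;
size_t len = 0x8000; /* arbitrary; must be large enough to hold the rings and sqes */
void *mem = mmap(NULL, len, PROT_READ | PROT_WRITE,
                 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

/* rings and sqes are carved out of `mem` rather than kernel-allocated pages */
int r = io_uring_queue_init_mem(8, &app_ring, &p, mem, len);
if (r < 0)
    fprintf(stderr, "init_mem: %s\n", strerror(-r));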
struct io_uring_sqe {
	__u8	opcode;		/* type of operation for this sqe */
	__u8	flags;		/* IOSQE_ flags */
	__u16	ioprio;		/* ioprio for the request */
	__s32	fd;		/* file descriptor to do IO on */
	union {
		__u64	off;	/* offset into file */
		__u64	addr2;
		struct {
			__u32	cmd_op;
			__u32	__pad1;
		};
	};
	union {
		__u64	addr;	/* pointer to buffer or iovecs */
		__u64	splice_off_in;
		struct {
			__u32	level;
			__u32	optname;
		};
	};
	__u32	len;		/* buffer size or number of iovecs */
	union {
		__kernel_rwf_t	rw_flags;
		__u32		fsync_flags;
		__u16		poll_events;	/* compatibility */
		__u32		poll32_events;	/* word-reversed for BE */
		__u32		sync_range_flags;
		__u32		msg_flags;
		__u32		timeout_flags;
		__u32		accept_flags;
		__u32		cancel_flags;
		__u32		open_flags;
		__u32		statx_flags;
		__u32		fadvise_advice;
		__u32		splice_flags;
		__u32		rename_flags;
		__u32		unlink_flags;
		__u32		hardlink_flags;
		__u32		xattr_flags;
		__u32		msg_ring_flags;
		__u32		uring_cmd_flags;
		__u32		waitid_flags;
		__u32		futex_flags;
		__u32		install_fd_flags;
	};
	__u64	user_data;	/* data to be passed back at completion time */
	/* pack this to avoid bogus arm OABI complaints */
	union {
		/* index into fixed buffers, if used */
		__u16	buf_index;
		/* for grouped buffer selection */
		__u16	buf_group;
	} __attribute__((packed));
	/* personality to use, if used */
	__u16	personality;
	union {
		__s32	splice_fd_in;
		__u32	file_index;
		__u32	optlen;
		struct {
			__u16	addr_len;
			__u16	__pad3[1];
		};
	};
	union {
		struct {
			__u64	addr3;
			__u64	__pad2[1];
		};
		__u64	optval;
		/*
		 * If the ring is initialized with IORING_SETUP_SQE128, then
		 * this field is used for 80 bytes of arbitrary command data
		 */
		__u8	cmd[0];
	};
};
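To see how these fields get consumed, liburing's prep helpers simply fill in the relevant members. A rough sketch of what io_uring_prep_read() boils down to (simplified from liburing's internal io_uring_prep_rw(); the real helper also zeroes a few more fields):

static inline void prep_read_sketch(struct io_uring_sqe *sqe, int fd,
				    void *buf, unsigned nbytes, __u64 offset)
{
	sqe->opcode    = IORING_OP_READ;
	sqe->flags     = 0;
	sqe->ioprio    = 0;
	sqe->fd        = fd;
	sqe->off       = offset;
	sqe->addr      = (unsigned long)buf;	/* pointer to the user buffer */
	sqe->len       = nbytes;		/* buffer size */
	sqe->rw_flags  = 0;
	sqe->user_data = 0;
}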
./bzImage: Linux kernel x86 boot executable bzImage, version 6.6.1 (root@fuzz) #2 SMP PREEMPT_DYNAMIC Sat Nov 18 21:11:32 CST 2023, RO-rootFS, swap_dev 0XB, Normal VGA
	if (!entries)
		return -EINVAL;
	if (entries > IORING_MAX_ENTRIES) {
		if (!(p->flags & IORING_SETUP_CLAMP))
			return -EINVAL;
		entries = IORING_MAX_ENTRIES;
	}
	if ((p->flags & IORING_SETUP_REGISTERED_FD_ONLY) &&
	    !(p->flags & IORING_SETUP_NO_MMAP))
		return -EINVAL;
	/*
	 * Use twice as many entries for the CQ ring. It's possible for the
	 * application to drive a higher depth than the size of the SQ ring,
	 * since the sqes are only used at submission time. This allows for
	 * some flexibility in overcommitting a bit. If the application has
	 * set IORING_SETUP_CQSIZE, it will have passed in the desired number
	 * of CQ ring entries manually.
	 */
	p->sq_entries = roundup_pow_of_two(entries);
	if (p->flags & IORING_SETUP_CQSIZE) {
		/*
		 * If IORING_SETUP_CQSIZE is set, we do the same roundup
		 * to a power-of-two, if it isn't already. We do NOT impose
		 * any cq vs sq ring sizing.
		 */
		if (!p->cq_entries)
			return -EINVAL;
		if (p->cq_entries > IORING_MAX_CQ_ENTRIES) {
			if (!(p->flags & IORING_SETUP_CLAMP))
				return -EINVAL;
			p->cq_entries = IORING_MAX_CQ_ENTRIES;
		}
		p->cq_entries = roundup_pow_of_two(p->cq_entries);
		if (p->cq_entries < p->sq_entries)
			return -EINVAL;
	} else {
		p->cq_entries = 2 * p->sq_entries;
	}
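Concretely (values chosen here purely for illustration): asking for entries = 6 yields sq_entries = 8 and cq_entries = 16; with IORING_SETUP_CQSIZE and cq_entries = 100, the CQ ring is rounded up to 128; and CQSIZE with cq_entries = 8 against sq_entries = 32 fails with -EINVAL, since the CQ ring may not be smaller than the SQ ring.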
	ctx = io_ring_ctx_alloc(p);
	if (!ctx)
		return -ENOMEM;
	if (ctx->task_complete || (ctx->flags & IORING_SETUP_IOPOLL))
		ctx->lockless_cq = true;
	/*
	 * lazy poll_wq activation relies on ->task_complete for synchronisation
	 * purposes, see io_activate_pollwq()
	 */
	if (!ctx->task_complete)
		ctx->poll_activated = true;
	/*
	 * When SETUP_IOPOLL and SETUP_SQPOLL are both enabled, user
	 * space applications don't need to do io completion events
	 * polling again, they can rely on io_sq_thread to do polling
	 * work, which can reduce cpu usage and uring_lock contention.
	 */
	if (ctx->flags & IORING_SETUP_IOPOLL &&
	    !(ctx->flags & IORING_SETUP_SQPOLL))
		ctx->syscall_iopoll = 1;
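From userspace, the combination the comment describes would be requested like this. A minimal sketch only: IOPOLL additionally requires I/O against files opened with O_DIRECT on a capable backend, and SQPOLL may need elevated privileges on older kernels:

struct io_uring_params p = {
	.flags          = IORING_SETUP_SQPOLL | IORING_SETUP_IOPOLL,
	.sq_thread_idle = 2000,	/* ms before the sq poll thread goes to sleep */
};
struct io_uring polled_ring;
int r = io_uring_queue_init_params(64, &polled_ring, &p);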
	ctx->compat = in_compat_syscall();
	if (!ns_capable_noaudit(&init_user_ns, CAP_IPC_LOCK))
		ctx->user = get_uid(current_user());
	/*
	 * For SQPOLL, we just need a wakeup, always. For !SQPOLL, if
	 * COOP_TASKRUN is set, then IPIs are never needed by the app.
	 */
	ret = -EINVAL;
	if (ctx->flags & IORING_SETUP_SQPOLL) {
		/* IPI related flags don't make sense with SQPOLL */
		if (ctx->flags & (IORING_SETUP_COOP_TASKRUN |
				  IORING_SETUP_TASKRUN_FLAG |
				  IORING_SETUP_DEFER_TASKRUN))
			goto err;
		ctx->notify_method = TWA_SIGNAL_NO_IPI;
	} else if (ctx->flags & IORING_SETUP_COOP_TASKRUN) {
		ctx->notify_method = TWA_SIGNAL_NO_IPI;
	} else {
		if (ctx->flags & IORING_SETUP_TASKRUN_FLAG &&
		    !(ctx->flags & IORING_SETUP_DEFER_TASKRUN))
			goto err;
		ctx->notify_method = TWA_SIGNAL;
	}
	/*
	 * For DEFER_TASKRUN we require the completion task to be the same as the
	 * submission task. This implies that there is only one submitter, so enforce
	 * that.
	 */
	if (ctx->flags & IORING_SETUP_DEFER_TASKRUN &&
	    !(ctx->flags & IORING_SETUP_SINGLE_ISSUER)) {
		goto err;
	}
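In practice this means a DEFER_TASKRUN ring has to be created with both flags; asking for one without the other fails at setup time. A sketch, with error handling elided:

struct io_uring_params p = {
	.flags = IORING_SETUP_DEFER_TASKRUN | IORING_SETUP_SINGLE_ISSUER,
};
struct io_uring dtr_ring;
/* IORING_SETUP_DEFER_TASKRUN alone would get -EINVAL from the check above */
int r = io_uring_queue_init_params(8, &dtr_ring, &p);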
	/*
	 * This is just grabbed for accounting purposes. When a process exits,
	 * the mm is exited and dropped before the files, hence we need to hang
	 * on to this mm purely for the purposes of being able to unaccount
	 * memory (locked/pinned vm). It's not used for anything else.
	 */
	mmgrab(current->mm);
	ctx->mm_account = current->mm;
	ret = io_allocate_scq_urings(ctx, p);
	if (ret)
		goto err;
	ret = io_sq_offload_create(ctx, p);
	if (ret)
		goto err;
	if (copy_to_user(params, p, sizeof(*p))) {
		ret = -EFAULT;
		goto err;
	}
	if (ctx->flags & IORING_SETUP_SINGLE_ISSUER
	    && !(ctx->flags & IORING_SETUP_R_DISABLED))
		WRITE_ONCE(ctx->submitter_task, get_task_struct(current));
	file = io_uring_get_file(ctx);
	if (IS_ERR(file)) {
		ret = PTR_ERR(file);
		goto err;
	}
	ret = __io_uring_add_tctx_node(ctx);
	if (ret)
		goto err_fput;
	tctx = current->io_uring;
	/*
	 * Install ring fd as the very last thing, so we don't risk someone
	 * having closed it before we finish setup
	 */
	if (p->flags & IORING_SETUP_REGISTERED_FD_ONLY)
		ret = io_ring_add_registered_file(tctx, file, 0,
						  IO_RINGFD_REG_MAX);
	else
		ret = io_uring_install_fd(file);
	if (ret < 0)
		goto err_fput;
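Note the consequence of the first branch: a ring created with IORING_SETUP_REGISTERED_FD_ONLY never appears in the process fd table. io_uring_setup() returns a registered-ring index rather than a normal descriptor, and later io_uring_enter() calls must announce that with IORING_ENTER_REGISTERED_RING (liburing hides this behind its usual API). A raw-syscall sketch, assuming params already satisfies the flag checks shown earlier (REGISTERED_FD_ONLY plus NO_MMAP with app-provided memory):

/* sketch only: the return value is a registered-ring index, not an fd */
int ring_idx = syscall(__NR_io_uring_setup, 8, &params);
syscall(__NR_io_uring_enter, ring_idx, to_submit, 0,
	IORING_ENTER_REGISTERED_RING, NULL, 0);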