Chapter 6.1 Advanced Char Drivers - Scull IOCTL
6.1 ioctl
💡Full code in https://github.com/kimgb415/modern-linux-device-driver/tree/Chapter6/ioctl.
⚠️
scull_pipe-related ioctl commands are not implemented in this section.
scull.h
The way the ioctl commands are defined has been refactored in the header file. enum SCULL_IOC_ENUM is declared to track the index of each ioctl command automatically.
/*
 * Command indices for the scull ioctl interface.
 *
 * Each *_IDX enumerator is passed as the sequence-number argument of the
 * SCULL_IOC* command macro with the same base name. No explicit values are
 * given, so the indices follow declaration order starting at 0.
 */
enum SCULL_IOC_ENUM {
SCULL_IOCRESET_IDX,
SCULL_IOCSQUANTUM_IDX,
SCULL_IOCSQSET_IDX,
SCULL_IOCTQUANTUM_IDX,
SCULL_IOCTQSET_IDX,
SCULL_IOCGQUANTUM_IDX,
SCULL_IOCGQSET_IDX,
SCULL_IOCQQUANTUM_IDX,
SCULL_IOCQQSET_IDX,
SCULL_IOCXQUANTUM_IDX,
SCULL_IOCXQSET_IDX,
SCULL_IOCHQUANTUM_IDX,
SCULL_IOCHQSET_IDX,
/* Not a command index: as the last enumerator it equals the number of indices above. */
SCULL_IOC_MAXNR
};And all second arguments for ioctl declaration macros are passed with the corresponding enum value. I consider this approach more appropriate, since the maximum number of ioctl calls might change.
#define SCULL_IOCRESET _IO(SCULL_IOC_MAGIC, SCULL_IOCRESET_IDX)
/*
* S means "Set" through a ptr,
* T means "Tell" directly with the argument value
* G means "Get": reply by setting through a pointer
* Q means "Query": response is on the return value
* X means "eXchange": switch G and S atomically
* H means "sHift": switch T and Q atomically
*/
#define SCULL_IOCSQUANTUM _IOW(SCULL_IOC_MAGIC, SCULL_IOCSQUANTUM_IDX, int)
#define SCULL_IOCSQSET _IOW(SCULL_IOC_MAGIC, SCULL_IOCSQSET_IDX, int)
#define SCULL_IOCTQUANTUM _IO(SCULL_IOC_MAGIC, SCULL_IOCTQUANTUM_IDX)
#define SCULL_IOCTQSET _IO(SCULL_IOC_MAGIC, SCULL_IOCTQSET_IDX)
#define SCULL_IOCGQUANTUM _IOR(SCULL_IOC_MAGIC, SCULL_IOCGQUANTUM_IDX, int)
#define SCULL_IOCGQSET _IOR(SCULL_IOC_MAGIC, SCULL_IOCGQSET_IDX, int)
#define SCULL_IOCQQUANTUM _IO(SCULL_IOC_MAGIC, SCULL_IOCQQUANTUM_IDX)
#define SCULL_IOCQQSET _IO(SCULL_IOC_MAGIC, SCULL_IOCQQSET_IDX)
#define SCULL_IOCXQUANTUM _IOWR(SCULL_IOC_MAGIC,SCULL_IOCXQUANTUM_IDX, int)
#define SCULL_IOCXQSET _IOWR(SCULL_IOC_MAGIC,SCULL_IOCXQSET_IDX, int)
#define SCULL_IOCHQUANTUM _IO(SCULL_IOC_MAGIC, SCULL_IOCHQUANTUM_IDX)
#define SCULL_IOCHQSET _IO(SCULL_IOC_MAGIC, SCULL_IOCHQSET_IDX)main.c
The signature of access_ok has changed: it no longer distinguishes the direction of the data access and only requires addr and size as arguments.
static inline int __access_ok(const void __user *ptr, unsigned long size);
Hence, the code that validates the user address is simplified to a single if statement.
if (!access_ok((void __user *)arg, _IOC_SIZE(cmd)))
return -EFAULT;
In addition, the ioctl-related member of file_operations has changed from .ioctl to .unlocked_ioctl. .unlocked_ioctl was introduced to replace the original .ioctl in order to remove the Big Kernel Lock bottleneck, which leaves the responsibility for proper locking to the driver developer.
/*
 * File operations of the scull device. ioctl requests are routed through
 * the .unlocked_ioctl member.
 */
struct file_operations scull_fops = {
	.owner          = THIS_MODULE,
	.open           = scull_open,
	.release        = scull_release,
	.read           = scull_read,
	.write          = scull_write,
	.unlocked_ioctl = scull_ioctl,
};
The implementation of unlocked_ioctl is very much the same as the one in original example code.
⚠️ Note the signature of
unlocked_ioctl is long (*)(struct file *, unsigned int, unsigned long).
long scull_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
/*
* extract the type and number bitfields, and don't decode
* wrong cmds: return ENOTTY (inappropriate ioctl) before access_ok()
*/
if (_IOC_TYPE(cmd) != SCULL_IOC_MAGIC) return -ENOTTY;
if (_IOC_NR(cmd) > SCULL_IOC_MAXNR) return -ENOTTY;
if (!access_ok((void __user *)arg, _IOC_SIZE(cmd)))
return -EFAULT;
int retval = 0, tmp;
switch(cmd) {
case SCULL_IOCRESET:
scull_quantum = SCULL_QUANTUM;
scull_qset = SCULL_QSET;
break;
case SCULL_IOCSQUANTUM: /* Set: arg points to the value */
if (! capable (CAP_SYS_ADMIN))
return -EPERM;
retval = __get_user(scull_quantum, (int __user *)arg);
break;
case SCULL_IOCTQUANTUM: /* Tell: arg is the value */
if (! capable (CAP_SYS_ADMIN))
return -EPERM;
scull_quantum = arg;
break;
case SCULL_IOCGQUANTUM: /* Get: arg is pointer to result */
retval = __put_user(scull_quantum, (int __user *)arg);
break;
case SCULL_IOCQQUANTUM: /* Query: return it (it's positive) */
return scull_quantum;
case SCULL_IOCXQUANTUM: /* eXchange: use arg as pointer */
if (! capable (CAP_SYS_ADMIN))
return -EPERM;
tmp = scull_quantum;
retval = __get_user(scull_quantum, (int __user *)arg);
if (retval == 0)
retval = __put_user(tmp, (int __user *)arg);
break;
case SCULL_IOCHQUANTUM: /* sHift: like Tell + Query */
if (! capable (CAP_SYS_ADMIN))
return -EPERM;
tmp = scull_quantum;
scull_quantum = arg;
return tmp;
case SCULL_IOCSQSET:
if (! capable (CAP_SYS_ADMIN))
return -EPERM;
retval = __get_user(scull_qset, (int __user *)arg);
break;
case SCULL_IOCTQSET:
if (! capable (CAP_SYS_ADMIN))
return -EPERM;
scull_qset = arg;
break;
case SCULL_IOCGQSET:
retval = __put_user(scull_qset, (int __user *)arg);
break;
case SCULL_IOCQQSET:
return scull_qset;
case SCULL_IOCXQSET:
if (! capable (CAP_SYS_ADMIN))
return -EPERM;
tmp = scull_qset;
retval = __get_user(scull_qset, (int __user *)arg);
if (retval == 0)
retval = put_user(tmp, (int __user *)arg);
break;
case SCULL_IOCHQSET:
if (! capable (CAP_SYS_ADMIN))
return -EPERM;
tmp = scull_qset;
scull_qset = arg;
return tmp;
default: /* redundant, as cmd was checked against MAXNR */
return -ENOTTY;
}
return retval;
}Test scull ioctl
The following simple user-space program can be used to test the ioctl commands we have implemented. It simply reads the original values of quantum and qset and verifies that they are properly changed.
scull_ioctl.c
#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <errno.h>
#include "scull_ioctl.h"
/*
 * Fetch quantum and qset from the device with the two "Get" ioctls and
 * print each value together with the call's result (errno when it failed).
 */
void get_values(int *quantum, int *qset, int *fd)
{
    int rc;
    int shown;

    rc = ioctl(*fd, SCULL_IOCGQUANTUM, quantum);
    shown = rc;
    if (rc < 0)
        shown = errno;
    printf("[SCULL_IOCGQUANTUM] quantum = %d, retval = %d\n", *quantum, shown);

    rc = ioctl(*fd, SCULL_IOCGQSET, qset);
    shown = rc;
    if (rc < 0)
        shown = errno;
    printf("[SCULL_IOCGQSET] qset = %d, retval = %d\n", *qset, shown);
}
/*
 * Push *quantum and *qset to the device with the two pointer-based "Set"
 * ioctls and print each value together with the call's result (errno when
 * it failed).
 */
void set_values(int *quantum, int *qset, int *fd)
{
    int rc;
    int shown;

    rc = ioctl(*fd, SCULL_IOCSQUANTUM, quantum);
    shown = rc;
    if (rc < 0)
        shown = errno;
    printf("[SCULL_IOCSQUANTUM] quantum = %d, retval = %d\n", *quantum, shown);

    rc = ioctl(*fd, SCULL_IOCSQSET, qset);
    shown = rc;
    if (rc < 0)
        shown = errno;
    printf("[SCULL_IOCSQSET] qset = %d, retval = %d\n", *qset, shown);
}
/*
 * Open /dev/scull0, print the current quantum and qset, set them to 10 and
 * 20 through ioctl, then read them back to show the change.
 *
 * Returns 0 on completion, -1 when the device cannot be opened.
 */
int main(void) {
    int fd = open("/dev/scull0", O_RDWR);
    if (fd < 0) {
        perror("Failed to open device");
        return -1;
    }

    /*
     * Initialised because get_values() prints both values even when the
     * ioctl fails and leaves them untouched.
     */
    int quantum = 0, qset = 0;
    get_values(&quantum, &qset, &fd);

    quantum = 10;
    qset = 20;
    set_values(&quantum, &qset, &fd);
    get_values(&quantum, &qset, &fd);

    close(fd);
    return 0;
}
When we run the program with admin privileges, the corresponding values are indeed properly changed through ioctl commands.

6.2 Blocking I/O
sleepy module
If a user process writes to the sleepy module, it sets the flag and wakes up all the processes in the given wait_queue.
ssize_t sleepy_write(struct file *filp, const char __user *buf, size_t count, loff_t* offset)
{
MDEBUG("Process %i (%s) awakening reader processes\n", current->pid, current->comm);
flag = 1;
wake_up_interruptible(&wait_queue);
// return to prevent the retrail of write system call
return count;
}When a user process reads sleepy module, it will sleep if flag is not set simply return after resetting flag to 0.
/*
 * Read handler of the sleepy device: sleeps on wait_queue until flag is
 * non-zero, then clears flag. No data is copied to the user buffer.
 *
 * Returns 0 (end of file) once woken by a writer, or -ERESTARTSYS when the
 * sleep was interrupted by a signal.
 */
ssize_t sleepy_read(struct file *filp, char __user *buf, size_t count, loff_t *offset)
{
	MDEBUG("Process %i (%s) sleep to wait for writers\n", current->pid, current->comm);

	/*
	 * A non-zero result means a signal ended the sleep, not a writer.
	 * Bail out without touching flag so the wake-up is not consumed by
	 * a reader that was never really woken.
	 */
	if (wait_event_interruptible(wait_queue, flag != 0))
		return -ERESTARTSYS;

	/*
	 * Nothing serialises the readers between the wait above and this
	 * store, so several of them can get through on a single write.
	 */
	flag = 0;
	MDEBUG("Process %i (%s) awaken\n", current->pid, current->comm);

	/* indicates EOF */
	return 0;
}
Race condition of sleepy module
As mentioned in the book, the sleepy module is vulnerable to a race condition when awakening the reader processes. It is not guaranteed that only one reader process will be awakened at a time, because of the window between wait_event_interruptible(wait_queue, flag != 0); and flag = 0; in the sleepy_read function.
This race condition can be easily reproduced if multiple reader processes are sleeping in the wait queue before a write system call occurs. multithread_sleepy.py spawns multiple threads to read the sleepy module, and each thread that finishes reading prints a simple message.
import os
import threading
# Path of the device node the reader threads open.
sleepy_device = os.path.join(os.sep, "dev", "sleepy")
# Number of reader threads to spawn.
thread_count = 10
def read_sleepy(idx):
    """Open the sleepy device, read from it, and print a message once the read has returned.

    idx is only used to label the message.
    """
    with open(sleepy_device, "r") as dev:
        dev.read()
    print(f"thread {idx} awake up")
threads = []
for i in range(thread_count):
t = threading.Thread(target=read_sleepy, args=(i,))
threads.append(t)
t.start()
for t in threads:
t.join()Run the python script through sudo -E python3 multithread_sleepy.py and then write to sleepy device through echo hello | sudo dd of=/dev/sleepy. If you are lucky enough, you will see multiple threads are awaken on a single write.
