Chapter 6.2 Advanced Char Drivers - Blocking IO
💡Full code in https://github.com/kimgb415/modern-linux-device-driver/tree/Chapter6/BlockingIO
scull pipe device
Most of the code written for scull pipe device is still compatible with modern linux kernel, so this post will focus on the details and explanations of the code.
Device Initialization
Device initialization function handles the character device registration and initialization of some of data structures like wait_queue_head_t and semaphore. Note that the buffers of devices are lazily allocated upon the scull_p_open but not on initialization.
int scull_p_init(dev_t first_dev)
{
int result = register_chrdev_region(first_dev, scull_p_nr_deivces, "scullp");
if (result < 0) {
pr_warn("Unable to get scullp region, error %d\n", result);
return 0;
}
scull_p_devno = first_dev;
// Note that scull_p_devices is array of struct instead of array of struct pointer
scull_p_devices = kzalloc((sizeof(struct scull_pipe) * scull_p_nr_deivces),GFP_KERNEL);
if (scull_p_devices == NULL) {
unregister_chrdev_region(first_dev, scull_p_nr_deivces);
return 0;
}
for (int i = 0; i < scull_p_nr_deivces; ++i) {
struct scull_pipe *curr_dev = &scull_p_devices[i];
sema_init(&curr_dev->sem, 1);
init_waitqueue_head(&curr_dev->write_queue);
init_waitqueue_head(&curr_dev->read_queue);
curr_dev->buffer_size = scull_p_buffer;
scull_p_setup_cdev(curr_dev, i);
}
return scull_p_nr_deivces;
}🥲 Though shame on me, the subtle but critical mistake I made was the wrong memory allocation of
scull_p_deviceswithsizeof(struct scull_pipe *) * scull_p_nr_deivcesinstead ofsizeof(struct scull_pipe) * scull_p_nr_deivces. This buggy code led to the less memory allocated than needed for the array of structures, hence corrupting the kernel memory space and leading to the undefined behaviour of kernel.
Device Cleanup
Cleaning up the scull pipe device is all about freeing the allocated memory areas inside out as below. Don’t forget to mark the freed pointer as NULL to prevent the accidental memory access.
void scull_p_cleanup(void)
{
if (!scull_p_devices)
return;
for (int i = 0; i < scull_p_nr_deivces; ++i) {
struct scull_pipe *curr_dev = &scull_p_devices[i];
cdev_del(&curr_dev->cdev);
if (curr_dev->buffer_start) {
kfree(curr_dev->buffer_start);
curr_dev->buffer_start = NULL;
}
}
unregister_chrdev_region(scull_p_devno, scull_p_nr_deivces);
kfree(scull_p_devices);
scull_p_devices = NULL;
return;
}Open Device
Upon openning the scull device, make sure we store the pointer of scull_pipe data structure into filp->private_data for later manipulation of device upon read of write.
struct scull_pipe *dev;
dev = container_of(inode->i_cdev, struct scull_pipe, cdev);
filp->private_data = dev;We have to record if the device is opened for write or read so that we will only destroy the buffer upon release if no readers nor writers exist.
if (filp->f_flags & FMODE_READ)
dev->readers_cnt++;
if (filp->f_flags & FMODE_WRITE)
dev->writers_cnt++;With above details in mind, opening the scull pipe device is straightforward.
static int scull_p_open(struct inode *inode, struct file *filp)
{
struct scull_pipe *dev;
dev = container_of(inode->i_cdev, struct scull_pipe, cdev);
filp->private_data = dev;
if (down_interruptible(&dev->sem))
return -ERESTARTSYS;
if (!dev->buffer_start) {
dev->buffer_start = kzalloc(dev->buffer_size, GFP_KERNEL);
if (!dev->buffer_start)
return -ENOMEM;
dev->read_pointer = dev->write_pointer = dev->buffer_start;
dev->buffer_end = dev->buffer_start + dev->buffer_size;
}
if (filp->f_flags & FMODE_READ)
dev->readers_cnt++;
if (filp->f_flags & FMODE_WRITE)
dev->writers_cnt++;
up(&dev->sem);
return 0;
}Release Device
As mentioned above, scull pipe driver will simply discard all the remaining contents in the buffer and free the memory if no one is using the device any more.
static int scull_p_release(struct inode *inode, struct file *filp)
{
struct scull_pipe *dev = filp->private_data;
if (down_interruptible(&dev->sem))
return -ERESTARTSYS;
if (filp->f_flags & FMODE_READ)
dev->readers_cnt--;
if (filp->f_flags & FMODE_WRITE)
dev->writers_cnt--;
// discard remaining contents if no more consumers nor producers exist
if (dev->writers_cnt == 0 && dev->readers_cnt == 0) {
if (dev->buffer_start)
kfree(dev->buffer_start);
dev->buffer_start = NULL;
}
up(&dev->sem);
return 0;
}💡 I do consider this implementation of device release to be valid. However, there is a ‘wired’ scenario in which, should a writer successfully write content while no reader is waiting, the content will always be lost.
Scull Pipe Read & Write
The essence of Blocking I/O is that user processes performing I/O, which will be available in the future, will be blocked and then awakened. Readers waiting for content and writers waiting for free space share similarities in that:
- they release the semaphore and put themselves to sleep if the condition is not met,
- upon being awakened by the other party, they acquire the semaphore and double-check if the condition remains valid.
Blocking IO for the reader is shown as an example here.
if (down_interruptible(&dev->sem))
return -ERESTARTSYS;
while (dev->write_pointer == dev->read_pointer) {
up(&dev->sem);
if (filp->f_flags & O_NONBLOCK)
return -EAGAIN;
PDEBUG("pipe reader %s is about to sleep\n", current->comm);
// signal the wait event is interrupted
if (wait_event_interruptible(dev->read_queue, dev->write_pointer != dev->read_pointer))
return -ERESTARTSYS;
if (down_interruptible(&dev->sem))
return -ERESTARTSYS;
}💡 Another lesson I learned from the above implementation is that
wait_event_interruptibleis onlyinterruptibleif the return value of it is properly handled, which, in this case, by returning-ERESTARTSYS. If you’re interested, try removing the if statement that wrapswait_event_interruptible. You’ll find that the reader ends up in an endless loop of sleeping and awakening upon signals likeCtrl+C.
Besides blocking I/O, the tricky part of managing a scull pipe device involves handling the circular buffer. The following two points deserve particular attention:
- Writers can only write up to
buffer_size - 1bytes, thereby leaving one byte empty to indicate the buffer is full (write_pointeris one byte behind theread_pointer). - if either the reader or the writer goes beyond the end of the buffer, scull pipe driver will truncate the request to the end of circular buffer and reset the corresponding pointer to the beginning for the next request.
Again is the example from file_opertaions.read of circular buffer handling.
if (dev->write_pointer >= dev->read_pointer) {
// readers can consume all the content up to where write_pointer is
count = min(count, (size_t) (dev->write_pointer - dev->read_pointer));
} else {
count = min(count, (size_t) (dev->buffer_end - dev->read_pointer));
}
if (copy_to_user(buf, (size_t) dev->read_pointer, count)) {
// release the semaphore in case of EFAULT
up(&dev->sem);
return -EFAULT;
}
dev->read_pointer += count;
// wrap read pointer if reached the end of buffer
if (dev->read_pointer == dev->buffer_end)
dev->read_pointer = dev->buffer_start;Minor code modifications to use scull pipe
Since, the code for scull pipe is written in pipe.c, we have to make sure it’s compiled in Makefile.

Last but not least, don’t forget to add bash scripts to make device nodes for scull pipe upon module insertion and corresponding cleanup upon module removal.

Testing non-blokcing IO
I wrote a simple C program called non-blocking-test.c by mimicking nbtest.c to test the non-blocking IO behavior of the scull pipe device. It simply opens the pipe device and marks the file descriptor with O_NONBLOCK, then attempts to read its content. If no contents are available, it will sleep for a second before the next read attempt.
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <fcntl.h>
#include <errno.h>
#define BUFFER_SIZE 4096
char buffer[BUFFER_SIZE];
int main(int argc, char **argv)
{
int fd = open("/dev/scullpipe0", O_RDONLY);
if (fd == -1) {
perror("Failed to open scull pipe device\n");
exit(1);
}
// mark file descriptor as non-blocking
fcntl(fd, F_SETFL, fcntl(fd, F_GETFL) | O_NONBLOCK);
while (1) {
int n = read(fd, buffer, BUFFER_SIZE);
if (n >= 0)
printf("[scull pipe read] %s\n", buffer);
// break if error other than EAGAIN occurred
if (n < 0 && errno != EAGAIN) {
perror("Error occurred reading scull pipe device\n");
close(fd);
exit(1);
} else {
printf("Retry scull pipe read again\n");
}
// sleep for 1 second
sleep(1);
}
}poll syscall of scull pipe
When file_operations.poll is called for scull pipe device, it will register the current process into both read queue and write queue through poll_wait first and then calculate the poll mask. If the mask does not satisfy the one given from user space, the current user process will sleep. Upon the current process is awaken, it will recalculate the poll mask and return the result back to the user space program.
// poll_table *wait can be NULL if user space program called poll with timeout value of 0
static unsigned int scull_p_poll(struct file *filp, poll_table *wait)
{
struct scull_pipe *dev = filp->private_data;
unsigned int mask = 0;
down(&dev->sem);
// add current process to both write queue and read queue
// Note that the current process is not rescheduled immediately
poll_wait(filp, &dev->write_queue, wait);
poll_wait(filp, &dev->read_queue, wait);
PDEBUG("poll scull pipe device\n");
if (dev->read_pointer != dev->write_pointer)
mask |= POLLIN | POLLRDNORM;
if (space_free(dev))
mask |= POLLOUT | POLLWRNORM;
up(&dev->sem);
return mask;
}Let’s test if the polling works fine with user space C program poll_scull_pipe.c. This user space program polls for the POLLIN event for 5 seconds and then informs the user about polling result.
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
#include <poll.h>
#include <errno.h>
#define TIMEOUT_MS 5000
int main(int argc, char **argv)
{
int ret_val = 1;
struct pollfd *poll_fd = malloc(sizeof(struct pollfd));
if (poll_fd == NULL) {
perror("Failed to allocate memory");
goto exit;
}
// make sure it's opened as non-blokcing mode
poll_fd->fd = open("/dev/scullpipe0", O_RDONLY | O_NONBLOCK);
if (poll_fd->fd == -1) {
perror("Failed to open scull pipe device");
goto exit;
}
poll_fd->events = POLLIN;
int poll_result = poll(poll_fd, 1, TIMEOUT_MS);
if (poll_result == -1) {
perror("Failed to poll scull pipe");
} else if (poll_result == 0) {
perror("Timeout polling scull pipe");
} else {
if (poll_fd->revents & POLLIN) {
char buffer[1024];
ssize_t cnt = read(poll_fd->fd, buffer, sizeof(buffer) - 1);
if (cnt == -1) {
perror("Failed to read scull pipe");
goto exit;
}
buffer[cnt] = '\0';
printf("[scull pipe read] %s\n", buffer);
ret_val = 0;
}
}
exit:
if (poll_fd) {
if (poll_fd->fd != -1)
close(poll_fd->fd);
free(poll_fd);
}
return ret_val;
}