vfs: Handle O_SYNC AIO DIO in generic code properly
Provide VFS helpers for handling O_SYNC AIO DIO writes. Filesystem wanting to
use the helpers has to pass DIO_SYNC_WRITES to __blockdev_direct_IO. Then if
they don't use direct IO end_io handler, generic code takes care of everything
else. Otherwise their end_io handler is passed struct dio_sync_io_work pointer
as 'private' argument and they have to call generic_dio_end_io() to finish
their AIO DIO. Generic code then takes care to call generic_write_sync() from
a workqueue context when AIO DIO is completed.
Since all filesystems using blockdev_direct_IO() need O_SYNC aio dio handling
and the generic one is enough for them, make blockdev_direct_IO() pass
DIO_SYNC_WRITES flag.
+#include <asm/cmpxchg.h>
+
/*
* How many user pages to map in one call to get_user_pages(). This determines
* the size of a structure in the slab cache
@@ -112,6 +114,15 @@ struct dio_submit {
unsigned tail; /* last valid page + 1 */
};
+/* state needed for final sync and completion of O_SYNC AIO DIO */
+struct dio_sync_io_work {
+ struct kiocb *iocb;
+ loff_t offset;
+ ssize_t len;
+ int ret;
+ struct work_struct work;
+};
+
/* dio_state communicated between submission path and end_io */
struct dio {
int flags; /* doesn't change */
@@ -134,6 +145,7 @@ struct dio {
/* AIO related stuff */
struct kiocb *iocb; /* kiocb */
ssize_t result; /* IO result */
+ struct dio_sync_io_work *sync_work; /* work used for O_SYNC AIO */
/*
* pages[] (and any fields placed after it) are not zeroed out at
@@ -261,6 +273,45 @@ static inline struct page *dio_get_page(struct dio *dio,
}
/**
+ * generic_dio_end_io() - generic dio ->end_io handler
+ * @iocb: iocb of finishing DIO
+ * @offset: the byte offset in the file of the completed operation
+ * @bytes: length of the completed operation
+ * @work: work to queue for O_SYNC AIO DIO, NULL otherwise
+ * @ret: error code if IO failed
+ * @is_async: is this AIO?
+ *
+ * This is generic callback to be called when direct IO is finished. It
+ * handles update of number of outstanding DIOs for an inode, completion
+ * of async iocb and queueing of work if we need to call fsync() because
+ * io was O_SYNC.
+ */
+void generic_dio_end_io(struct kiocb *iocb, loff_t offset, ssize_t bytes,
+ struct dio_sync_io_work *work, int ret, bool is_async)
+{
+ struct inode *inode = iocb->ki_filp->f_dentry->d_inode;
+
+ if (!is_async) {
+ inode_dio_done(inode);
+ return;
+ }
+
+ /*
+ * If we need to sync file, we offload completion to workqueue
+ */
+ if (work) {
+ work->ret = ret;
+ work->offset = offset;
+ work->len = bytes;
+ queue_work(inode->i_sb->s_dio_flush_wq, &work->work);
+ } else {
+ aio_complete(iocb, ret, 0);
+ inode_dio_done(inode);
+ }
+}
+EXPORT_SYMBOL(generic_dio_end_io);
+
+/**
* dio_complete() - called when all DIO BIO I/O has been completed
* @offset: the byte offset in the file of the completed operation
*
@@ -302,12 +353,22 @@ static ssize_t dio_complete(struct dio *dio, loff_t offset, ssize_t ret, bool is
ret = transferred;
/* Being remounted read-only */
int s_readonly_remount;
+
+ /* Pending fsync calls for completed AIO DIO with O_SYNC */
+ struct workqueue_struct *s_dio_flush_wq;
};