diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 3f180b857e20..1e7f2e2ba4f0 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -2912,6 +2912,29 @@ static void btrfs_release_delalloc_bytes(struct btrfs_fs_info *fs_info,
 	btrfs_put_block_group(cache);
 }
 
+static struct btrfs_trans_handle *finish_io_join_trans(struct inode *inode)
+{
+	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+	struct btrfs_root *root = BTRFS_I(inode)->root;
+	bool nolock = false;
+
+	if (btrfs_is_free_space_inode(BTRFS_I(inode))) {
+		nolock = true;
+	} else if (btrfs_test_opt(fs_info, FLUSHONCOMMIT)) {
+		spin_lock(&fs_info->trans_lock);
+		if (fs_info->running_transaction &&
+		    fs_info->running_transaction->state >=
+		    TRANS_STATE_COMMIT_START)
+			nolock = true;
+		spin_unlock(&fs_info->trans_lock);
+	}
+
+	if (nolock)
+		return btrfs_join_transaction_nolock(root);
+
+	return btrfs_join_transaction(root);
+}
+
 /* as ordered data IO finishes, this gets called so we can finish
  * an ordered extent if the range of bytes in the file it covers are
  * fully written.
@@ -2928,7 +2951,6 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
 	int compress_type = 0;
 	int ret = 0;
 	u64 logical_len = ordered_extent->len;
-	bool nolock;
 	bool truncated = false;
 	bool range_locked = false;
 	bool clear_new_delalloc_bytes = false;
@@ -2939,8 +2961,6 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
 	    !test_bit(BTRFS_ORDERED_DIRECT, &ordered_extent->flags))
 		clear_new_delalloc_bytes = true;
 
-	nolock = btrfs_is_free_space_inode(BTRFS_I(inode));
-
 	if (test_bit(BTRFS_ORDERED_IOERR, &ordered_extent->flags)) {
 		ret = -EIO;
 		goto out;
@@ -2970,10 +2990,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
 		btrfs_qgroup_free_data(inode, NULL, ordered_extent->file_offset,
 				       ordered_extent->len);
 		btrfs_ordered_update_i_size(inode, 0, ordered_extent);
-		if (nolock)
-			trans = btrfs_join_transaction_nolock(root);
-		else
-			trans = btrfs_join_transaction(root);
+		trans = finish_io_join_trans(inode);
 		if (IS_ERR(trans)) {
 			ret = PTR_ERR(trans);
 			trans = NULL;
@@ -3005,10 +3022,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
 			EXTENT_DEFRAG, 0, 0, &cached_state);
 	}
 
-	if (nolock)
-		trans = btrfs_join_transaction_nolock(root);
-	else
-		trans = btrfs_join_transaction(root);
+	trans = finish_io_join_trans(inode);
 	if (IS_ERR(trans)) {
 		ret = PTR_ERR(trans);
 		trans = NULL;
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index acdad6d658f5..d776990fe8e5 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -1888,17 +1888,8 @@ static void btrfs_cleanup_pending_block_groups(struct btrfs_trans_handle *trans)
 
 static inline int btrfs_start_delalloc_flush(struct btrfs_fs_info *fs_info)
 {
-	/*
-	 * We use writeback_inodes_sb here because if we used
-	 * btrfs_start_delalloc_roots we would deadlock with fs freeze.
-	 * Currently are holding the fs freeze lock, if we do an async flush
-	 * we'll do btrfs_join_transaction() and deadlock because we need to
-	 * wait for the fs freeze lock.  Using the direct flushing we benefit
-	 * from already being in a transaction and our join_transaction doesn't
-	 * have to re-take the fs freeze lock.
-	 */
 	if (btrfs_test_opt(fs_info, FLUSHONCOMMIT))
-		writeback_inodes_sb(fs_info->sb, WB_REASON_SYNC);
+		return btrfs_start_delalloc_roots(fs_info, -1);
 	return 0;
 }
 
