/*
 * Copyright (C) 2008 Oracle. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 021110-1307, USA.
 */
#include <linux/sched.h>
#include <linux/pagemap.h>
#include <linux/spinlock.h>
#include <linux/page-flags.h>
#include <asm/bug.h>
#include "ctree.h"
#include "extent_io.h"
#include "locking.h"

static inline void spin_nested(struct extent_buffer *eb)
{
        spin_lock(&eb->lock);
}

/*
 * Setting a lock to blocking will drop the spinlock and set the
 * flag that forces other procs who want the lock to wait. After
 * this you can safely schedule with the lock held.
 */
void btrfs_set_lock_blocking(struct extent_buffer *eb)
{
        if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) {
                set_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags);
                spin_unlock(&eb->lock);
        }
        /* exit with the spin lock released and the bit set */
}

/*
 * clearing the blocking flag will take the spinlock again.
 * After this you can't safely schedule
 */
void btrfs_clear_lock_blocking(struct extent_buffer *eb)
{
        if (test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) {
                spin_nested(eb);
                clear_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags);
                smp_mb__after_clear_bit();
        }
        /* exit with the spin lock held */
}

/*
 * unfortunately, many of the places that currently set a lock to blocking
 * don't end up blocking for very long, and often they don't block
 * at all. For a dbench 50 run, if we don't spin on the blocking bit
 * at all, the context switch rate can jump up to 400,000/sec or more.
 *
 * So, we're still stuck with this crummy spin on the blocking bit,
 * at least until the most common causes of the short blocks
 * can be dealt with.
 */
static int btrfs_spin_on_block(struct extent_buffer *eb)
{
        int i;

        for (i = 0; i < 512; i++) {
                if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags))
                        return 1;
                if (need_resched())
                        break;
                cpu_relax();
        }
        return 0;
}

/*
 * This is somewhat different from trylock. It will take the
 * spinlock but if it finds the lock is set to blocking, it will
 * return without the lock held.
 *
 * returns 1 if it was able to take the lock and zero otherwise
 *
 * After this call, scheduling is not safe without first calling
 * btrfs_set_lock_blocking()
 */
int btrfs_try_spin_lock(struct extent_buffer *eb)
{
        int i;

        if (btrfs_spin_on_block(eb)) {
                spin_nested(eb);
                if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags))
                        return 1;
                spin_unlock(&eb->lock);
        }
        /* spin for a bit on the BLOCKING flag */
        for (i = 0; i < 2; i++) {
                cpu_relax();
                if (!btrfs_spin_on_block(eb))
                        break;

                spin_nested(eb);
                if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags))
                        return 1;
                spin_unlock(&eb->lock);
        }
        return 0;
}

/*
 * the autoremove wake function will return 0 if it tried to wake up
 * a process that was already awake, which means that process won't
 * count as an exclusive wakeup. The waitq code will continue waking
 * procs until it finds one that was actually sleeping.
 *
 * For btrfs, this isn't quite what we want. We want a single proc
 * to be notified that the lock is ready for taking. If that proc
 * already happens to be awake, great, it will loop around and try for
 * the lock.
 *
 * So, btrfs_wake_function always returns 1, even when the proc that we
 * tried to wake up was already awake.
 */
static int btrfs_wake_function(wait_queue_t *wait, unsigned mode,
                               int sync, void *key)
{
        autoremove_wake_function(wait, mode, sync, key);
        return 1;
}

/*
 * returns with the extent buffer spinlocked.
 *
 * This will spin and/or wait as required to take the lock, and then
 * return with the spinlock held.
 *
 * After this call, scheduling is not safe without first calling
 * btrfs_set_lock_blocking()
 */
int btrfs_tree_lock(struct extent_buffer *eb)
{
        DEFINE_WAIT(wait);
        wait.func = btrfs_wake_function;

        if (!btrfs_spin_on_block(eb))
                goto sleep;

        while (1) {
                spin_nested(eb);

                /* nobody is blocking, exit with the spinlock held */
                if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags))
                        return 0;

                /*
                 * we have the spinlock, but the real owner is blocking.
                 * wait for them
                 */
                spin_unlock(&eb->lock);

                /*
                 * spin for a bit, and if the blocking flag goes away,
                 * loop around
                 */
                cpu_relax();
                if (btrfs_spin_on_block(eb))
                        continue;
sleep:
                prepare_to_wait_exclusive(&eb->lock_wq, &wait,
                                          TASK_UNINTERRUPTIBLE);

                if (test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags))
                        schedule();

                finish_wait(&eb->lock_wq, &wait);
        }
        return 0;
}

int btrfs_tree_unlock(struct extent_buffer *eb)
{
        /*
         * if we were a blocking owner, we don't have the spinlock held;
         * just clear the bit and look for waiters
         */
        if (test_and_clear_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags))
                smp_mb__after_clear_bit();
        else
                spin_unlock(&eb->lock);

        if (waitqueue_active(&eb->lock_wq))
                wake_up(&eb->lock_wq);
        return 0;
}

void btrfs_assert_tree_locked(struct extent_buffer *eb)
{
        if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags))
                assert_spin_locked(&eb->lock);
}
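
/*
 * Illustrative usage sketch (not part of the original file): the calling
 * pattern the helpers above expect. A caller takes the spinning lock,
 * converts it to the blocking form before anything that might schedule(),
 * and then unlocks; btrfs_tree_unlock() copes with both the spinning and
 * the blocking case. The function below is hypothetical and exists only
 * to demonstrate the API.
 */
static inline void btrfs_tree_lock_example(struct extent_buffer *eb)
{
        btrfs_tree_lock(eb);            /* returns with eb->lock held */
        btrfs_set_lock_blocking(eb);    /* drop the spinlock, set the blocking bit */

        /* it is now safe to schedule while we still own the tree lock */

        btrfs_tree_unlock(eb);          /* clear the bit (or drop the lock) and wake waiters */
}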