/*
 * Copyright 2010 Tilera Corporation. All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation, version 2.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
 * NON INFRINGEMENT. See the GNU General Public License for
 * more details.
 *
 * Support routines for atomic operations. Each function takes:
 *
 * r0: address to manipulate
 * r1: pointer to atomic lock guarding this operation (for ATOMIC_LOCK_REG)
 * r2: new value to write, or for cmpxchg/add_unless, value to compare against
 * r3: (cmpxchg/xchg_add_unless) new value to write or add;
 *     (atomic64 ops) high word of value to write
 * r4/r5: (cmpxchg64/add_unless64) new value to write or add
 *
 * The 32-bit routines return a "struct __get_user" so that the futex code
 * has an opportunity to return -EFAULT to the user if needed.
 * The 64-bit routines just return a "long long" with the value,
 * since they are only used from kernel space and don't expect to fault.
 * Support for 16-bit ops is included in the framework but we don't provide
 * any (x86_64 has an atomic_inc_short(), so we might want to some day).
 *
 * Note that the caller is advised to issue a suitable L1 or L2
 * prefetch on the address being manipulated to avoid extra stalls.
 * In addition, the hot path is on two icache lines, and we start with
 * a jump to the second line to make sure they are both in cache so
 * that we never stall waiting on icache fill while holding the lock.
 * (This doesn't work out with most 64-bit ops, since they consume
 * too many bundles, so may take an extra i-cache stall.)
 *
 * These routines set the INTERRUPT_CRITICAL_SECTION bit, just
 * like sys_cmpxchg(), so that NMIs like PERF_COUNT will not interrupt
 * the code, just page faults.
 *
 * If the load or store faults in a way that can be directly fixed in
 * the do_page_fault_ics() handler (e.g. a vmalloc reference) we fix it
 * directly, return to the instruction that faulted, and retry it.
 *
 * If the load or store faults in a way that potentially requires us
 * to release the atomic lock, then retry (e.g. a migrating PTE), we
 * reset the PC in do_page_fault_ics() to the "tns" instruction so
 * that on return we will reacquire the lock and restart the op. We
 * are somewhat overloading the exception_table_entry notion by doing
 * this, since those entries are not normally used for migrating PTEs.
 *
 * If the main page fault handler discovers a bad address, it will see
 * the PC pointing to the "tns" instruction (due to the earlier
 * exception_table_entry processing in do_page_fault_ics), and
 * re-reset the PC to the fault handler, atomic_bad_address(), which
 * effectively takes over from the atomic op and can either return a
 * bad "struct __get_user" (for user addresses) or can just panic (for
 * bad kernel addresses).
 *
 * Note that if the value we would store is the same as what we
 * loaded, we bypass the store. Other platforms with true atomics can
 * make the guarantee that a non-atomic __clear_bit(), for example,
 * can safely race with an atomic test_and_set_bit(); this example is
 * from bit_spinlock.h in slub_lock() / slub_unlock(). We can't do
 * that on Tile since the "atomic" op is really just a
 * read/modify/write, and can race with the non-atomic
 * read/modify/write. However, if we can short-circuit the write when
 * it is not needed, in the atomic case, we avoid the race.
 */

#include <linux/linkage.h>
#include <asm/atomic.h>
#include <asm/page.h>
#include <asm/processor.h>

	.section .text.atomic,"ax"
ENTRY(__start_atomic_asm_code)

/*
 * atomic_op name, bitwidth, body
 *
 * Emits one routine called __atomic<name> into .text.atomic.
 *
 *   name      suffix appended to "__atomic" to form the symbol
 *   bitwidth  16, 32 or 64; selects lh/sh vs. lw/sw, and for 64 adds
 *             the second-word load/store through r28
 *   body      instruction string that computes the value to store
 *             (r24, plus r25 for 64-bit) from the loaded value and
 *             the arguments; it may branch to 3f to skip the store
 *
 * Register roles inside the routine:
 *   r21       result of the "tns" on the lock word
 *   r22/r23   value loaded from memory (low word / high word); these
 *             are what is handed back in r0/r1
 *   r24/r25   value to be stored (low word / high word); r24 also
 *             holds 1 on entry, used to set INTERRUPT_CRITICAL_SECTION
 *   r26/r27   scratch comparison results
 *   r28       r0 + 4, the address of the high word
 *   r23/r25   additionally reused as maximum/current backoff while
 *             spinning for the lock, before the load at 1: overwrites them
 *
 * Local labels:
 *   1:  load the current value (entered once the lock is held)
 *   2:  store the new value
 *   3:  set up the return value, release the lock, return
 *   4:  real start of the work, placed after the return path (the
 *       "second cache line" targeted by the entry bundle)
 *   5:  start of one backoff period (SMP only)
 *   6:  spin until the backoff period has elapsed (SMP only)
 */
	.macro  atomic_op, name, bitwidth, body
	.align  64
STD_ENTRY_SECTION(__atomic\name, .text.atomic)
	{
	 movei  r24, 1
	 j      4f		/* branch to second cache line */
	}
1:	{
	 .ifc \bitwidth,16
	 lh     r22, r0
	 .else
	 lw     r22, r0
	 addi   r28, r0, 4	/* address of the high word (used if 64-bit) */
	 .endif
	}
	.ifc \bitwidth,64
	lw      r23, r28
	.endif
	\body /* set r24, and r25 if 64-bit */
	{
	 seq    r26, r22, r24	/* low word unchanged? */
	 seq    r27, r23, r25	/* high word unchanged? (only tested if 64-bit) */
	}
	.ifc \bitwidth,64
	bbnst   r27, 2f		/* high word differs: must write back */
	.endif
	bbs     r26, 3f		/* skip write-back if it's the same value */
2:	{
	 .ifc \bitwidth,16
	 sh     r0, r24
	 .else
	 sw     r0, r24
	 .endif
	}
	.ifc \bitwidth,64
	sw      r28, r25
	.endif
	mf
3:	{
	 move   r0, r22		/* return the value that was loaded */
	 .ifc \bitwidth,64
	 move   r1, r23
	 .else
	 move   r1, zero
	 .endif
	 sw     ATOMIC_LOCK_REG_NAME, zero	/* release the lock */
	}
	mtspr   INTERRUPT_CRITICAL_SECTION, zero
	jrp     lr
4:	{
	 move   ATOMIC_LOCK_REG_NAME, r1
	 mtspr  INTERRUPT_CRITICAL_SECTION, r24
	}
#ifndef CONFIG_SMP
	j       1b		/* no atomic locks */
#else
	{
	 tns    r21, ATOMIC_LOCK_REG_NAME
	 moveli r23, 2048       /* maximum backoff time in cycles */
	}
	{
	 bzt    r21, 1b		/* branch if lock acquired */
	 moveli r25, 32         /* starting backoff time in cycles */
	}
5:	mtspr   INTERRUPT_CRITICAL_SECTION, zero
	mfspr   r26, CYCLE_LOW  /* get start point for this backoff */
6:	mfspr   r22, CYCLE_LOW  /* test to see if we've backed off enough */
	sub     r22, r22, r26
	slt     r22, r22, r25
	bbst    r22, 6b
	{
	 mtspr  INTERRUPT_CRITICAL_SECTION, r24
	 shli   r25, r25, 1     /* double the backoff; retry the tns */
	}
	{
	 tns    r21, ATOMIC_LOCK_REG_NAME
	 slt    r26, r23, r25   /* is the proposed backoff too big? */
	}
	{
	 bzt    r21, 1b		/* branch if lock acquired */
	 mvnz   r25, r26, r23	/* if too big, use the maximum instead */
	}
	j       5b
#endif
	STD_ENDPROC(__atomic\name)
	/*
	 * Only the 32-bit routines get exception-table entries: the load
	 * at 1: and the store at 2: are paired with the routine's entry
	 * point, and the entry point is paired with __atomic_bad_address.
	 */
	.ifc \bitwidth,32
	.pushsection __ex_table,"a"
	.word   1b, __atomic\name
	.word   2b, __atomic\name
	.word   __atomic\name, __atomic_bad_address
	.popsection
	.endif
	.endm

atomic_op _cmpxchg, 32, "seq r26, r22, r2; { bbns r26, 3f; move r24, r3 }"
atomic_op _xchg, 32, "move r24, r2"
atomic_op _xchg_add, 32, "add r24, r22, r2"
atomic_op _xchg_add_unless, 32, \
	"sne r26, r22, r2; { bbns r26, 3f; add r24, r22, r3 }"
atomic_op _or, 32, "or r24, r22, r2"
atomic_op _andn, 32, "nor r2, r2, zero; and r24, r22, r2"
atomic_op _xor, 32, "xor r24, r22, r2"

atomic_op 64_cmpxchg, 64, "{ seq r26, r22, r2; seq r27, r23, r3 }; \
	{ bbns r26, 3f; move r24, r4 }; { bbns r27, 3f; move r25, r5 }"
atomic_op 64_xchg, 64, "{ move r24, r2; move r25, r3 }"
atomic_op 64_xchg_add, 64, "{ add r24, r22, r2; add r25, r23, r3 }; \
	slt_u r26, r24, r22; add r25, r25, r26"
/*
 * Add r5:r4 to the 64-bit value unless it equals r3:r2.  The two
 * word-wise "not equal" results are OR-ed together so that the add is
 * skipped only when BOTH words match.  Branching to 3f on each word
 * separately would skip the add whenever either word matched, e.g. it
 * would refuse to modify 0x100000000 when asked to add unless zero.
 * The bundle count is the same as the two-branch form.
 */
atomic_op 64_xchg_add_unless, 64, \
	"{ sne r26, r22, r2; sne r27, r23, r3 }; \
	{ or r26, r26, r27; add r24, r22, r4 }; \
	{ bbns r26, 3f; add r25, r23, r5 }; \
	slt_u r26, r24, r22; add r25, r25, r26"

	jrp     lr              /* happy backtracer */

ENTRY(__end_atomic_asm_code)