arch/blackfin/mach-bf561/atomic.S

/*
 * Copyright 2007-2008 Analog Devices Inc.
 *              Philippe Gerum <[email protected]>
 *
 * Licensed under the GPL-2 or later.
 */

#include <linux/linkage.h>
#include <asm/blackfin.h>
#include <asm/cache.h>
#include <asm/asm-offsets.h>
#include <asm/rwlock.h>
#include <asm/cplb.h>

.text

.macro coreslot_loadaddr reg:req
        \reg\().l = _corelock;
        \reg\().h = _corelock;
.endm

.macro safe_testset addr:req, scratch:req
#if ANOMALY_05000477
        cli \scratch;
        testset (\addr);
        sti \scratch;
#else
        testset (\addr);
#endif
.endm

/*
 * r0 = address of atomic data to flush and invalidate (32bit).
 *
 * Clear interrupts and return the old mask.
 * We assume that no atomic data can span cachelines.
 *
 * Clobbers: r2:0, p0
 */
ENTRY(_get_core_lock)
        r1 = -L1_CACHE_BYTES;
        r1 = r0 & r1;
        cli r0;
        coreslot_loadaddr p0;
.Lretry_corelock:
        safe_testset p0, r2;
        if cc jump .Ldone_corelock;
        SSYNC(r2);
        jump .Lretry_corelock
.Ldone_corelock:
        p0 = r1;
        /* flush core internal write buffer before invalidate dcache */
        CSYNC(r2);
        flushinv[p0];
        SSYNC(r2);
        rts;
ENDPROC(_get_core_lock)

/*
 * r0 = address of atomic data in uncacheable memory region (32bit).
 *
 * Clear interrupts and return the old mask.
 *
 * Clobbers: r0, p0
 */
ENTRY(_get_core_lock_noflush)
        cli r0;
        coreslot_loadaddr p0;
.Lretry_corelock_noflush:
        safe_testset p0, r2;
        if cc jump .Ldone_corelock_noflush;
        SSYNC(r2);
        jump .Lretry_corelock_noflush
.Ldone_corelock_noflush:
        rts;
ENDPROC(_get_core_lock_noflush)

/*
 * r0 = interrupt mask to restore.
 * r1 = address of atomic data to flush and invalidate (32bit).
 *
 * Interrupts are masked on entry (see _get_core_lock).
 * Clobbers: r2:0, p0
 */
ENTRY(_put_core_lock)
        /* Write-through cache assumed, so no flush needed here. */
        coreslot_loadaddr p0;
        r1 = 0;
        [p0] = r1;
        SSYNC(r2);
        sti r0;
        rts;
ENDPROC(_put_core_lock)
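
/*
 * Roughly equivalent C for the core lock protocol above, for
 * illustration only (not part of the build).  "corelock" stands for
 * the shared _corelock word targeted by testset; cli/sti/ssync and
 * the cacheline flush are shown as pseudo-calls:
 *
 *      unsigned long get_core_lock(void *atomic_data)
 *      {
 *              unsigned long flags = cli();
 *              while (!testset(&corelock))
 *                      ssync();
 *              flushinv_cacheline(atomic_data);
 *              return flags;
 *      }
 *
 *      void put_core_lock(unsigned long flags, void *atomic_data)
 *      {
 *              corelock = 0;   // no flush: write-through cache assumed
 *              ssync();
 *              sti(flags);
 *      }
 */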

#ifdef __ARCH_SYNC_CORE_DCACHE

ENTRY(___raw_smp_mark_barrier_asm)
        [--sp] = rets;
        [--sp] = ( r7:5 );
        [--sp] = r0;
        [--sp] = p1;
        [--sp] = p0;
        call _get_core_lock_noflush;

        /*
         * Calculate current core mask
         */
        GET_CPUID(p1, r7);
        r6 = 1;
        r6 <<= r7;

        /*
         * Set bit of other cores in barrier mask. Don't change current core bit.
         */
        p1.l = _barrier_mask;
        p1.h = _barrier_mask;
        r7 = [p1];
        r5 = r7 & r6;
        r7 = ~r6;
        cc = r5 == 0;
        if cc jump 1f;
        r7 = r7 | r6;
1:
        [p1] = r7;
        SSYNC(r2);

        call _put_core_lock;
        p0 = [sp++];
        p1 = [sp++];
        r0 = [sp++];
        ( r7:5 ) = [sp++];
        rets = [sp++];
        rts;
ENDPROC(___raw_smp_mark_barrier_asm)

ENTRY(___raw_smp_check_barrier_asm)
        [--sp] = rets;
        [--sp] = ( r7:5 );
        [--sp] = r0;
        [--sp] = p1;
        [--sp] = p0;
        call _get_core_lock_noflush;

        /*
         * Calculate current core mask
         */
        GET_CPUID(p1, r7);
        r6 = 1;
        r6 <<= r7;

        /*
         * Clear current core bit in barrier mask if it is set.
         */
        p1.l = _barrier_mask;
        p1.h = _barrier_mask;
        r7 = [p1];
        r5 = r7 & r6;
        cc = r5 == 0;
        if cc jump 1f;
        r6 = ~r6;
        r7 = r7 & r6;
        [p1] = r7;
        SSYNC(r2);

        call _put_core_lock;

        /*
         * Invalidate the entire D-cache of current core.
         */
        sp += -12;
        call _resync_core_dcache
        sp += 12;
        jump 2f;
1:
        call _put_core_lock;
2:
        p0 = [sp++];
        p1 = [sp++];
        r0 = [sp++];
        ( r7:5 ) = [sp++];
        rets = [sp++];
        rts;
ENDPROC(___raw_smp_check_barrier_asm)

/*
 * r0 = irqflags
 * r1 = address of atomic data
 *
 * Clobbers: r2:0, p1:0
 */
_start_lock_coherent:

        [--sp] = rets;
        [--sp] = ( r7:6 );
        r7 = r0;
        p1 = r1;

        /*
         * Determine whether the atomic data was previously
         * owned by another CPU (=r6).
         */
        GET_CPUID(p0, r2);
        r1 = 1;
        r1 <<= r2;
        r2 = ~r1;

        r1 = [p1];
        r1 >>= 28; /* CPU fingerprints are stored in the high nibble. */
        r6 = r1 & r2;
        r1 = [p1];
        r1 <<= 4;
        r1 >>= 4;
        [p1] = r1;

        /*
         * Release the core lock now, but keep IRQs disabled while we are
         * performing the remaining housekeeping chores for the current CPU.
         */
        coreslot_loadaddr p0;
        r1 = 0;
        [p0] = r1;

        /*
         * If another CPU has owned the same atomic section before us,
         * then our D-cached copy of the shared data protected by the
         * current spin/write_lock may be obsolete.
         */
        cc = r6 == 0;
        if cc jump .Lcache_synced

        /*
         * Invalidate the entire D-cache of the current core.
         */
        sp += -12;
        call _resync_core_dcache
        sp += 12;

.Lcache_synced:
        SSYNC(r2);
        sti r7;
        ( r7:6 ) = [sp++];
        rets = [sp++];
        rts

/*
 * r0 = irqflags
 * r1 = address of atomic data
 *
 * Clobbers: r2:0, p1:0
 */
_end_lock_coherent:

        p1 = r1;
        GET_CPUID(p0, r2);
        r2 += 28;
        r1 = 1;
        r1 <<= r2;
        r2 = [p1];
        r2 = r1 | r2;
        [p1] = r2;
        r1 = p1;
        jump _put_core_lock;

#endif /* __ARCH_SYNC_CORE_DCACHE */
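
/*
 * Lock word layout used by the paths below, for reference.  Without
 * __ARCH_SYNC_CORE_DCACHE the word is just the lock value (0/1 for
 * spinlocks, the RW_LOCK_BIAS-based count for rwlocks).  With
 * __ARCH_SYNC_CORE_DCACHE the low 28 bits hold that lock value while
 * the high nibble collects one CPU fingerprint bit per core, set by
 * _end_lock_coherent on unlock; _start_lock_coherent checks the other
 * cores' bits to decide whether the local D-cache must be resynced,
 * then clears the nibble:
 *
 *      bits 31..28     CPU fingerprint mask (one bit per core)
 *      bits 27..0      lock value
 */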

/*
 * r0 = &spinlock->lock
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_spin_is_locked_asm)
        p1 = r0;
        [--sp] = rets;
        call _get_core_lock;
        r3 = [p1];
        cc = bittst( r3, 0 );
        r3 = cc;
        r1 = p1;
        call _put_core_lock;
        rets = [sp++];
        r0 = r3;
        rts;
ENDPROC(___raw_spin_is_locked_asm)

/*
 * r0 = &spinlock->lock
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_spin_lock_asm)
        p1 = r0;
        [--sp] = rets;
.Lretry_spinlock:
        call _get_core_lock;
        r1 = p1;
        r2 = [p1];
        cc = bittst( r2, 0 );
        if cc jump .Lbusy_spinlock
#ifdef __ARCH_SYNC_CORE_DCACHE
        r3 = p1;
        bitset ( r2, 0 ); /* Raise the lock bit. */
        [p1] = r2;
        call _start_lock_coherent
#else
        r2 = 1;
        [p1] = r2;
        call _put_core_lock;
#endif
        rets = [sp++];
        rts;

.Lbusy_spinlock:
        /* We don't touch the atomic area if busy, so that flush
           will behave like nop in _put_core_lock. */
        call _put_core_lock;
        SSYNC(r2);
        r0 = p1;
        jump .Lretry_spinlock
ENDPROC(___raw_spin_lock_asm)

/*
 * r0 = &spinlock->lock
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_spin_trylock_asm)
        p1 = r0;
        [--sp] = rets;
        call _get_core_lock;
        r1 = p1;
        r3 = [p1];
        cc = bittst( r3, 0 );
        if cc jump .Lfailed_trylock
#ifdef __ARCH_SYNC_CORE_DCACHE
        bitset ( r3, 0 ); /* Raise the lock bit. */
        [p1] = r3;
        call _start_lock_coherent
#else
        r2 = 1;
        [p1] = r2;
        call _put_core_lock;
#endif
        r0 = 1;
        rets = [sp++];
        rts;
.Lfailed_trylock:
        call _put_core_lock;
        r0 = 0;
        rets = [sp++];
        rts;
ENDPROC(___raw_spin_trylock_asm)

/*
 * r0 = &spinlock->lock
 *
 * Clobbers: r2:0, p1:0
 */
ENTRY(___raw_spin_unlock_asm)
        p1 = r0;
        [--sp] = rets;
        call _get_core_lock;
        r2 = [p1];
        bitclr ( r2, 0 );
        [p1] = r2;
        r1 = p1;
#ifdef __ARCH_SYNC_CORE_DCACHE
        call _end_lock_coherent
#else
        call _put_core_lock;
#endif
        rets = [sp++];
        rts;
ENDPROC(___raw_spin_unlock_asm)

/*
 * r0 = &rwlock->lock
 *
 * Clobbers: r2:0, p1:0
 */
ENTRY(___raw_read_lock_asm)
        p1 = r0;
        [--sp] = rets;
        call _get_core_lock;
.Lrdlock_try:
        r1 = [p1];
        r1 += -1;
        [p1] = r1;
        cc = r1 < 0;
        if cc jump .Lrdlock_failed
        r1 = p1;
#ifdef __ARCH_SYNC_CORE_DCACHE
        call _start_lock_coherent
#else
        call _put_core_lock;
#endif
        rets = [sp++];
        rts;

.Lrdlock_failed:
        r1 += 1;
        [p1] = r1;
.Lrdlock_wait:
        r1 = p1;
        call _put_core_lock;
        SSYNC(r2);
        r0 = p1;
        call _get_core_lock;
        r1 = [p1];
        cc = r1 < 2;
        if cc jump .Lrdlock_wait;
        jump .Lrdlock_try
ENDPROC(___raw_read_lock_asm)

/*
 * r0 = &rwlock->lock
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_read_trylock_asm)
        p1 = r0;
        [--sp] = rets;
        call _get_core_lock;
        r1 = [p1];
        cc = r1 <= 0;
        if cc jump .Lfailed_tryrdlock;
        r1 += -1;
        [p1] = r1;
        r1 = p1;
#ifdef __ARCH_SYNC_CORE_DCACHE
        call _start_lock_coherent
#else
        call _put_core_lock;
#endif
        rets = [sp++];
        r0 = 1;
        rts;
.Lfailed_tryrdlock:
        r1 = p1;
        call _put_core_lock;
        rets = [sp++];
        r0 = 0;
        rts;
ENDPROC(___raw_read_trylock_asm)

/*
 * r0 = &rwlock->lock
 *
 * Note: Processing controlled by a reader lock should not have
 * any side-effect on cache issues with the other core, so we
 * just release the core lock and exit (no _end_lock_coherent).
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_read_unlock_asm)
        p1 = r0;
        [--sp] = rets;
        call _get_core_lock;
        r1 = [p1];
        r1 += 1;
        [p1] = r1;
        r1 = p1;
        call _put_core_lock;
        rets = [sp++];
        rts;
ENDPROC(___raw_read_unlock_asm)

/*
 * r0 = &rwlock->lock
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_write_lock_asm)
        p1 = r0;
        r3.l = lo(RW_LOCK_BIAS);
        r3.h = hi(RW_LOCK_BIAS);
        [--sp] = rets;
        call _get_core_lock;
.Lwrlock_try:
        r1 = [p1];
        r1 = r1 - r3;
#ifdef __ARCH_SYNC_CORE_DCACHE
        r2 = r1;
        r2 <<= 4;
        r2 >>= 4;
        cc = r2 == 0;
#else
        cc = r1 == 0;
#endif
        if !cc jump .Lwrlock_wait
        [p1] = r1;
        r1 = p1;
#ifdef __ARCH_SYNC_CORE_DCACHE
        call _start_lock_coherent
#else
        call _put_core_lock;
#endif
        rets = [sp++];
        rts;

.Lwrlock_wait:
        r1 = p1;
        call _put_core_lock;
        SSYNC(r2);
        r0 = p1;
        call _get_core_lock;
        r1 = [p1];
#ifdef __ARCH_SYNC_CORE_DCACHE
        r1 <<= 4;
        r1 >>= 4;
#endif
        cc = r1 == r3;
        if !cc jump .Lwrlock_wait;
        jump .Lwrlock_try
ENDPROC(___raw_write_lock_asm)
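
/*
 * The rwlock word starts at RW_LOCK_BIAS.  Readers take one unit each
 * and a writer takes the whole bias, so a roughly equivalent C model
 * of the rwlock paths in this file (illustration only, not part of
 * the build; "count" stands for the rwlock->lock word, accessed under
 * the core lock and with the fingerprint nibble masked out on
 * __ARCH_SYNC_CORE_DCACHE builds) is:
 *
 *      read_lock:      if (--count < 0) { ++count; wait and retry; }
 *      read_unlock:    ++count;
 *      write_lock:     while (count != RW_LOCK_BIAS) wait;
 *                      count -= RW_LOCK_BIAS;
 *      write_unlock:   count += RW_LOCK_BIAS;
 */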

/*
 * r0 = &rwlock->lock
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_write_trylock_asm)
        p1 = r0;
        [--sp] = rets;
        call _get_core_lock;
        r1 = [p1];
        r2.l = lo(RW_LOCK_BIAS);
        r2.h = hi(RW_LOCK_BIAS);
        cc = r1 == r2;
        if !cc jump .Lfailed_trywrlock;
#ifdef __ARCH_SYNC_CORE_DCACHE
        r1 >>= 28;
        r1 <<= 28;
#else
        r1 = 0;
#endif
        [p1] = r1;
        r1 = p1;
#ifdef __ARCH_SYNC_CORE_DCACHE
        call _start_lock_coherent
#else
        call _put_core_lock;
#endif
        rets = [sp++];
        r0 = 1;
        rts;

.Lfailed_trywrlock:
        r1 = p1;
        call _put_core_lock;
        rets = [sp++];
        r0 = 0;
        rts;
ENDPROC(___raw_write_trylock_asm)

/*
 * r0 = &rwlock->lock
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_write_unlock_asm)
        p1 = r0;
        r3.l = lo(RW_LOCK_BIAS);
        r3.h = hi(RW_LOCK_BIAS);
        [--sp] = rets;
        call _get_core_lock;
        r1 = [p1];
        r1 = r1 + r3;
        [p1] = r1;
        r1 = p1;
#ifdef __ARCH_SYNC_CORE_DCACHE
        call _end_lock_coherent
#else
        call _put_core_lock;
#endif
        rets = [sp++];
        rts;
ENDPROC(___raw_write_unlock_asm)

/*
 * r0 = ptr
 * r1 = value
 *
 * Add a signed value to a 32bit word and return the new value atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_atomic_update_asm)
        p1 = r0;
        r3 = r1;
        [--sp] = rets;
        call _get_core_lock;
        r2 = [p1];
        r3 = r3 + r2;
        [p1] = r3;
        r1 = p1;
        call _put_core_lock;
        r0 = r3;
        rets = [sp++];
        rts;
ENDPROC(___raw_atomic_update_asm)

/*
 * r0 = ptr
 * r1 = mask
 *
 * Clear the mask bits from a 32bit word and return the old 32bit value
 * atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_atomic_clear_asm)
        p1 = r0;
        r3 = ~r1;
        [--sp] = rets;
        call _get_core_lock;
        r2 = [p1];
        r3 = r2 & r3;
        [p1] = r3;
        r3 = r2;
        r1 = p1;
        call _put_core_lock;
        r0 = r3;
        rets = [sp++];
        rts;
ENDPROC(___raw_atomic_clear_asm)

/*
 * r0 = ptr
 * r1 = mask
 *
 * Set the mask bits into a 32bit word and return the old 32bit value
 * atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_atomic_set_asm)
        p1 = r0;
        r3 = r1;
        [--sp] = rets;
        call _get_core_lock;
        r2 = [p1];
        r3 = r2 | r3;
        [p1] = r3;
        r3 = r2;
        r1 = p1;
        call _put_core_lock;
        r0 = r3;
        rets = [sp++];
        rts;
ENDPROC(___raw_atomic_set_asm)

/*
 * r0 = ptr
 * r1 = mask
 *
 * XOR the mask bits with a 32bit word and return the old 32bit value
 * atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_atomic_xor_asm)
        p1 = r0;
        r3 = r1;
        [--sp] = rets;
        call _get_core_lock;
        r2 = [p1];
        r3 = r2 ^ r3;
        [p1] = r3;
        r3 = r2;
        r1 = p1;
        call _put_core_lock;
        r0 = r3;
        rets = [sp++];
        rts;
ENDPROC(___raw_atomic_xor_asm)
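
/*
 * The four atomic RMW helpers above share one pattern; in roughly
 * equivalent C (illustration only, not part of the build, using the
 * get/put_core_lock sketch near the top of this file):
 *
 *      u32 raw_atomic_op(volatile u32 *ptr, u32 arg)
 *      {
 *              unsigned long flags = get_core_lock((void *)ptr);
 *              u32 old = *ptr;
 *              *ptr = old OP arg;      // +, & ~, |, ^ respectively
 *              put_core_lock(flags, (void *)ptr);
 *              return ...;             // new value for update, old value
 *                                      // for clear/set/xor
 *      }
 */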

/*
 * r0 = ptr
 * r1 = mask
 *
 * Perform a logical AND between the mask bits and a 32bit word, and
 * return the masked value. We need this on this architecture in
 * order to invalidate the local cache before testing.
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_atomic_test_asm)
        p1 = r0;
        r3 = r1;
        r1 = -L1_CACHE_BYTES;
        r1 = r0 & r1;
        p0 = r1;
        /* flush core internal write buffer before invalidate dcache */
        CSYNC(r2);
        flushinv[p0];
        SSYNC(r2);
        r0 = [p1];
        r0 = r0 & r3;
        rts;
ENDPROC(___raw_atomic_test_asm)

/*
 * r0 = ptr
 * r1 = value
 *
 * Swap *ptr with value and return the old 32bit value atomically.
 * Clobbers: r3:0, p1:0
 */
#define __do_xchg(src, dst) \
        p1 = r0; \
        r3 = r1; \
        [--sp] = rets; \
        call _get_core_lock; \
        r2 = src; \
        dst = r3; \
        r3 = r2; \
        r1 = p1; \
        call _put_core_lock; \
        r0 = r3; \
        rets = [sp++]; \
        rts;

ENTRY(___raw_xchg_1_asm)
        __do_xchg(b[p1] (z), b[p1])
ENDPROC(___raw_xchg_1_asm)

ENTRY(___raw_xchg_2_asm)
        __do_xchg(w[p1] (z), w[p1])
ENDPROC(___raw_xchg_2_asm)

ENTRY(___raw_xchg_4_asm)
        __do_xchg([p1], [p1])
ENDPROC(___raw_xchg_4_asm)

/*
 * r0 = ptr
 * r1 = new
 * r2 = old
 *
 * Swap *ptr with new if *ptr == old and return the previous *ptr
 * value atomically.
 *
 * Clobbers: r3:0, p1:0
 */
#define __do_cmpxchg(src, dst) \
        [--sp] = rets; \
        [--sp] = r4; \
        p1 = r0; \
        r3 = r1; \
        r4 = r2; \
        call _get_core_lock; \
        r2 = src; \
        cc = r2 == r4; \
        if !cc jump 1f; \
        dst = r3; \
1:      r3 = r2; \
        r1 = p1; \
        call _put_core_lock; \
        r0 = r3; \
        r4 = [sp++]; \
        rets = [sp++]; \
        rts;

ENTRY(___raw_cmpxchg_1_asm)
        __do_cmpxchg(b[p1] (z), b[p1])
ENDPROC(___raw_cmpxchg_1_asm)

ENTRY(___raw_cmpxchg_2_asm)
        __do_cmpxchg(w[p1] (z), w[p1])
ENDPROC(___raw_cmpxchg_2_asm)

ENTRY(___raw_cmpxchg_4_asm)
        __do_cmpxchg([p1], [p1])
ENDPROC(___raw_cmpxchg_4_asm)

/*
 * r0 = ptr
 * r1 = bitnr
 *
 * Set a bit in a 32bit word and return the old 32bit value atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_bit_set_asm)
        r2 = r1;
        r1 = 1;
        r1 <<= r2;
        jump ___raw_atomic_set_asm
ENDPROC(___raw_bit_set_asm)

/*
 * r0 = ptr
 * r1 = bitnr
 *
 * Clear a bit in a 32bit word and return the old 32bit value atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_bit_clear_asm)
        r2 = r1;
        r1 = 1;
        r1 <<= r2;
        jump ___raw_atomic_clear_asm
ENDPROC(___raw_bit_clear_asm)

/*
 * r0 = ptr
 * r1 = bitnr
 *
 * Toggle a bit in a 32bit word and return the old 32bit value atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_bit_toggle_asm)
        r2 = r1;
        r1 = 1;
        r1 <<= r2;
        jump ___raw_atomic_xor_asm
ENDPROC(___raw_bit_toggle_asm)

/*
 * r0 = ptr
 * r1 = bitnr
 *
 * Test-and-set a bit in a 32bit word and return the old bit value atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_bit_test_set_asm)
        [--sp] = rets;
        [--sp] = r1;
        call ___raw_bit_set_asm
        r1 = [sp++];
        r2 = 1;
        r2 <<= r1;
        r0 = r0 & r2;
        cc = r0 == 0;
        if cc jump 1f
        r0 = 1;
1:
        rets = [sp++];
        rts;
ENDPROC(___raw_bit_test_set_asm)

/*
 * r0 = ptr
 * r1 = bitnr
 *
 * Test-and-clear a bit in a 32bit word and return the old bit value atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_bit_test_clear_asm)
        [--sp] = rets;
        [--sp] = r1;
        call ___raw_bit_clear_asm
        r1 = [sp++];
        r2 = 1;
        r2 <<= r1;
        r0 = r0 & r2;
        cc = r0 == 0;
        if cc jump 1f
        r0 = 1;
1:
        rets = [sp++];
        rts;
ENDPROC(___raw_bit_test_clear_asm)
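
/*
 * The two test-and-modify helpers above (and the toggle variant
 * below) reuse the atomic set/clear/xor routines, which return the
 * old 32bit word, and then reduce that word to the old bit value.
 * Roughly equivalent C (illustration only, not part of the build):
 *
 *      int raw_bit_test_set(volatile u32 *ptr, int bitnr)
 *      {
 *              u32 old = raw_atomic_set(ptr, 1 << bitnr);
 *              return (old >> bitnr) & 1;
 *      }
 */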

/*
 * r0 = ptr
 * r1 = bitnr
 *
 * Test-and-toggle a bit in a 32bit word,
 * and return the old bit value atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_bit_test_toggle_asm)
        [--sp] = rets;
        [--sp] = r1;
        call ___raw_bit_toggle_asm
        r1 = [sp++];
        r2 = 1;
        r2 <<= r1;
        r0 = r0 & r2;
        cc = r0 == 0;
        if cc jump 1f
        r0 = 1;
1:
        rets = [sp++];
        rts;
ENDPROC(___raw_bit_test_toggle_asm)

/*
 * r0 = ptr
 * r1 = bitnr
 *
 * Test a bit in a 32bit word and return its value.
 * We need this on this architecture in order to invalidate
 * the local cache before testing.
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_bit_test_asm)
        r2 = r1;
        r1 = 1;
        r1 <<= r2;
        jump ___raw_atomic_test_asm
ENDPROC(___raw_bit_test_asm)

/*
 * r0 = ptr
 *
 * Fetch and return an uncached 32bit value.
 *
 * Clobbers: r2:0, p1:0
 */
ENTRY(___raw_uncached_fetch_asm)
        p1 = r0;
        r1 = -L1_CACHE_BYTES;
        r1 = r0 & r1;
        p0 = r1;
        /* flush core internal write buffer before invalidate dcache */
        CSYNC(r2);
        flushinv[p0];
        SSYNC(r2);
        r0 = [p1];
        rts;
ENDPROC(___raw_uncached_fetch_asm)
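
/*
 * The read-only helpers (___raw_atomic_test_asm, ___raw_bit_test_asm
 * and ___raw_uncached_fetch_asm above) never take the core lock: they
 * only need a fresh copy of the word, so they drain the write buffer
 * and invalidate the containing cacheline before loading.  Roughly
 * equivalent C (illustration only, not part of the build):
 *
 *      u32 raw_uncached_fetch(const volatile u32 *ptr)
 *      {
 *              flushinv_cacheline(ptr);        // CSYNC + flushinv + SSYNC
 *              return *ptr;
 *      }
 */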