/*
   Copyright 2003 Richard Curnow, SuperH (UK) Ltd.

   This file is subject to the terms and conditions of the GNU General Public
   License.  See the file "COPYING" in the main directory of this archive
   for more details.

   Tight version of memcpy for the case of just copying a page.
   Prefetch strategy empirically optimised against RTL simulations
   of SH5-101 cut2 eval chip with Cayman board DDR memory.

   Parameters:
   r2  : destination effective address (start of page)
   r3  : source effective address (start of page)
   r18 : return address (loaded into tr0, branched to on exit)

   Always copies 4096 bytes.

   Registers written:
   r2 (advanced by 4096, i.e. left pointing at the end of the destination
   page), r6-r8, r22, r23, r36-r39, r60-r62, tr0-tr3.

   Points to review.
   * Currently the prefetch is 4 lines ahead and the alloco is 2 lines ahead.
     It seems like the prefetch needs to be at least 4 lines ahead to get
     the data into the cache in time, and the allocos contend with outstanding
     prefetches for the same cache set, so it's better to have the numbers
     different.
   * The source prefetches are currently compiled out (the "#if 0" regions
     tagged TAKum03020).  tr2 and r22 are still set up for them but are not
     used by the code that is actually assembled.
   */

	.section .text..SHmedia32,"ax"
	.little

	.balign 8
	.global copy_page
copy_page:

	/* Copy 4096 bytes worth of data from r3 to r2.
	   Do prefetches 4 lines ahead.
	   Do alloco 2 lines ahead */

	/* Branch targets:
	   tr1 = top of the copy loop (label 1)
	   tr2 = alloco check (label 2); only referenced by the disabled
		 prefetch code
	   tr3 = load/store body (label 3)
	   tr0 = return address taken from r18 */
	pta 1f, tr1
	pta 2f, tr2
	pta 3f, tr3
	ptabs r18, tr0

#if 0
	/* TAKum03020 */
	/* Disabled: touch the first four source lines ahead of the loop. */
	ld.q r3, 0x00, r63
	ld.q r3, 0x20, r63
	ld.q r3, 0x40, r63
	ld.q r3, 0x60, r63
#endif
	/* Allocate the first two destination lines; the loop below keeps
	   allocating 0x40 bytes ahead of the line being written. */
	alloco r2, 0x00
	synco		! TAKum03020
	alloco r2, 0x20
	synco		! TAKum03020

	/* Loop limits, all relative to the destination pointer r2:
	   r6 = dest + 3968 (4096 - 128): prefetch cut-off, last 4 lines
	   r7 = dest + 4032 (4096 - 64) : alloco cut-off, last 2 lines
	   r8 = dest + 4096             : end of the page */
	movi 3968, r6
	add  r2, r6, r6
	addi r6, 64, r7
	addi r7, 64, r8

	/* r60 = source - destination, so that r2 + r60 addresses the source
	   byte matching destination byte r2.  r61, r62 and r23 are the same
	   offset plus 8, 16 and 24 for the other three quadwords of a line;
	   r22 is the offset plus 0x80 and is only used by the disabled
	   prefetch. */
	sub r3, r2, r60
	addi r60, 8, r61
	addi r61, 8, r62
	addi r62, 8, r23
	addi r60, 0x80, r22

/* Minimal code size.  The extra branches inside the loop don't cost much
   because they overlap with the time spent waiting for prefetches to
   complete. */
1:
#if 0
	/* TAKum03020 */
	bge/u r2, r6, tr2  ! skip prefetch for last 4 lines
	ldx.q r2, r22, r63 ! prefetch 4 lines hence
#endif
2:
	bge/u r2, r7, tr3  ! skip alloco for last 2 lines
	alloco r2, 0x40    ! alloc destination line 2 lines ahead
	synco		! TAKum03020
3:
	/* Copy 32 bytes: four quadword loads from the source (r2 + offset),
	   then four quadword stores to the destination at r2. */
	ldx.q r2, r60, r36
	ldx.q r2, r61, r37
	ldx.q r2, r62, r38
	ldx.q r2, r23, r39
	st.q  r2,   0, r36
	st.q  r2,   8, r37
	st.q  r2,  16, r38
	st.q  r2,  24, r39
	addi r2, 32, r2    ! advance destination pointer by the 32 bytes copied
	bgt/l r8, r2, tr1  ! loop while r2 is below the end of the page

	blink tr0, r63	   ! return