/*1* Copyright 2011 Tilera Corporation. All Rights Reserved.2*3* This program is free software; you can redistribute it and/or4* modify it under the terms of the GNU General Public License5* as published by the Free Software Foundation, version 2.6*7* This program is distributed in the hope that it will be useful, but8* WITHOUT ANY WARRANTY; without even the implied warranty of9* MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or10* NON INFRINGEMENT. See the GNU General Public License for11* more details.12*/1314#include <arch/chip.h>1516#include <linux/types.h>17#include <linux/string.h>18#include <linux/module.h>1920#undef memset2122void *memset(void *s, int c, size_t n)23{24uint64_t *out64;25int n64, to_align64;26uint64_t v64;27uint8_t *out8 = s;2829/* Experimentation shows that a trivial tight loop is a win up until30* around a size of 20, where writing a word at a time starts to win.31*/32#define BYTE_CUTOFF 203334#if BYTE_CUTOFF < 735/* This must be at least at least this big, or some code later36* on doesn't work.37*/38#error "BYTE_CUTOFF is too small"39#endif4041if (n < BYTE_CUTOFF) {42/* Strangely, this turns out to be the tightest way to43* write this loop.44*/45if (n != 0) {46do {47/* Strangely, combining these into one line48* performs worse.49*/50*out8 = c;51out8++;52} while (--n != 0);53}5455return s;56}5758/* Align 'out8'. We know n >= 7 so this won't write past the end. */59while (((uintptr_t) out8 & 7) != 0) {60*out8++ = c;61--n;62}6364/* Align 'n'. */65while (n & 7)66out8[--n] = c;6768out64 = (uint64_t *) out8;69n64 = n >> 3;7071/* Tile input byte out to 64 bits. */72/* KLUDGE */73v64 = 0x0101010101010101ULL * (uint8_t)c;7475/* This must be at least 8 or the following loop doesn't work. */76#define CACHE_LINE_SIZE_IN_DOUBLEWORDS (CHIP_L2_LINE_SIZE() / 8)7778/* Determine how many words we need to emit before the 'out32'79* pointer becomes aligned modulo the cache line size.80*/81to_align64 = (-((uintptr_t)out64 >> 3)) &82(CACHE_LINE_SIZE_IN_DOUBLEWORDS - 1);8384/* Only bother aligning and using wh64 if there is at least85* one full cache line to process. This check also prevents86* overrunning the end of the buffer with alignment words.87*/88if (to_align64 <= n64 - CACHE_LINE_SIZE_IN_DOUBLEWORDS) {89int lines_left;9091/* Align out64 mod the cache line size so we can use wh64. */92n64 -= to_align64;93for (; to_align64 != 0; to_align64--) {94*out64 = v64;95out64++;96}9798/* Use unsigned divide to turn this into a right shift. */99lines_left = (unsigned)n64 / CACHE_LINE_SIZE_IN_DOUBLEWORDS;100101do {102/* Only wh64 a few lines at a time, so we don't103* exceed the maximum number of victim lines.104*/105int x = ((lines_left < CHIP_MAX_OUTSTANDING_VICTIMS())106? lines_left107: CHIP_MAX_OUTSTANDING_VICTIMS());108uint64_t *wh = out64;109int i = x;110int j;111112lines_left -= x;113114do {115__insn_wh64(wh);116wh += CACHE_LINE_SIZE_IN_DOUBLEWORDS;117} while (--i);118119for (j = x * (CACHE_LINE_SIZE_IN_DOUBLEWORDS / 4);120j != 0; j--) {121*out64++ = v64;122*out64++ = v64;123*out64++ = v64;124*out64++ = v64;125}126} while (lines_left != 0);127128/* We processed all full lines above, so only this many129* words remain to be processed.130*/131n64 &= CACHE_LINE_SIZE_IN_DOUBLEWORDS - 1;132}133134/* Now handle any leftover values. */135if (n64 != 0) {136do {137*out64 = v64;138out64++;139} while (--n64 != 0);140}141142return s;143}144EXPORT_SYMBOL(memset);145146147