/*1* fs/cifs/cifs_unicode.c2*3* Copyright (c) International Business Machines Corp., 2000,20094* Modified by Steve French ([email protected])5*6* This program is free software; you can redistribute it and/or modify7* it under the terms of the GNU General Public License as published by8* the Free Software Foundation; either version 2 of the License, or9* (at your option) any later version.10*11* This program is distributed in the hope that it will be useful,12* but WITHOUT ANY WARRANTY; without even the implied warranty of13* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See14* the GNU General Public License for more details.15*16* You should have received a copy of the GNU General Public License17* along with this program; if not, write to the Free Software18* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA19*/20#include <linux/fs.h>21#include <linux/slab.h>22#include "cifs_unicode.h"23#include "cifs_uniupr.h"24#include "cifspdu.h"25#include "cifsglob.h"26#include "cifs_debug.h"2728/*29* cifs_ucs2_bytes - how long will a string be after conversion?30* @ucs - pointer to input string31* @maxbytes - don't go past this many bytes of input string32* @codepage - destination codepage33*34* Walk a ucs2le string and return the number of bytes that the string will35* be after being converted to the given charset, not including any null36* termination required. Don't walk past maxbytes in the source buffer.37*/38int39cifs_ucs2_bytes(const __le16 *from, int maxbytes,40const struct nls_table *codepage)41{42int i;43int charlen, outlen = 0;44int maxwords = maxbytes / 2;45char tmp[NLS_MAX_CHARSET_SIZE];46__u16 ftmp;4748for (i = 0; i < maxwords; i++) {49ftmp = get_unaligned_le16(&from[i]);50if (ftmp == 0)51break;5253charlen = codepage->uni2char(ftmp, tmp, NLS_MAX_CHARSET_SIZE);54if (charlen > 0)55outlen += charlen;56else57outlen++;58}5960return outlen;61}6263/*64* cifs_mapchar - convert a host-endian char to proper char in codepage65* @target - where converted character should be copied66* @src_char - 2 byte host-endian source character67* @cp - codepage to which character should be converted68* @mapchar - should character be mapped according to mapchars mount option?69*70* This function handles the conversion of a single character. It is the71* responsibility of the caller to ensure that the target buffer is large72* enough to hold the result of the conversion (at least NLS_MAX_CHARSET_SIZE).73*/74static int75cifs_mapchar(char *target, const __u16 src_char, const struct nls_table *cp,76bool mapchar)77{78int len = 1;7980if (!mapchar)81goto cp_convert;8283/*84* BB: Cannot handle remapping UNI_SLASH until all the calls to85* build_path_from_dentry are modified, as they use slash as86* separator.87*/88switch (src_char) {89case UNI_COLON:90*target = ':';91break;92case UNI_ASTERISK:93*target = '*';94break;95case UNI_QUESTION:96*target = '?';97break;98case UNI_PIPE:99*target = '|';100break;101case UNI_GRTRTHAN:102*target = '>';103break;104case UNI_LESSTHAN:105*target = '<';106break;107default:108goto cp_convert;109}110111out:112return len;113114cp_convert:115len = cp->uni2char(src_char, target, NLS_MAX_CHARSET_SIZE);116if (len <= 0) {117*target = '?';118len = 1;119}120goto out;121}122123/*124* cifs_from_ucs2 - convert utf16le string to local charset125* @to - destination buffer126* @from - source buffer127* @tolen - destination buffer size (in bytes)128* @fromlen - source buffer size (in bytes)129* @codepage - codepage to which characters should be converted130* @mapchar - should characters be remapped according to the mapchars option?131*132* Convert a little-endian ucs2le string (as sent by the server) to a string133* in the provided codepage. The tolen and fromlen parameters are to ensure134* that the code doesn't walk off of the end of the buffer (which is always135* a danger if the alignment of the source buffer is off). The destination136* string is always properly null terminated and fits in the destination137* buffer. Returns the length of the destination string in bytes (including138* null terminator).139*140* Note that some windows versions actually send multiword UTF-16 characters141* instead of straight UCS-2. The linux nls routines however aren't able to142* deal with those characters properly. In the event that we get some of143* those characters, they won't be translated properly.144*/145int146cifs_from_ucs2(char *to, const __le16 *from, int tolen, int fromlen,147const struct nls_table *codepage, bool mapchar)148{149int i, charlen, safelen;150int outlen = 0;151int nullsize = nls_nullsize(codepage);152int fromwords = fromlen / 2;153char tmp[NLS_MAX_CHARSET_SIZE];154__u16 ftmp;155156/*157* because the chars can be of varying widths, we need to take care158* not to overflow the destination buffer when we get close to the159* end of it. Until we get to this offset, we don't need to check160* for overflow however.161*/162safelen = tolen - (NLS_MAX_CHARSET_SIZE + nullsize);163164for (i = 0; i < fromwords; i++) {165ftmp = get_unaligned_le16(&from[i]);166if (ftmp == 0)167break;168169/*170* check to see if converting this character might make the171* conversion bleed into the null terminator172*/173if (outlen >= safelen) {174charlen = cifs_mapchar(tmp, ftmp, codepage, mapchar);175if ((outlen + charlen) > (tolen - nullsize))176break;177}178179/* put converted char into 'to' buffer */180charlen = cifs_mapchar(&to[outlen], ftmp, codepage, mapchar);181outlen += charlen;182}183184/* properly null-terminate string */185for (i = 0; i < nullsize; i++)186to[outlen++] = 0;187188return outlen;189}190191/*192* NAME: cifs_strtoUCS()193*194* FUNCTION: Convert character string to unicode string195*196*/197int198cifs_strtoUCS(__le16 *to, const char *from, int len,199const struct nls_table *codepage)200{201int charlen;202int i;203wchar_t wchar_to; /* needed to quiet sparse */204205for (i = 0; len && *from; i++, from += charlen, len -= charlen) {206charlen = codepage->char2uni(from, len, &wchar_to);207if (charlen < 1) {208cERROR(1, "strtoUCS: char2uni of 0x%x returned %d",209*from, charlen);210/* A question mark */211wchar_to = 0x003f;212charlen = 1;213}214put_unaligned_le16(wchar_to, &to[i]);215}216217put_unaligned_le16(0, &to[i]);218return i;219}220221/*222* cifs_strndup_from_ucs - copy a string from wire format to the local codepage223* @src - source string224* @maxlen - don't walk past this many bytes in the source string225* @is_unicode - is this a unicode string?226* @codepage - destination codepage227*228* Take a string given by the server, convert it to the local codepage and229* put it in a new buffer. Returns a pointer to the new string or NULL on230* error.231*/232char *233cifs_strndup_from_ucs(const char *src, const int maxlen, const bool is_unicode,234const struct nls_table *codepage)235{236int len;237char *dst;238239if (is_unicode) {240len = cifs_ucs2_bytes((__le16 *) src, maxlen, codepage);241len += nls_nullsize(codepage);242dst = kmalloc(len, GFP_KERNEL);243if (!dst)244return NULL;245cifs_from_ucs2(dst, (__le16 *) src, len, maxlen, codepage,246false);247} else {248len = strnlen(src, maxlen);249len++;250dst = kmalloc(len, GFP_KERNEL);251if (!dst)252return NULL;253strlcpy(dst, src, len);254}255256return dst;257}258259/*260* Convert 16 bit Unicode pathname to wire format from string in current code261* page. Conversion may involve remapping up the six characters that are262* only legal in POSIX-like OS (if they are present in the string). Path263* names are little endian 16 bit Unicode on the wire264*/265int266cifsConvertToUCS(__le16 *target, const char *source, int srclen,267const struct nls_table *cp, int mapChars)268{269int i, j, charlen;270char src_char;271__le16 dst_char;272wchar_t tmp;273274if (!mapChars)275return cifs_strtoUCS(target, source, PATH_MAX, cp);276277for (i = 0, j = 0; i < srclen; j++) {278src_char = source[i];279charlen = 1;280switch (src_char) {281case 0:282put_unaligned(0, &target[j]);283goto ctoUCS_out;284case ':':285dst_char = cpu_to_le16(UNI_COLON);286break;287case '*':288dst_char = cpu_to_le16(UNI_ASTERISK);289break;290case '?':291dst_char = cpu_to_le16(UNI_QUESTION);292break;293case '<':294dst_char = cpu_to_le16(UNI_LESSTHAN);295break;296case '>':297dst_char = cpu_to_le16(UNI_GRTRTHAN);298break;299case '|':300dst_char = cpu_to_le16(UNI_PIPE);301break;302/*303* FIXME: We can not handle remapping backslash (UNI_SLASH)304* until all the calls to build_path_from_dentry are modified,305* as they use backslash as separator.306*/307default:308charlen = cp->char2uni(source + i, srclen - i, &tmp);309dst_char = cpu_to_le16(tmp);310311/*312* if no match, use question mark, which at least in313* some cases serves as wild card314*/315if (charlen < 1) {316dst_char = cpu_to_le16(0x003f);317charlen = 1;318}319}320/*321* character may take more than one byte in the source string,322* but will take exactly two bytes in the target string323*/324i += charlen;325put_unaligned(dst_char, &target[j]);326}327328ctoUCS_out:329return i;330}331332333334