CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!
CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!
Path: blob/master/Common/Data/Encoding/Shiftjis.h
Views: 1401
#pragma once

#include <cstdint>

// Incremental Shift JIS decoder plus a matching single-character encoder.
//
// Warning: decodes/encodes JIS, not Unicode.
// Use a table to map.
//
// Decoded values are either a single byte (JIS X 0201, returned unchanged) or
// a 16-bit value laid out as ((row + 0x20) << 8) | (cell + 0x20).
struct ShiftJIS {
	// Returned by next() when the bytes at the cursor are not a valid character.
	static const uint32_t INVALID = (uint32_t) -1;

	// c is read as a NUL-terminated byte string; it is not copied, so it must
	// stay valid for as long as this decoder is used.
	ShiftJIS(const char *c) : c_(c), index_(0) {}

	// Decodes the character at the cursor and advances past it.
	// Returns the JIS value, or INVALID for a bad lead/trail byte or for a lead
	// byte that is cut off by the end of the string.
	// Callers are expected to check end() first: calling this on the terminator
	// returns 0 and steps over it.
	uint32_t next() {
		uint32_t j = (uint8_t)c_[index_++];

		int row;
		bool emojiAdjust = false;
		switch (j >> 4) {
		case 0x8:
			if (j == 0x80) {
				return INVALID;
			}
			// Intentional fall-through.
		case 0x9:
		case 0xE:
			row = ((j & 0x3F) << 1) - 0x01;
			break;

		case 0xF:
			emojiAdjust = true;
			if (j < 0xF4) {
				row = ((j & 0x7F) << 1) - 0x59;
			} else if (j < 0xFD) {
				row = ((j & 0x7F) << 1) - 0x1B;
			} else {
				return j;
			}
			break;

		// Anything else (i.e. <= 0x7x, 0xAx, 0xBx, 0xCx, and 0xDx) is JIS X 0201, return directly.
		default:
			return j;
		}

		// Okay, if we didn't return, it's time for the second byte (the cell.)
		// A lead byte followed directly by the terminator is a truncated character.
		// Do not consume the terminator: otherwise end() and length() would
		// keep reading beyond the end of the string.
		if (c_[index_] == 0) {
			return INVALID;
		}
		j = (uint8_t)c_[index_++];
		// Not a valid second byte.
		if (j < 0x40 || j == 0x7F || j >= 0xFD) {
			return INVALID;
		}

		if (j >= 0x9F) {
			// This range means the row was even.
			++row;
			j -= 0x7E;
		} else {
			if (j >= 0x80) {
				j -= 0x20;
			} else {
				// Yuck.  They wrapped around 0x7F, so we subtract one less.
				j -= 0x20 - 1;
			}

			if (emojiAdjust) {
				// These are shoved in where they'll fit.
				if (row == 0x87) {
					// First byte was 0xF0.
					row = 0x81;
				} else if (row == 0x8B) {
					// First byte was 0xF2.
					row = 0x85;
				} else if (row == 0xCD) {
					// First byte was 0xF4.
					row = 0x8F;
				}
			}
		}

		// j is already the cell + 0x20.
		return ((row + 0x20) << 8) | j;
	}

	// True when the cursor is on the NUL terminator.
	bool end() const {
		return c_[index_] == 0;
	}

	// Number of next() calls needed to walk the whole string from its start.
	// Invalid sequences count too (one per INVALID result).  Does not move
	// this decoder's cursor; a temporary decoder does the walking.
	int length() const {
		int len = 0;
		for (ShiftJIS dec(c_); !dec.end(); dec.next())
			++len;
		return len;
	}

	// Byte offset of the cursor from the start of the string.
	int byteIndex() const {
		return index_;
	}

	// Encodes one value in the form returned by next() into dest.
	// Returns the number of bytes written: 1 for values <= 0xFF, 2 for
	// double-byte values.  Returns 0 and writes nothing when the row of j has
	// no Shift JIS lead byte.  dest must have room for 2 bytes.
	// The low byte of j (cell + 0x20, normally 0x21..0x7E) is not validated.
	static int encode(char *dest, uint32_t j) {
		// JIS X 0201.
		if ((j & ~0xFF) == 0) {
			*dest = (char)j;
			return 1;
		}

		int row = (int)(j >> 8) - 0x20;
		int offsetCell = j & 0xFF;

		int lead = leadByteForRow(row);
		if (lead == 0) {
			// Nothing sensible to write; reporting 2 bytes here would leave
			// the caller with a missing lead byte and an unwritten dest[1].
			return 0;
		}
		*dest++ = (char)lead;

		if (row & 1) {
			if (offsetCell < 0x60) {
				// Subtract one to shift around 0x7F.
				*dest++ = (char)(offsetCell + 0x20 - 1);
			} else {
				*dest++ = (char)(offsetCell + 0x20);
			}
		} else {
			*dest++ = (char)(offsetCell + 0x7E);
		}

		return 2;
	}

	// Number of bytes a value occupies once encoded: 1 for values <= 0xFF,
	// otherwise 2.  Does not check whether j is actually encodable.
	static int encodeUnits(uint32_t j) {
		if ((j & ~0xFF) == 0) {
			return 1;
		}
		return 2;
	}

private:
	// Inverse of the lead-byte-to-row mapping in next().
	// Returns the lead byte for row, or 0 if next() can never produce that row.
	static int leadByteForRow(int row) {
		if (row >= 0x01 && row < 0x3F) {
			// Lead bytes 0x81..0x9F, two rows per lead byte.
			return 0x80 + ((row + 1) >> 1);
		}
		if (row >= 0x3F && row < 0x5F) {
			// Lead bytes 0xE0..0xEF.  Reduce by 0x40 to account for the above range.
			return 0xE0 + ((row - 0x40 + 1) >> 1);
		}

		// Odd rows that next() relocates for lead bytes 0xF0, 0xF2 and 0xF4.
		if (row == 0x81) {
			return 0xF0;
		}
		if (row == 0x85) {
			return 0xF2;
		}
		if (row == 0x8F) {
			return 0xF4;
		}

		// Remaining rows of lead bytes 0xF0..0xF3.  0x8B is excluded because
		// next() always relocates it to 0x85.
		if (row >= 0x88 && row <= 0x8E && row != 0x8B) {
			return 0xF0 + ((row - 0x87) >> 1);
		}
		// Remaining rows of lead bytes 0xF4..0xFC (0xCD is relocated to 0x8F).
		if (row >= 0xCE && row <= 0xDE) {
			return 0xF4 + ((row - 0xCD) >> 1);
		}
		return 0;
	}

	const char *c_;
	int index_;
};