rappaurio-sae501_502/app/node_modules/cejs/data/character.js

/**
 * @name CeL function for character encoding
 * @fileoverview 本檔案包含了文字/字元編碼用的 functions。文字コード変換ライブラリ
 *
 * @example <code>
 * CeL.run('data.character',function(){
 * 	// ..
 * });
 * </code>
 *
 * @since 2017/1/22 22:38:52
 *
 * @see [[en:binary-to-text encoding]], [[en:character encoding]]
 */

'use strict';
// 'use asm';

// --------------------------------------------------------------------------------------------

typeof CeL === 'function' && CeL.run({
	// module name
	name : 'data.character',

	// for String.prototype.chars()
	require : 'data.native.',

	// 設定不匯出的子函式。
	no_extend : 'add_map',

	// 為了方便格式化程式碼，因此將 module 函式主體另外抽出。
	code : module_code
});

function module_code(library_namespace) {

	var module_name = this.id;

	/**
	 * null module constructor
	 *
	 * @class 處理編碼的 functions
	 */
	var _// JSDT:_module_
	= function() {
		// null module constructor
	};

	/**
	 * for JSDT: 有 prototype 才會將之當作 Class
	 */
	_// JSDT:_module_
	.prototype = {};

	// TODO
	function Base64(text) {
		var index = 0, length = text.length, result = [];
		for (; index < length; index++) {
			// Buffer.from('編碼');
		}
		return result.join('');
	}

	// =============================================================================================
	// character encoding 字元編碼

	var
	/** {Natural}base of 16 bit */
	HEX_BASE = 0x10,
	// 取得正式名稱。預先設定以供load_code_map()使用。
	// [經過normalize_encoding_name(encoding)之前期處理的key]
	// = module/file name below data/encoding/
	code_of_alias = {
		big5 : 'Big5',
		gbk : 'GBK',
		// 因為以實用性為主，因此全部導向到擴張至最大的最新字碼。
		gb2312 : 'GBK',
		eucjp : 'EUC-JP',
		shiftjis : 'Shift_JIS',
		sjis : 'Shift_JIS'
	},
	// coding map / config hash for decoding specified coding to Unicode.
	// map_set[encoding_name]
	// = [ config, [1 byte map], [2 byte map], [3 byte map], [4 byte map] ]
	map_set = Object.create(null),
	// encoding Unicode to specified coding
	// encode_map_set[encoding_name]
	// = {Unicode_char:char_code}
	encode_map_set = Object.create(null),
	/** {String}REPLACEMENT CHARACTER U+FFFD, '?' in old IE */
	UNKNOWN_CHARACTER = '<27>', UNKNOWN_CHARACTER_CODE = UNKNOWN_CHARACTER
			.codePointAt(0);

	// _.map_set = map_set;

	function normalize_encoding_name(encoding) {
		encoding = String(encoding).trim();
		return code_of_alias[encoding.toLowerCase().replace(/[-_ ]+/g, '')]
				|| encoding;
	}

	function encoding_is_loaded(encoding) {
		return normalize_encoding_name(encoding) in map_set;
	}

	_.is_loaded = encoding_is_loaded;

	/**
	 * <code>

	encoding.map.json規格書:包含map:
	{
		// to single byte / 2 or multi bytes set, continuous, split by /./u:
		start_char_code_in_hex:'map',
		// ** deprecated: to single byte / 2 bytes set, continuous, .split('split string'):
		start_char_code_in_hex:['map', 'split string'],
		// ** deprecated: 2 bytes set, .split('split string'):
		start_char_code_in_hex:[start of second byte, 'map', 'split string'],
		// ** deprecated: .split(''):
		start_char_code_in_hex:[start of second byte, 'map', ''],
		// ** deprecated: split by /./u:
		start_char_code_in_hex:[start of second byte, 'map'],
		// ** deprecated: convert single code to single string
		start_char_code_in_hex:['map', 0],
		// 這邊的count表示中間有count個字元，分別是自char開始，unicode編碼之後的序列。
		start_char_code_in_hex:['char', {Natural}count, 'char', {Natural}count],
	}

	e.g.,
	// split by .chars(true)
	{'A180':[0x80,'~~~~~~'],'A4B3':'##'}
	// .split('')
	{'A180':[0x80,'~~~~~~', ''],'A4B3':['#,#',',']}

	to_multi的不能跨越to_single的範圍。
	e.g.,
	{'A1FF':[0xFF,'abcde'],'A2FF':'12','A4B3':'~'}
	'A2FF','A4B3': 不在'A1FF'範圍內: A1FF:a, A2FF:b, A3FF:c, ...
	實作將直接以+1的方式配入 convert_map 中，因此A2FF之第二組"2"將被配入A300!

	</code>
	 *
	 * @see [[en:character encoding]]
	 *      https://github.com/ashtuchkin/iconv-lite/tree/master/encodings/tables
	 */

	/**
	 *
	 * @param {String}code_name
	 *            encoding name
	 * @param {Object}map_data
	 */
	function add_code_map(code_name, map_data) {
		library_namespace.debug(code_name, 1, 'add_code_map');
		var encoding = normalize_encoding_name(code_name);
		if (!(encoding in code_of_alias)) {
			code_of_alias[encoding] = code_name;
		}

		if (!map_set[encoding]) {
			// 不重新設定，以允許多次設定。
			map_set[encoding] = [];
		}
		var code_map = map_set[encoding], config = code_map,
		// main_encode_map[Unicode character]
		// = {ℕ⁰:Natural+0}code of specified coding
		main_encode_map = encode_map_set[encoding]
				|| (encode_map_set[encoding] = Object.create(null));
		// console.log(Object.keys(map_data));
		for ( var key in map_data) {
			var char_list = map_data[key], matched = key
					.match(/^_?([\dA-F]+)$/i);
			// console.log([ key, matched, char_list ]);
			if (!matched) {
				// console.log(key);
				// config?
				code_map[key] = char_list;
				continue;
			}

			var base_byte_code = matched[1],
			//
			char_code = parseInt(base_byte_code, HEX_BASE),
			//
			main_map = Math.ceil(base_byte_code.length / 2);
			main_map = code_map[main_map]
			// initialize 稀疏矩陣。
			|| (code_map[main_map] = []);

			if (typeof char_list === 'string') {
				char_list.chars(true).forEach(function(character) {
					if (main_encode_map[character]) {
						library_namespace.debug(code_name
						// http://founder.acgvlyric.org/iu/doku.php/%E9%80%A0%E5%AD%97:%E5%BA%8F_%E5%B8%B8%E7%94%A8%E9%A6%99%E6%B8%AF%E5%A4%96%E5%AD%97%E8%A1%A8
						+ ': character mapping ['
						// 除了少數幾個特殊的字之外，其他大部分都對應到後來指定的字碼。
						+ character + ']: 0x'
						// @see data/character/Big5.js
						+ main_encode_map[character].toString(16).toUpperCase()
						// e.g., "包" should be A55D in Big5, not FABD
						+ ' → 0x'
						// "者" should be AACC in Big5, not 8ECD
						+ char_code.toString(16).toUpperCase(),
						//
						2, 'add_code_map');
					}
					// register
					main_encode_map[character] = char_code;

					// 為了能辨識，無論哪種都還是得設定這個對應 to Unicode。
					main_map[char_code++] = character;
				});
				continue;
			}

			if (!Array.isArray(char_list)) {
				library_namespace.error('Not Array: '
						+ JSON.stringify(char_list));
				throw new Error('Invalid character code map: ' + code_name
						+ '.' + base_byte_code);
			}

			// start_first_byte
			if (!('start_byte_code' in config)) {
				if (char_code === 0 && char_list[0] === '\u0000'
						&& char_list[1] > 0) {
					char_list.shift();
					// e.g., ['\0',2] → byte code < 2+1 的都能直接轉string。
					config.start_byte_code = char_list.shift() + 1;
				} else {
					config.start_byte_code = 0;
				}
			}

			// console.log(char_list);
			var last_char_code;
			char_list.forEach(function(slice, index) {
				if (typeof slice === 'string') {
					char_list = slice.chars(true);
					char_list.forEach(function(character) {
						// register
						main_encode_map[character] = char_code;
						main_map[char_code++] = character;
					});
					last_char_code = char_list.at(-1).codePointAt(0);
					return;
				}

				if (!(last_char_code >= 0) || !(slice > 0)) {
					throw new Error('Invalid character of code map: '
							+ code_name + '.' + base_byte_code + '.' + slice);
				}

				// console.log([last_char_code, slice]);
				var end = last_char_code + slice;
				while (last_char_code < end) {
					// register
					var character = String.fromCodePoint(++last_char_code);
					main_encode_map[character] = char_code;
					main_map[char_code++] = character;
				}
			});
		}
	}

	_.add_map = add_code_map;

	function load_code_map(encoding_list, callback) {
		if (!Array.isArray(encoding_list)) {
			encoding_list = [ encoding_list ];
		}

		encoding_list = encoding_list.map(normalize_encoding_name);

		// resources need to load
		var resources_path_list = [];

		encoding_list.forEach(function(encoding) {
			if (!(encoding in map_set)) {
				resources_path_list.push(library_namespace.get_module_path(
						module_name, encoding + '.js'));
			}
		})

		if (resources_path_list.length === 0) {
			callback && callback();
			return true;
		}

		if (resources_path_list.length === 1) {
			resources_path_list = resources_path_list[0];
		}
		library_namespace.debug(resources_path_list, 1, 'load_code_map');
		library_namespace.run(resources_path_list, callback);
	}

	_.load = load_code_map;

	// ===============================================================

	// String.prototype.encode(), string.encode()
	function String_to_code(encoding, options) {
		encoding = normalize_encoding_name(encoding);

		// 4: 保險用，幾乎都夠用，卻仍舊不能保證。
		var buffer = Buffer.allocUnsafe(4 * this.length), index = 0,
		// main_encode_map[Unicode character]
		// = {ℕ⁰:Natural+0}code of specified coding
		main_encode_map = encode_map_set[encoding],
		//
		start_byte_code = map_set[encoding]
				&& map_set[encoding].start_byte_code;

		if (!main_encode_map) {
			throw new Error('Unknown encoding: ' + encoding
					+ '. You may need to ' + module_name + '.load("' + encoding
					+ '") first?');
		}

		// TODO: 對於不是以character分割，以及雙/多位元卻是0x0000的情況需要特別處理（這裡會被當作0x00而非0x0000）!
		this.chars(true).forEach(function(character) {
			var code = character.charCodeAt(0);
			if (code < start_byte_code) {
				buffer[index++] = code;
				return;
			}

			var _i = code = (main_encode_map[character]
			//
			|| UNKNOWN_CHARACTER_CODE) | 0, end = index;
			// 8: 0x100=2^8
			while ((_i >>= 8) > 0) {
				end++;
			}
			_i = end;
			while (true) {
				buffer[_i] = code % 0x100;
				if (--_i < index) {
					break;
				}
				code >>= 8;
			}
			index = end + 1;
		});

		// assert: buffer.length >= index
		return buffer.slice(0, index);
	}

	// ===============================================================

	if (library_namespace.platform.nodejs) {
		// Buffer.prototype.to_UTF8;
		// Buffer.prototype.to_Big5;
		// Buffer.prototype.to_EUC_JP;

		// cache original Buffer.prototype.toString
		Buffer.prototype.native_toString = Buffer.prototype.toString;
		/** @deprecated */
		Buffer.prototype.toString = function deprecated_Buffer_toString(
				encoding) {
			var endoding_error;
			try {
				return this.native_toString(encoding);
			} catch (e) {
				endoding_error = e;
			}

			try {
				return code_array_to_String.call(this, encoding);
			} catch (e) {
				// throw e;
				throw endoding_error;
			}
		};

		// 把 Buffer 物件的內容當作是 encoding 編碼，並解析成 {String}UTF-8 string。
		Buffer.prototype.toString = function Buffer_toString(encoding, options) {
			try {
				// buffer.toString(null) will throw!
				return this.native_toString(encoding);
			} catch (e) {
			}

			// 有錯誤直接丟出去。
			return code_array_to_String.call(this, encoding, options);
		};

		// TODO: use StringDecoder
	}

	if (false) {
		CeL.run('data.character');
		CeL.character.load('Big-5', function() {
			console.assert('作輩' === Buffer.from('A740BDFA', 'hex').toString(
					'Big-5'));
			var text = '做基本檢測。';
			console.assert(text === text.encode('Big_5').toString('Big 5'));
		});
	}

	// assert: this = [ byte_code, byte_code, ... ]
	function code_array_to_String(encoding, options) {
		// check if we can convert the encoding.
		encoding = normalize_encoding_name(encoding);
		var code_map = map_set[encoding];
		if (!code_map) {
			// Unknown encoding: e.
			// You may need to run CeL.data.character.load("e") first?
			throw new Error('Unknown encoding: ' + encoding
					+ '. You may need to run ' + module_name + '.load("'
					+ encoding + '") first?');
		}
		// console.log(code_map);

		var code_index = 0,
		// converted result
		converted = '';
		for (var start_byte_code = code_map.start_byte_code, reminder = 0, max_byte = code_map.length,
		// main loop to decode to default inner encoding (Unicode).
		byte_index = 0, length = this.length; byte_index < length; byte_index++) {
			if (code_index === 0) {
				reminder = this[byte_index];
				if (reminder < start_byte_code) {
					converted += String.fromCharCode(reminder);
					continue;
				}
			} else {
				reminder = reminder * 0x100 + this[byte_index];
			}

			if (++code_index === max_byte) {
				// 自這次搜尋開始，無法找到能mapping的character。
				converted += UNKNOWN_CHARACTER;
				// rollback至自這次搜尋開始後的下一個byte。
				byte_index -= code_index - 2;
				// reset
				reminder = code_index = 0;
				continue;
			}

			var map_single = code_map[code_index];
			if (false) {
				library_namespace.debug('Test ' + code_index + ' bytes: '
						+ reminder.toString(HEX_BASE), 6,
						'code_array_to_String');
				if (map_single) {
					library_namespace.debug(map_single.slice(Math.max(0,
							reminder - 9), reminder + 9), 9,
							'code_array_to_String');
				}
			}
			if (map_single && (reminder in map_single)) {
				// find
				converted += map_single[reminder];
				// reset
				reminder = code_index = 0;
			}
		}

		if (code_index > 0) {
			converted += UNKNOWN_CHARACTER;
		}

		return converted;
	}

	// ---------------------------------------------------------------

	function Array_to_String(encoding, options) {
		var array = this.map(function(byte, index) {
			// 做基本檢測。
			if (typeof byte === 'string' && byte.length === 1) {
				byte = byte.charCodeAt(0);
			}
			if (typeof byte === 'number' && 0 <= byte && byte < 0x100
					&& ((byte | 0) === byte)) {
				return byte;
			}
			throw new Error('Invalid byte: [' + index + '] ' + byte);
		});

		return code_array_to_String.call(array, encoding, options);
	}

	library_namespace.set_method(Array.prototype, {
		decode : Array_to_String
	});

	library_namespace.set_method(String.prototype, {
		encode : String_to_code,
		// assert: /^[\x00-\xFF]*$/i.test(this)
		decode : function decode_as_byte_String(encoding, options) {
			if (false && !/^[\x00-\xFF]*$/i.test(this)) {
				throw new Error('Invalid byte: [' + index + '] ' + byte);
			}
			// use Array_to_String()
			return this.split('').decode(encoding, options);
		}
	});

	// ---------------------------------------------------------------

	// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/encodeURIComponent
	// /^[*\-._0-9A-Za-z]$/
	var PATTERN_has_URI_component_invalid_character = /[^a-zA-Z0-9\-_.!~*'()]/;
	// _.PATTERN_has_URI_component_invalid_character =
	// PATTERN_has_URI_component_invalid_character;
	// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/encodeURI
	var PATTERN_has_URI_invalid_character = /[^a-zA-Z0-9;,/?:@&=+$\-_.!~*'()#]/;
	// _.PATTERN_has_URI_invalid_character = PATTERN_has_URI_invalid_character;

	/**
	 * @see https://www.w3.org/TR/html5/forms.html#url-encoded-form-data
	 */
	var encode_URI_component_base_map = [];
	// " "→"+"
	encode_URI_component_base_map[0x20] = '+';
	(function() {
		for (var code = 0x2A; code < 0x7A; code++) {
			var character = String.fromCharCode(code);
			if (!PATTERN_has_URI_component_invalid_character.test(character)) {
				encode_URI_component_base_map[code] = character;
			}
		}
	})();

	/**
	 *
	 * @param {String}string
	 * @param [encoding]
	 * @returns
	 */
	function encode_URI_component(string, encoding) {
		if (!encoding || /^UTF-?8$/i.test(encoding)) {
			// fallback: native methods are faster
			return encodeURIComponent(string);
		}
		// charset
		encoding = normalize_encoding_name(encoding);

		if (false) {
			// for pure Big5, no 香港增補字符集
			string = string.replace(/[―喰蔃瀞靝鼗弌鍮蠏覩瑨牐]/g, function(char) {
				return '&#' + char.charCodeAt(0) + ';';
			});
		}

		var encoded = '';
		string.encode(encoding).forEach(function(byte) {
			encoded += byte in encode_URI_component_base_map
			//
			? encode_URI_component_base_map[byte]
			//
			: '%' + byte.toString(0x10).toUpperCase();
		});
		return encoded;
	}

	_.encode_URI_component = encode_URI_component;

	function encode_URI(string, encoding) {
		if (!encoding || /^UTF-?8$/i.test(encoding)) {
			// fallback: native methods are faster
			return encodeURI(string);
		}
		// charset
		encoding = normalize_encoding_name(encoding);

		var encoded = '';
		string.encode(encoding).forEach(function(byte) {
			encoded += (byte in encode_URI_component_base_map)
			//
			&& !PATTERN_has_URI_invalid_character.test(byte)
			//
			? encode_URI_component_base_map[byte]
			//
			: '%' + byte.toString(0x10).toUpperCase();
		});
		return encoded;
	}

	_.encode_URI = encode_URI;

	/**
	 * @see http://qiita.com/weal/items/3b3ddfb8157047119554
	 *      http://polygon-planet-log.blogspot.tw/2012/04/javascript.html
	 */
	function decode_URI_component(encoded, encoding) {
		if (!encoding || /^UTF-?8$/i.test(encoding)) {
			// fallback
			return decodeURIComponent(encoded);
		}
		// charset
		encoding = normalize_encoding_name(encoding);

		var string = '', buffer = [], PATTERN = /%([\dA-F]{2})|[\s\S]/ig, matched, code;
		while (matched = PATTERN.exec(encoded)) {
			if (matched[1]) {
				buffer.push(parseInt(matched[1], 0x10));
			} else if ((matched = matched[0]) === '+') {
				// "+"→" "
				buffer.push(0x20);
			} else if ((code = matched.charCodeAt(0)) < 0x100) {
				buffer.push(code);
			} else {
				if (buffer.length > 0) {
					string += code_array_to_String.call(buffer, encoding);
					buffer.length = 0;
				}
				string += matched;
			}
		}

		if (buffer.length > 0) {
			string += code_array_to_String.call(buffer, encoding);
		}
		return string;
	}

	_.decode_URI_component = decode_URI_component;
	// https://www.geeksforgeeks.org/difference-between-decodeuricomponent-and-decodeuri-functions-in-javascript/
	// decodeURI(): It takes encodeURI(url) string so it cannot decoded
	// characters (, / ? : @ & = + $ #)
	// TODO: decodeURI("%26") === "%26" && decodeURIComponent("%26") === "&"
	_.decode_URI = decode_URI_component;

	// ---------------------------------------------------------------

	return (_// JSDT:_module_
	);
}