URL encode and decode for non-utf8 encodings
Tính đến
Script này không nên được cài đặt trực tiếp. Nó là một thư viện để các script khác nạp vào bằng chỉ thị meta
// @require https://update.greasyfork.org/scripts/471280/1247074/URL%20Encoder.js
/* eslint-disable no-multi-spaces */
// ==UserScript==
// @name URL Encoder
// @namespace URL-Encoder
// @version 0.2.2
// @description URL encode and decode for non-utf8 encodings
// @author EtherDream, PY-DNG
// @license MIT
// ==/UserScript==
let $URL = (function () {
const str2big5 = (function () {
    'use strict';

    // Lazily built lookup: Unicode code point -> Big5 code packed as (trail << 8) | lead.
    // Stays undefined until a table has been built completely, so a failed
    // initialisation is retried on the next call instead of caching an empty table.
    let table;
    return str2big5;

    /**
     * Build the code point -> Big5 lookup by decoding every candidate
     * double-byte sequence with the platform's Big5 TextDecoder.
     */
    function initBig5Table() {
        // https://en.wikipedia.org/wiki/Big5
        // Each entry is [leadBegin, leadEnd, trailBegin, trailEnd].
        const ranges = [
            [0xA1, 0xF9, 0x40, 0x7E],
            [0xA1, 0xF9, 0xA1, 0xFE],
        ];
        const decoder = new TextDecoder('big5');
        const pair = new Uint8Array(2);
        const mapping = new Map();
        for (const [b1Begin, b1End, b2Begin, b2End] of ranges) {
            for (let b2 = b2Begin; b2 <= b2End; b2++) {
                for (let b1 = b1Begin; b1 <= b1End; b1++) {
                    // Decode each pair on its own: a batch decode silently shifts
                    // every later entry whenever one pair yields zero or several
                    // characters, and would depend on the platform byte order.
                    pair[0] = b1;
                    pair[1] = b2;
                    const decoded = decoder.decode(pair);
                    const codePoint = decoded.codePointAt(0);
                    const isSingleCodePoint = decoded.length === (codePoint > 0xFFFF ? 2 : 1);
                    // U+FFFD marks an undecodable pair; it must not become a mapping.
                    if (!isSingleCodePoint || codePoint === 0xFFFD) {
                        continue;
                    }
                    // Later pairs overwrite earlier ones for duplicate characters.
                    mapping.set(codePoint, b2 << 8 | b1);
                }
            }
        }
        table = mapping;
    }

    /**
     * Encode a string as Big5 bytes.
     *
     * Code points below 0x80 are emitted as a single byte. Characters found in
     * the table are emitted as lead byte then trail byte. Anything else is
     * written as an ASCII decimal character reference (`&#NNNN;`).
     *
     * @param {string} str - Text to encode.
     * @returns {number[]} Encoded bytes, one array element per byte.
     */
    function str2big5(str) {
        if (!table) {
            initBig5Table();
        }
        const buf = [];
        for (let i = 0; i < str.length; i++) {
            const codePoint = str.codePointAt(i);
            if (codePoint > 0xFFFF) {
                // Astral characters occupy two UTF-16 units; skip the low surrogate.
                i++;
            }
            if (codePoint < 0x80) {
                buf.push(codePoint);
                continue;
            }
            const big5 = table.get(codePoint);
            if (big5 !== undefined) {
                buf.push(big5 & 0xFF, big5 >> 8);
            } else {
                // The reference is pure ASCII, so each character is its own byte.
                for (const char of `&#${codePoint};`) {
                    buf.push(char.charCodeAt(0));
                }
            }
        }
        return buf;
    }
}) ();
const str2gbk = (function () {
    'use strict';

    // Lazily built lookup: Unicode code point -> GBK code packed as (trail << 8) | lead.
    // Stays undefined until a table has been built completely, so a failed
    // initialisation is retried on the next call instead of caching an empty table.
    let table;
    return str2gbk;

    /**
     * Build the code point -> GBK lookup by decoding every candidate
     * double-byte sequence with the platform's GBK TextDecoder.
     */
    function initGbkTable() {
        // https://en.wikipedia.org/wiki/GBK_(character_encoding)#Encoding
        // Each entry is [leadBegin, leadEnd, trailBegin, trailEnd].
        const ranges = [
            [0xA1, 0xA9, 0xA1, 0xFE],
            [0xB0, 0xF7, 0xA1, 0xFE],
            [0x81, 0xA0, 0x40, 0xFE],
            [0xAA, 0xFE, 0x40, 0xA0],
            [0xA8, 0xA9, 0x40, 0xA0],
            [0xAA, 0xAF, 0xA1, 0xFE],
            [0xF8, 0xFE, 0xA1, 0xFE],
            [0xA1, 0xA7, 0x40, 0xA0],
        ];
        const decoder = new TextDecoder('gbk');
        const pair = new Uint8Array(2);
        const mapping = new Map();
        for (const [b1Begin, b1End, b2Begin, b2End] of ranges) {
            for (let b2 = b2Begin; b2 <= b2End; b2++) {
                if (b2 === 0x7F) {
                    // 0x7F is excluded from the trail-byte ranges.
                    continue;
                }
                for (let b1 = b1Begin; b1 <= b1End; b1++) {
                    // Decode each pair on its own: a batch decode silently shifts
                    // every later entry whenever one pair yields zero or several
                    // characters, and would depend on the platform byte order.
                    pair[0] = b1;
                    pair[1] = b2;
                    const decoded = decoder.decode(pair);
                    const codePoint = decoded.codePointAt(0);
                    const isSingleCodePoint = decoded.length === (codePoint > 0xFFFF ? 2 : 1);
                    // U+FFFD marks an undecodable pair; it must not become a mapping.
                    if (!isSingleCodePoint || codePoint === 0xFFFD) {
                        continue;
                    }
                    // Later pairs overwrite earlier ones for duplicate characters.
                    mapping.set(codePoint, b2 << 8 | b1);
                }
            }
        }
        table = mapping;
    }

    /**
     * Encode a string as GBK bytes.
     *
     * Code points below 0x80 are emitted as a single byte. Characters found in
     * the table are emitted as lead byte then trail byte. A euro sign missing
     * from the table becomes the single byte 0x80. Anything else is written as
     * an ASCII decimal character reference (`&#NNNN;`).
     *
     * @param {string} str - Text to encode.
     * @param {object} [opt] - Currently unused; kept so existing callers keep working.
     * @returns {number[]} Encoded bytes, one array element per byte.
     */
    function str2gbk(str, opt = {}) {
        if (!table) {
            initGbkTable();
        }
        const buf = [];
        for (let i = 0; i < str.length; i++) {
            const codePoint = str.codePointAt(i);
            if (codePoint > 0xFFFF) {
                // Astral characters occupy two UTF-16 units; skip the low surrogate.
                i++;
            }
            if (codePoint < 0x80) {
                buf.push(codePoint);
                continue;
            }
            const gbk = table.get(codePoint);
            if (gbk !== undefined) {
                buf.push(gbk & 0xFF, gbk >> 8);
            } else if (codePoint === 8364) {
                // 8364 == '€'.charCodeAt(0)
                // Code Page 936 has a single-byte euro sign at 0x80
                buf.push(0x80);
            } else {
                // The reference is pure ASCII, so each character is its own byte.
                for (const char of `&#${codePoint};`) {
                    buf.push(char.charCodeAt(0));
                }
            }
        }
        return buf;
    }
}) ();
const docEncoding = document.characterSet.toLowerCase();
const encoder = {
big5: {
encode: str => arr2url(str2big5(str)),
decode: url => decodeURL(url, 'big5'),
encodeBuffer: str => arr2buf(str2big5(str))
},
gbk: {
encode: str => arr2url(str2gbk(str)),
decode: url => decodeURL(url, 'gbk'),
encodeBuffer: str => arr2buf(str2gbk(str))
},
get encode() { return encoder[docEncoding].encode; },
get decode() { return encoder[docEncoding].decode; },
get encodeBuffer() { return encoder[docEncoding].encodeBuffer; },
};
return encoder;
/**
 * Turn an array of byte values into a percent-encoded string.
 * Every byte is escaped, as '%' followed by two uppercase hex digits.
 *
 * @param {number[]} buf - Byte values to escape.
 * @returns {string} The concatenated escapes, '' for an empty array.
 */
function arr2url(buf) {
    const escapeByte = byte => {
        const hex = byte.toString(16).toUpperCase();
        // Left-pad single-digit values so each escape has two hex digits.
        return `%${hex.length < 2 ? '0' + hex : hex}`;
    };
    const escapes = buf.map(byte => escapeByte(byte));
    return escapes.join('');
}
/**
 * Copy an array of byte values into a freshly allocated Uint8Array.
 *
 * @param {number[]} arr - Byte values to copy.
 * @returns {Uint8Array} A typed array with the same length and contents.
 */
function arr2buf(arr) {
    const bytes = new Uint8Array(arr.length);
    bytes.set(arr);
    return bytes;
}
/**
 * Decode a percent-encoded string whose escaped bytes are in `encoding`.
 *
 * - `%XX` (two hex digits) contributes the byte 0xXX.
 * - A literal ASCII character contributes its own byte, so it can complete a
 *   multi-byte sequence started by an escape (e.g. "%A4@" in Big5).
 * - A literal non-ASCII character is copied to the output unchanged instead
 *   of being truncated to its low 8 bits.
 * - A '%' that is not followed by two hex digits is kept as a literal '%'
 *   rather than being dropped or turned into a NUL byte.
 *
 * @param {string} url - Percent-encoded text.
 * @param {string} encoding - A label accepted by TextDecoder, e.g. 'gbk'.
 * @returns {string} The decoded text.
 * @throws {RangeError} If TextDecoder does not support `encoding`.
 */
function decodeURL(url, encoding) {
    const decoder = new TextDecoder(encoding);
    const hexPair = /^[0-9a-f]{2}$/i;
    let decoded = '';
    let pending = [];

    // Decode the bytes gathered so far and append them to the result.
    const flush = () => {
        if (pending.length > 0) {
            decoded += decoder.decode(Uint8Array.from(pending));
            pending = [];
        }
    };

    for (let i = 0; i < url.length; i++) {
        const unit = url.charCodeAt(i);
        if (unit === 0x25 /* '%' */) {
            const digits = url.slice(i + 1, i + 3);
            // Check both digits first: parseInt alone accepts '4g' as 4
            // and turns 'zz' into NaN.
            if (hexPair.test(digits)) {
                pending.push(parseInt(digits, 16));
                i += 2;
                continue;
            }
            // Malformed escape: fall through and treat '%' as a literal.
        }
        if (unit < 0x80) {
            pending.push(unit);
        } else {
            // Not a byte of the target encoding; emit it as-is. Surrogate pairs
            // arrive as two consecutive units and are re-joined by concatenation.
            flush();
            decoded += url[i];
        }
    }
    flush();
    return decoded;
}
})();