// ==UserScript==
// @name jpmarumaru 羅馬拼音歌詞
// @namespace Anong0u0
// @version 1.0.1
// @description 讓歌詞能以羅馬拼音形式出現
// @author Anong0u0
// @match *://www.jpmarumaru.com/*
// @icon https://www.google.com/s2/favicons?sz=64&domain=jpmarumaru.com
// @grant GM_xmlhttpRequest
// @grant GM_registerMenuCommand
// @grant GM.setValue
// @grant GM_getValue
// @grant GM_deleteValue
// @connect raw.githubusercontent.com
// @license MIT
// ==/UserScript==
// Whether word splitting is enabled: read from the userscript value "use_split",
// defaulting to true when nothing has been stored yet.
const use_split = GM_getValue("use_split", true)
/**
 * Replace every match of the pattern source `k` (compiled with the "gm" flags)
 * with the replacement string `v`.
 */
String.prototype.replaceRegex = function(k, v) { return this.replace(new RegExp(k, "gm"), v) }
// Kana -> romaji table as [romaji, kanaPattern] pairs. Small kana become an "x"-prefixed
// placeholder (e.g. "xya") that the clean-up rules below merge into the preceding syllable.
const ROMAJI_KANA_ARR = "xtsu=っ|ッ,shi=し|シ,chi=ち|チ,tsu=つ|ツ,ka=か|カ,ki=き|キ,ku=く|ク,ke=け|ケ,ko=こ|コ,ga=が|ガ,gi=ぎ|ギ,gu=ぐ|グ,ge=げ|ゲ,go=ご|ゴ,sa=さ|サ,su=す|ス,se=せ|セ,so=そ|ソ,za=ざ|ザ,ji=じ|ジ,zu=ず|ズ,ze=ぜ|ゼ,zo=ぞ|ゾ,ta=た|タ,te=て|テ,to=と|ト,da=だ|ダ,dzi=ぢ|ヂ,dzu=づ|ヅ,de=で|デ,do=ど|ド,na=な|ナ,ni=に|ニ,nu=ぬ|ヌ,ne=ね|ネ,no=の|ノ,ha=は|ハ,hi=ひ|ヒ,fu=ふ|フ,he=へ|ヘ,ho=ほ|ホ,pa=ぱ|パ,pi=ぴ|ピ,pu=ぷ|プ,pe=ぺ|ペ,po=ぽ|ポ,ba=ば|バ,bi=び|ビ,bu=ぶ|ブ,be=べ|ベ,bo=ぼ|ボ,ma=ま|マ,mi=み|ミ,mu=む|ム,me=め|メ,mo=も|モ,ra=ら|ラ,ri=り|リ,ru=る|ル,re=れ|レ,ro=ろ|ロ,wa=わ|ワ,wi=ゐ|ヰ,we=ゑ|ヱ,wo=を|ヲ,va=ゔぁ|ヴァ,vi=ゔぃ|ヴィ,ve=ゔぇ|ヴェ,vo=ゔぉ|ヴォ,vu=ゔ|ヴ,n=ん|ン,xa=ぁ|ァ,xi=ぃ|ィ,xu=ぅ|ゥ,xe=ぇ|ェ,xo=ぉ|ォ,xya=ゃ|ャ,xyu=ゅ|ュ,xyo=ょ|ョ,ya=や|ヤ,yu=ゆ|ユ,yo=よ|ヨ,a=あ|ア,i=い|イ,u=う|ウ,e=え|エ,o=お|オ,h=っ|ッ,-=ー".split(",").map((e) => e.split("="))
// Clean-up rules as [pattern, replacement] pairs, applied in order after the table above:
// they contract the "x" placeholders (kixya -> kya, fuxa -> fa), double the consonant after
// a small tsu, and finally turn any leftover placeholder "x" into "~".
const KANA_REPLACE_ARR = "fux([aieo])=f$1,ixy=y,(s|c)hy=$1h,dzu=zu,(dz|j)y?=j,xtsu([rtpsdfghjkbm])=$1$1,xtsuc=tc,x=~".split(",").map((e) => e.split("="))
// Both tables compiled once, so a conversion does not rebuild ~100 RegExp objects per call.
// Reusing global regexes is safe here: String.prototype.replace resets lastIndex itself.
const ROMAJI_KANA_RULES = ROMAJI_KANA_ARR.map(([romaji, kana]) => [new RegExp(kana, "gm"), romaji])
const KANA_REPLACE_RULES = KANA_REPLACE_ARR.map(([pattern, replacement]) => [new RegExp(pattern, "gm"), replacement])
// A maximal run of hiragana / katakana / prolonged-sound mark.
const KANA_RUN = /[\u3041-\u3096\u30a1-\u30fa\u30fc]+/g
/**
 * Convert the kana in this string to romaji.
 *
 * Only runs of kana are rewritten. The clean-up rules operate on Latin letters
 * ("x", "hy", "dz", ...), so applying them to the whole string corrupted any Latin
 * text already present in the lyrics (e.g. "next" -> "ne~t"); such text now passes
 * through untouched.
 *
 * @returns {string} the romanised string
 */
String.prototype.toRomaji = function()
{
    // Mark a syllabic n followed by a vowel / y-row kana so "n'a" is not read as "na".
    const marked = String(this).replace(/([んン])([あいうえおやゆよアイウエオヤユヨ])/gm, "$1'$2")
    return marked.replace(KANA_RUN, (run) =>
    {
        let romaji = run
        for (const [pattern, replacement] of ROMAJI_KANA_RULES) romaji = romaji.replace(pattern, replacement)
        for (const [pattern, replacement] of KANA_REPLACE_RULES) romaji = romaji.replace(pattern, replacement)
        return romaji
    })
}
/**
 * Text of this node with every <rb> (ruby base) element left out, so that only
 * the remaining content such as the ruby readings contributes to the result.
 * The node itself is not modified; the work happens on a deep clone.
 */
Node.prototype.getKanaLyrics = function()
{
    const copy = this.cloneNode(true)
    for (const base of copy.querySelectorAll("rb")) base.remove()
    return copy.innerText
}
/**
 * Treat this string as HTML: load it into a detached <div> and return that
 * element's getKanaLyrics() result.
 */
String.prototype.getKanaLyrics = function()
{
    return Object.assign(document.createElement("div"), { innerHTML: this }).getKanaLyrics()
}
// Shared state: the two elements that show the romaji lines, plus the word-split
// cache loaded from the userscript value "splited" (empty object when unset).
const lyricsStore =
{
    romaji: document.createElement("div"),
    romaji2: document.createElement("div"),
    splited: GM_getValue("splited", {})
}
lyricsStore.romaji.style.cssText = "font-size: 1.6em;overflow: hidden;margin-top:15px;margin-bottom:-10px;color:white"
lyricsStore.romaji2.style.cssText = "font-size: 1.6em;overflow: hidden;margin-top:5px"
// Page elements this script hooks into.
const ly = document.querySelector("#LyricsCycle")
const ly2 = document.querySelector("#LyricsCycle2")
const ch = document.querySelector("#LyricsTranslate_zh")
// Each romaji line is placed directly before the lyric line it mirrors.
ly.before(lyricsStore.romaji)
ly2.before(lyricsStore.romaji2)
// Keep the romaji lines in sync with the lyric area: attribute changes copy the
// colours / visibility over, and new child nodes in a "LyricsCycle*" element
// trigger a fresh romanisation of that line.
new MutationObserver((mutations) =>
{
    // Romanise one line, going through its cached word-split form when splitting is on.
    // An own-property check is used so a lyric line such as "constructor" cannot hit
    // an inherited Object.prototype member.
    const romanize = (line) =>
    {
        const cached = use_split && Object.prototype.hasOwnProperty.call(lyricsStore.splited, line)
        return (cached ? lyricsStore.splited[line] : line).toRomaji()
    }
    mutations.forEach((mutation) =>
    {
        if (mutation.type == "attributes")
        {
            lyricsStore.romaji.style.color = ly.style.color
            lyricsStore.romaji2.style.color = ly2.style.color
            // The second romaji line is shown only while the Chinese translation is hidden.
            lyricsStore.romaji2.style.display = ch.style.display == "none" ? "block" : "none"
            return
        }
        if (mutation.addedNodes.length == 0 || !mutation.target.id.startsWith("LyricsCycle")) return
        const target = mutation.target
        const text = target.innerText
        // The last text seen per element id is remembered so unchanged lines are skipped.
        if (lyricsStore[target.id] == text) return
        lyricsStore[target.id] = text
        const kana = target.getKanaLyrics()
        let rmj = romanize(kana)
        // Characters outside U+0000-U+4000 / U+F000-U+FFFF are still present, so retry
        // with the matching entry of the page's json.LyricsYomi.
        // NOTE(review): assumes json.Lyrics and json.LyricsYomi are parallel arrays - confirm.
        if (rmj.match(/[^\u0000-\u4000\uf000-\uffff]/))
        {
            const index = json.Lyrics.indexOf(kana)
            // indexOf returns -1 for an unknown line; previously that indexed LyricsYomi[-1]
            // and threw, leaving the previous romaji on screen.
            const yomi = index == -1 ? undefined : json.LyricsYomi[index]
            if (typeof yomi == "string") rmj = romanize(yomi.getKanaLyrics())
        }
        if (target.id == "LyricsCycle") lyricsStore.romaji.innerText = rmj
        else lyricsStore.romaji2.innerText = rmj
    })
}).observe(document.querySelector("div#divLyrics"),
{
    subtree: true,
    childList: true,
    attributes: true
})
// Userscript menu entry that flips the stored "use_split" flag and reloads the page.
// When splitting is being turned off, the user may also drop the dictionary database
// and the cached split results.
GM_registerMenuCommand(`${use_split?"關閉":"開啟"}日文分詞`, async () =>
{
    await GM.setValue("use_split", !use_split)
    const wipe = use_split && confirm(`已關閉日文分詞,是否刪除分詞字典與分詞快取?`)
    if (wipe)
    {
        indexedDB.deleteDatabase("naist-jdic");
        for (const key of ["splited", "naist-jdic_init"]) GM_deleteValue(key)
    }
    location.reload()
})
if (!use_split) return;
const tip = document.createElement("span")
tip.style = "display: unset; margin-left: 24px;"
document.querySelector("h2.main-title").append(tip)
/**
 * Promise wrapper around GM_xmlhttpRequest.
 *
 * @param {object} options
 * @param {string} options.method   HTTP method
 * @param {string} options.url      request URL
 * @param {*}      [options.data]   request body (default null)
 * @param {object} [options.headers] request headers (default {})
 * @param {string} [options.type]   value passed as responseType (default "")
 * @returns {Promise<object>} resolves with the response object handed to onload;
 *   rejects with an Error (carrying the raw response as `.response`) when the request
 *   errors, times out or is aborted. Without these handlers a failed request left
 *   the promise pending forever.
 */
const requests = ({ method, url, data = null, headers = {}, type = "" }) => {
    return new Promise((resolve, reject) => {
        const failWith = (reason) => (response) => {
            reject(Object.assign(new Error(`${method} ${url} ${reason}`), { response }));
        };
        GM_xmlhttpRequest({
            method: method,
            url: url,
            headers: headers,
            responseType: type,
            data: data,
            onload: resolve,
            onerror: failWith("failed"),
            ontimeout: failWith("timed out"),
            onabort: failWith("was aborted"),
        });
    });
};
// Character-category table for words that are not in the dictionary.
// The array index is the category number (see the trailing comments); the downloaded
// unknown-word table refers to an entry by that index when `unkDicAll` is built.
//   name   - category label
//   invoke - entries with invoke === true form `unkDicNormal`, the table tried first
//   regexp - pattern source; Lattice.lookup wraps it in ^...$ and compiles it with the "u" flag
// NOTE(review): several classes repeat the same ASCII range twice (e.g. "A-Za-zA-Za-z");
// presumably the second range was meant to be the full-width forms - verify the file encoding.
const UNKNOWN_DEFINITION = [
{ name: 'DEFAULT', invoke: false, regexp: '' }, // 0
{ name: 'SPACE', invoke: true, regexp: '\\s+' }, // 1
{ name: 'KANJI', invoke: false, regexp: '[\u{2e80}-\u{2fdf}々〇〻\u{3400}-\u{4dbf}\u{4e00}-\u{9fff}\u{f900}-\u{faff}\u{20000}-\u{2ffff}]{1,2}' }, // 2
{ name: 'SYMBOL', invoke: true, regexp: '[!-/:-@[-`{-~¡-¿À-ȶḀ-ỹ!-/:-@[-`{-・¢-\u{ffef}\u{2000}-\u{206f}₠-⅏←-⥿⨀-\u{2bff}\u{3000}-\u{303f}㈀-㏿︰-﹫]+' }, // 3
{ name: 'NUMERIC', invoke: true, regexp: '[0-90-9⁰-\u{209f}⅐-\u{218f}]+' }, // 4
{ name: 'ALPHA', invoke: true, regexp: '[A-Za-zA-Za-z]+' }, // 5
{ name: 'HIRAGANA', invoke: false, regexp: '[ぁ-ゟー]{1,4}' }, // 6
{ name: 'KATAKANA', invoke: true, regexp: '[ァ-ヿㇰ-ㇿヲ-ン゙゚]+' }, // 7
{ name: 'KANJINUMERIC', invoke: true, regexp: '[〇一二三四五六七八九十百千万億兆京]+' }, // 8
{ name: 'GREEK', invoke: true, regexp: '[ʹ-ϻ]+' }, // 9
{ name: 'CYRILLIC', invoke: true, regexp: '[Ѐ-ӹԀ-ԏ]+' }, // 10
];
/**
 * Shift every character strictly between U+0020 and U+007F by 0xFEE0 (65248),
 * i.e. onto its full-width counterpart; all other characters are kept as they are.
 *
 * @param {string} str input text
 * @returns {string} converted text, or '' for a falsy input
 */
const Halfwidth2Fullwidth = (str) => {
    if (!str) return '';
    const widen = (ch) => {
        const code = ch.codePointAt(0);
        const isHalfwidth = code > 0x0020 && code < 0x007f;
        return isHalfwidth ? String.fromCharCode(code + 0xfee0) : ch;
    };
    return [...str].map(widen).join('');
};
// Beginning-of-sentence sentinel. Lattice.lookup adds a copy of it to the word list;
// it occupies positions 0-1, i.e. it ends where the first input character starts.
const BOS = {
word: '\x02',
id: 0,
cost: 0,
start: 0,
end: 1,
};
// End-of-sentence sentinel. It has no fixed position: Lattice.lookup copies it and
// sets start/end to just past the last input character.
const EOS = {
word: '\x03',
id: 0,
cost: 0,
};
/**
 * Array of lattice words that additionally carries the total `cost` of the path.
 */
class Path extends Array {
    /** @param {number} [length] initial length (0 when omitted or falsy) */
    constructor(length) {
        super();
        this.length = length || 0;
        this.cost = 0;
    }

    /**
     * Copy of this path without its first and last element, keeping the same cost.
     * @returns {Path}
     */
    format() {
        const inner = Path.from(this.slice(1, this.length - 1));
        inner.cost = this.cost;
        return inner;
    }

    /**
     * Build a Path holding the indexed elements 0..length-1 of `arraylike`.
     * @param {ArrayLike<*>} arraylike
     * @returns {Path}
     */
    static from(arraylike) {
        const path = new Path(arraylike.length);
        // keys() walks every index up to length, so missing entries are copied as undefined.
        for (const i of path.keys()) path[i] = arraylike[i];
        return path;
    }
}
// Connection-cost rows read from the "matrix" store, cached across tokenize() calls
// and indexed by the right-hand word id.
const mtx = [];
/**
 * Word lattice for one input string, backed by the "naist-jdic" IndexedDB database.
 * Call lookup() first to collect the candidate words, then tokenize() to pick the
 * cheapest path through them.
 */
class Lattice {
    /** @param {string} input text to analyse; stored as an array of code points */
    constructor(input) {
        this.input = [...input];
    }

    /**
     * Collect every dictionary entry and every unknown-word match for each substring
     * of the input, add the BOS / EOS sentinels, and store the list (sorted by start,
     * then end) in `this.words`.
     *
     * @param {Array<object>} unkDic unknown-word definitions; entry 0 is skipped
     * @returns {Promise<Array<object>>} resolves with `this.words`; rejects when the
     *   database cannot be opened, the dictionary store is missing, or a cursor fails
     */
    lookup(unkDic) {
        const chars = this.input;
        const CHAR_LENGTH = chars.length;
        // Compile each pattern once instead of once per substring (entry 0 is never tested).
        const unkPatterns = unkDic.map((def, k) => (k === 0 ? null : new RegExp('^' + def.regexp + '$', 'u')));
        return new Promise((resolve, reject) => {
            const openReq = indexedDB.open("naist-jdic");
            // Without this a failed open left the promise pending forever.
            openReq.onerror = () => reject(openReq.error);
            openReq.onsuccess = e => {
                const db = e.target.result;
                let dic;
                try {
                    dic = db.transaction(['dictionary'], 'readonly').objectStore('dictionary').index('index');
                } catch (err) {
                    db.close();
                    indexedDB.deleteDatabase("naist-jdic");
                    reject("找不到字典");
                    // Stop here: there is no index to query.
                    return;
                }
                const targets = [], promises = [];
                for (let i = 0; i < CHAR_LENGTH; i++) {
                    for (let j = i; j < CHAR_LENGTH; j++) {
                        promises.push(new Promise((done, fail) => {
                            const targetKey = chars.slice(i, j + 1).join('');
                            const req = dic.openCursor(Halfwidth2Fullwidth(targetKey));
                            req.onsuccess = ev => {
                                const cursor = ev.target.result;
                                if (cursor) {
                                    // Positions are 1-based because BOS occupies 0-1.
                                    cursor.value.start = i + 1;
                                    cursor.value.end = j + 2;
                                    targets.push(cursor.value);
                                    cursor.continue();
                                    return;
                                }
                                // Cursor exhausted: add the unknown-word candidates. Skip DEFAULT (k=0)
                                for (let k = 1; k < unkDic.length; k++) {
                                    if (unkPatterns[k].test(targetKey)) {
                                        targets.push({
                                            word: targetKey,
                                            id: unkDic[k].id,
                                            cost: unkDic[k].cost,
                                            pos: unkDic[k].pos,
                                            start: i + 1,
                                            end: j + 2,
                                            note: k && '未知語'
                                                || targetKey === '\n' && '改行'
                                                || targetKey === '\t' && 'タブ'
                                                || '空白'
                                        });
                                    }
                                }
                                done();
                            };
                            req.onerror = ev => fail(ev);
                        }));
                    }
                }
                Promise.all(promises).then(() => {
                    targets.push(Object.assign({}, BOS), Object.assign({}, EOS, {
                        start: CHAR_LENGTH + 1,
                        end: CHAR_LENGTH + 2,
                    }));
                    this.words = targets.sort((a, b) => {
                        return a.start - b.start || a.end - b.end;
                    });
                    resolve(this.words);
                }, reject).then(() => db.close());
            };
        });
    }

    /**
     * Pick the cheapest BOS -> EOS path through `this.words`, using the word costs
     * plus the connection costs from the "matrix" store.
     *
     * @returns {Promise<Path>} resolves with the path without its BOS / EOS sentinels;
     *   rejects when the database or a matrix row is unavailable or the path is broken
     */
    tokenize() {
        const words = this.words;
        const len = words.length;
        // mCosts[y][x]: cost of x directly preceding y, Infinity when they are not adjacent.
        const mCosts = new Array(len).fill().map(() => new Array(len));
        return new Promise((resolve, reject) => {
            const openReq = indexedDB.open("naist-jdic");
            openReq.onerror = () => reject(openReq.error);
            openReq.onsuccess = e => {
                const db = e.target.result;
                let matrix;
                try {
                    matrix = db.transaction(['matrix'], 'readonly').objectStore('matrix');
                } catch (err) {
                    db.close();
                    reject("找不到matrix");
                    return;
                }
                const promises = [];
                for (let y = 0; y < len; y++) {
                    const rightId = words[y].id;
                    promises.push(new Promise((done, fail) => {
                        if (mtx[rightId]) {
                            done();
                        } else {
                            const req = matrix.get(rightId);
                            req.onsuccess = ev => {
                                const result = ev.target.result;
                                if (result) {
                                    mtx[rightId] = result.left;
                                    done();
                                } else {
                                    fail("找不到matrix");
                                }
                            };
                            req.onerror = ev => fail(ev);
                        }
                    }).then(() => {
                        for (let x = 0; x < len; x++) {
                            const leftId = words[x].id;
                            mCosts[y][x] = words[x].end === words[y].start ? mtx[rightId][leftId] : Infinity;
                        }
                    }));
                }
                Promise.all(promises).then(() => {
                    // Shortest-path search that starts at the last word and relaxes backwards.
                    const vertex = new Array(len).fill().map(() => ({
                        cost: Infinity,
                        next: -1,
                        visited: false,
                    }));
                    vertex[len - 1] = {
                        cost: words[len - 1].cost, // 0
                        next: len,
                        visited: false,
                    };
                    while (true) {
                        let min = Infinity;
                        for (let i = 0; i < len; i++) {
                            if (!vertex[i].visited && vertex[i].cost < min) min = vertex[i].cost;
                        }
                        if (min === Infinity) break;
                        for (let y = 0; y < len; y++) {
                            if (vertex[y].cost === min) {
                                for (let x = 0; x < len; x++) {
                                    const sum = mCosts[y][x] + words[y].cost + min;
                                    if (sum < vertex[x].cost) {
                                        vertex[x].cost = sum;
                                        vertex[x].next = y;
                                    }
                                }
                                vertex[y].visited = true;
                            }
                        }
                    }
                    // Follow the `next` links from the first word to the end.
                    let index = 0;
                    const path = new Path();
                    path.cost = vertex[index].cost;
                    while (index < len) {
                        const word = words[index];
                        if (word) {
                            path.push(word);
                            index = vertex[index].next;
                        } else {
                            reject("找不到word");
                            break;
                        }
                    }
                    resolve(path.format());
                }).catch(err => reject(err))
                    // Always release the connection; it used to stay open after every call.
                    .then(() => db.close());
            };
        });
    }
}
/**
 * Move every character strictly between U+30A0 and U+30F5 down by 96 code points,
 * which maps katakana onto the corresponding hiragana; everything else is kept.
 *
 * @param {string} str input text
 * @returns {string} converted text, or '' for a falsy input
 */
const Katakana2Hiragana = (str) => {
    if (!str) return '';
    const toHiragana = (ch) => {
        const code = ch.codePointAt(0);
        const isKatakana = code > 0x30a0 && code < 0x30f5;
        return isKatakana ? String.fromCharCode(code - 96) : ch;
    };
    return [...str].map(toHiragana).join('');
};
// Word-splitting set-up: load the unknown-word table, install String.prototype.mecabSplit,
// build the dictionary database on first use, then split all lyric lines of the page.
// (Leading ";" guards against the previous statement being parsed as a call target.)
;(async ()=>
{
    // Unknown-word table: groups of four 16-bit values - category index, id, cost, pos.
    const buf = await requests({ method: "get", url: "https://raw.githubusercontent.com/Anong0u0/MeCabJS/master/naist-jdic.unknown.bin", type: "arraybuffer" })
    const array = new Uint16Array(buf.response);
    const unkDicAll = new Array(array.length / 4);
    for (let i = 0; i < unkDicAll.length; i++) {
        unkDicAll[i] = Object.assign({}, UNKNOWN_DEFINITION[array[i * 4 + 0]], {
            id: array[i * 4 + 1],
            cost: array[i * 4 + 2],
            pos: array[i * 4 + 3],
        });
    }
    const unkDicNormal = unkDicAll.filter(v => v.invoke);

    /**
     * Split this string into words and return their readings (pron, else orth, else the
     * surface form) joined by spaces, with katakana converted to hiragana.
     * Never rejects: if the analysis fails the original string is returned unchanged.
     *
     * @returns {Promise<string>}
     */
    String.prototype.mecabSplit = async function() {
        const text = String(this)
        try {
            const lattice = new Lattice(text)
            await lattice.lookup(unkDicNormal)
            let tokens
            try {
                tokens = await lattice.tokenize()
            } catch (err) {
                // No path with the reduced table: retry once with every unknown-word entry.
                // (Previously tokenize() also ran a second time after a success.)
                await lattice.lookup(unkDicAll)
                tokens = await lattice.tokenize()
            }
            return Katakana2Hiragana(tokens.map((e) => e.pron || e.orth || e.word).join(" ")).replace(/ 、 /g, "、").replace(/ -/g, "-")
        } catch (err) {
            // Deliberate best effort: an unsplit line is still romanised by the caller.
            return text
        }
    };

    // Split every line of json.LyricsYomi that is not cached yet, persisting the cache
    // after each line and showing the progress in `tip`.
    const splitLyrics = async () =>
    {
        const total = json.LyricsYomi.length
        lyricsStore.splited = GM_getValue("splited", {})
        let now = 0
        tip.innerText = `分詞中(${now}/${total})...`
        for (const html of json.LyricsYomi)
        {
            now++
            const line = html.getKanaLyrics()
            // Own-property check so a line such as "constructor" is not treated as cached.
            if (Object.prototype.hasOwnProperty.call(lyricsStore.splited, line)) continue
            lyricsStore.splited[line] = await line.mecabSplit()
            tip.innerText = `分詞中(${now}/${total})...`
            GM.setValue("splited", lyricsStore.splited)
        }
        tip.remove()
    }

    // Dictionary already imported on an earlier visit: go straight to splitting.
    if (GM_getValue("naist-jdic_init", false)) {
        splitLyrics();
        return;
    }

    tip.innerText = `下載字典中...`
    // Worker source: gunzips the matrix and the dictionary TSV and writes both into the
    // "naist-jdic" IndexedDB database, reporting progress in roughly 1% steps.
    // The two fetch chains are returned directly so that a download or gunzip failure
    // reaches the final catch and is reported as { state: 'error' } instead of hanging.
    // NOTE(review): the row offset uses SizeY as stride while SizeX values are read per
    // row; this only matters if the matrix is not square - confirm against the file format.
    const workerContent = `
    self.onmessage = e => {
        importScripts(e.data.gunzip);
        Promise.all([
            fetch(e.data.bin).then(res => res.arrayBuffer()).then(buffer => {
                let u8array = new Zlib.Gunzip(new Uint8Array(buffer)).decompress();
                return new Int16Array(u8array.buffer);
            }),
            fetch(e.data.tsv).then(res => res.arrayBuffer()).then(buffer => {
                let u8array = new Zlib.Gunzip(new Uint8Array(buffer)).decompress();
                return new TextDecoder().decode(u8array).split('\\n');
            }),
        ]).then(values => {
            const openReq = indexedDB.open("naist-jdic");
            openReq.onupgradeneeded = e => {
                const db = e.target.result;
                db.createObjectStore('matrix', { keyPath: 'right' });
                db.createObjectStore('dictionary', { autoIncrement: true }).createIndex('index', 'word');
            };
            openReq.onsuccess = e => {
                const db = e.target.result;
                const tx = db.transaction(['matrix', 'dictionary'], 'readwrite');
                tx.oncomplete = e => {
                    self.postMessage({ state: 'done' });
                    self.close();
                };
                tx.onerror = e => {
                    throw tx.error;
                };
                tx.onabort = tx.onerror;
                const matrix = tx.objectStore('matrix');
                const bin = values[0];
                const SizeX = bin[0] >>> 0, SizeY = bin[1] >>> 0;
                const dictionary = tx.objectStore('dictionary');
                const words = values[1];
                const SizeWords = words.length;
                const SumSize = SizeY + SizeWords,
                    one = (SumSize*.01).toFixed(0);
                for (let i = 0; i < SizeY; i++) {
                    const start = 2 + i * SizeY;
                    const req = matrix.put({
                        right: i,
                        left: [...bin.subarray(start, start + SizeX)],
                    });
                    req.onsuccess = e => {if(i%one==0) self.postMessage({ state: 'processing', total: SumSize, now: i })}
                }
                for (let j = 0; j < SizeWords; j++) {
                    const c = words[j].split('\\t');
                    let token = {
                        word: c[0],
                        id: Number(c[1]),
                        cost: Number(c[2]),
                        pos: Number(c[3]),
                    };
                    if (c[4]) token.cjg = [ c[4], c[5] ];
                    if (c[6]) token.base = c[6];
                    if (c[7]) token.orth = c[7];
                    if (c[8]) token.pron = c[8];
                    const req = dictionary.put(token);
                    req.onsuccess = e => {if((SizeY+j)%one==0) self.postMessage({ state: 'processing', total: SumSize, now: (SizeY+j) })}
                }
            };
            openReq.onerror = e => {throw 'データベースに接続できません';};
        }).catch(e => {
            self.postMessage({ state: 'error' });
            self.close();
        });
    }
    `
    const workerUrl = URL.createObjectURL(new Blob([workerContent], { type: 'application/javascript' }))
    const worker = new Worker(workerUrl);

    // The three downloads are independent, so they run in parallel. Each file is handed
    // to the worker as a blob URL.
    const toBlobUrl = async (url) => URL.createObjectURL((await requests({ method: "get", url: url, type: "blob" })).response)
    const [bin, tsv, gunzip] = await Promise.all([
        toBlobUrl("https://raw.githubusercontent.com/Anong0u0/MeCabJS/master/naist-jdic.matrix.bin.gz"),
        toBlobUrl("https://raw.githubusercontent.com/Anong0u0/MeCabJS/master/naist-jdic.min.tsv.gz"),
        toBlobUrl("https://raw.githubusercontent.com/Anong0u0/MeCabJS/master/gunzip.min.js"),
    ])
    // `delete` on a local variable does nothing; revoking is what actually frees the blobs.
    const releaseUrls = () => [workerUrl, bin, tsv, gunzip].forEach((url) => URL.revokeObjectURL(url))

    worker.onmessage = e => {
        e = e.data
        if(e.state == "processing")
        {
            tip.innerText = `整理字典中(${(e.now/e.total*100).toFixed(0)}%)...`
            return;
        }
        worker.terminate();
        releaseUrls()
        if (e.state === 'done') {
            GM.setValue("naist-jdic_init", true)
            splitLyrics();
        }
        else if (e.state === 'error')
        {
            tip.innerText = `字典整理出錯`
            alert("羅馬拼音腳本執行錯誤");
        }
    };
    worker.onerror = (e) => {
        console.log(e);
        tip.innerText = `字典整理出錯`
        alert("羅馬拼音腳本執行錯誤");
        worker.terminate()
        releaseUrls()
    }
    worker.postMessage({ bin: bin, tsv: tsv, gunzip: gunzip });
})().catch((err) => {
    // Surface set-up failures instead of leaving an unhandled rejection and a stale label.
    console.error(err);
    tip.innerText = `字典整理出錯`
})