SearchJumper levenshtein addon

Add similarity search based on Levenshtein distance to the highlight feature of SearchJumper.

  1. // ==UserScript==
  2. // @name SearchJumper levenshtein addon
  3. // @name:zh-CN 搜索酱单词模式扩展
  4. // @name:zh-TW 搜尋醬單詞模式擴展
  5. // @namespace hoothin
  6. // @version 0.2
  7. // @description Add similarity search based on Levenshtein distance to the highlight feature of SearchJumper.
  8. // @description:zh-CN 为搜索酱的页内高亮添加基于莱文斯坦距离的相似度查找
  9. // @description:zh-TW 為搜尋醬的頁内高亮添加基於萊文斯坦距離的相似度查找
  10. // @author hoothin
  11. // @match *://*/*
  12. // @grant unsafeWindow
  13. // @run-at document-start
  14. // ==/UserScript==
  15.  
  16. (function() {
  17. 'use strict';
  // Use the userscript manager's `unsafeWindow` when it is defined; otherwise fall back to `window`.
  const _unsafeWindow = (typeof unsafeWindow !== 'undefined') ? unsafeWindow : window;
  // Make sure the `searchJumperAddons` array exists before anything is pushed onto it.
  if (!_unsafeWindow.searchJumperAddons) {
    _unsafeWindow.searchJumperAddons = [];
  }
  /**
   * Computes the Levenshtein (edit) distance between two strings: the minimum
   * number of single-character insertions, deletions and substitutions needed
   * to turn one string into the other.
   *
   * Characters are compared by Unicode code point, so a character outside the
   * Basic Multilingual Plane (for example an emoji) counts as one edit rather
   * than two UTF-16 code units. Only two rows of the dynamic-programming table
   * are kept at any time instead of the full matrix.
   *
   * @param {string} a - First string.
   * @param {string} b - Second string.
   * @returns {number} The edit distance between `a` and `b`.
   */
  function levenshteinDistance(a, b) {
    // Array.from iterates a string by code point, keeping surrogate pairs together.
    const source = Array.from(a);
    const target = Array.from(b);
    if (source.length === 0) return target.length;
    if (target.length === 0) return source.length;

    // Row 0: turning the first i characters of `a` into an empty string costs i deletions.
    let previousRow = Array.from({length: source.length + 1}, (_, i) => i);
    for (let j = 1; j <= target.length; j += 1) {
      // Column 0: building the first j characters of `b` from nothing costs j insertions.
      const currentRow = [j];
      for (let i = 1; i <= source.length; i += 1) {
        const substitutionCost = source[i - 1] === target[j - 1] ? 0 : 1;
        currentRow[i] = Math.min(
          currentRow[i - 1] + 1, // cell to the left: one extra character, always costs one
          previousRow[i] + 1, // cell above: one extra character, always costs one
          previousRow[i - 1] + substitutionCost, // diagonal: same length, free when characters match
        );
      }
      previousRow = currentRow;
    }
    return previousRow[source.length];
  }
  // Source of a regex character class listing the characters treated as word separators.
  // NOTE(review): after string unescaping, the `\\'` sequence reaches the regex as `\'`
  // (an escaped quote), so a literal backslash is NOT a separator — confirm that is intended.
  const gapStr = "[\n\/\\'\"‘’“”,.!\?,。!?…\(\) ]";
  // Matches one word: a maximal run of characters that are not separators.
  const wordPattern = new RegExp("[^" + gapStr.slice(1) + "+", "g");

  /**
   * Splits a string into words, remembering where each word sits in the string.
   *
   * @param {string} str - Text to tokenise.
   * @returns {{word: string, start: number, end: number}[]} Words in order of
   *   appearance; `end` is exclusive.
   */
  function splitWords(str) {
    const words = [];
    // The pattern is global and therefore stateful; always scan from the beginning.
    wordPattern.lastIndex = 0;
    let found = wordPattern.exec(str);
    while (found !== null) {
      words.push({word: found[0], start: found.index, end: found.index + found[0].length});
      found = wordPattern.exec(str);
    }
    return words;
  }

  /**
   * Largest edit distance still accepted for a keyword of the given length:
   * a quarter of the length (rounded down), plus one once the word is longer
   * than three characters.
   *
   * @param {number} wordLength - Length of the keyword.
   * @returns {number} Maximum tolerated Levenshtein distance.
   */
  function toleranceFor(wordLength) {
    const base = wordLength >> 2;
    return wordLength > 3 ? base + 1 : base;
  }

  _unsafeWindow.searchJumperAddons.push({
    name: "Levenshtein",
    type: "findInPage",
    sort: 0,
    /**
     * Looks for the first run of consecutive words in `text` that are each
     * within the tolerated edit distance of the corresponding keyword word.
     *
     * Keywords whose first character has a char code above 255 skip the fuzzy
     * comparison and are located with a case-insensitive substring search.
     *
     * Positions are taken straight from the tokenised text, so the reported
     * range is always the run that actually matched. Separators at the start
     * or end of `keywords` are ignored.
     *
     * @param {string} text - Text to search in.
     * @param {string} keywords - One or more words to look for.
     * @returns {{matched: boolean, pos?: number, len?: number}} `pos` is the
     *   offset of the match and `len` its length, from the first matched word
     *   to the end of the last one; `pos` is -1 and `len` is 0 when nothing
     *   matched.
     */
    run: (text, keywords) => {
      if (!text || !keywords) return {matched: false};
      if (keywords.charCodeAt(0) > 255) {
        const exactPos = text.toUpperCase().indexOf(keywords.toUpperCase());
        return {matched: exactPos !== -1, pos: exactPos, len: keywords.length};
      }

      const notFound = {matched: false, pos: -1, len: 0};
      const haystack = splitWords(text.toLowerCase());
      // Tolerances depend only on the keywords, so compute them once up front.
      const needles = splitWords(keywords.toLowerCase()).map(({word}) => ({
        word,
        tolerance: toleranceFor(word.length),
      }));
      // Keywords made only of separators contain nothing to search for.
      if (needles.length === 0) return notFound;

      const lastStart = haystack.length - needles.length;
      for (let i = 0; i <= lastStart; i++) {
        const isRunMatch = needles.every(
          (needle, j) => levenshteinDistance(needle.word, haystack[i + j].word) <= needle.tolerance
        );
        if (isRunMatch) {
          const pos = haystack[i].start;
          const len = haystack[i + needles.length - 1].end - pos;
          return {matched: true, pos, len};
        }
      }
      return notFound;
    }
  });
  90. })();