string-overlap-matching-degree

计算字符串重叠/匹配度计算

目前為 2024-07-24 提交的版本,檢視 最新版本

此腳本不應該直接安裝,它是一個供其他腳本使用的函式庫。欲使用本函式庫,請在腳本 metadata 寫上: // @require https://update.cn-greasyfork.org/scripts/501646/1416065/string-overlap-matching-degree.js

  1.  
  2. /**
  3. * 重叠匹配度
  4. * @author: zhuangjie
  5. * @date: 2024-07-23
  6. */
  7. function overlapMatchingDegreeForObjectArray(keyword = "", objArr = [], fun = (obj) => [], {sort = "desc",onlyHasScope =false,scopeForObjArrContainer} = {}) {
  8. const scopeForData = objArr.map(item => overlapMatchingDegree(keyword, fun(item), sort));
  9. // scope与 objArr 同步排序
  10. sortAndSync(scopeForData, objArr)
  11. if(Array.isArray(scopeForObjArrContainer)) {
  12. // 说明需要分数,倒给
  13. scopeForObjArrContainer.push(...scopeForData)
  14. }
  15. return onlyHasScope ? filterMismatchItem(scopeForData,objArr) : objArr;
  16. }
  17.  
  18. // 根据scopeForData得到新数组objArr
  19. function filterMismatchItem(scopeForData,objArr) {
  20. const result = []
  21. for(let [scope,index] of scopeForData.entries()) {
  22. if(scope != 0) {
  23. result.push(objArr[index])
  24. }
  25. }
  26. return result
  27. }
  28.  
  29. /**
  30. * 计算匹配度外层封装工具
  31. * @param {string} keyword - 匹配字符串1
  32. * @param {Object | Arrayy} topicWeighs - 匹配字符串2与它的权重
  33. * @returns {number} 匹配度分数
  34. */
  35. function overlapMatchingDegree(keyword, topicWeighs = {}, sort = "desc") {
  36. // 兼容topicWeighs为topic数组,元素越靠前权重越高
  37. if (Array.isArray(topicWeighs)) {
  38. const _temp = {}
  39. if (sort === "asc") {
  40. for (let i = 1; i <= topicWeighs.length; i++) {
  41. _temp[topicWeighs[i - 1]] = i;
  42. }
  43. } else {
  44. // desc
  45. for (let i = topicWeighs.length; i > 0; i--) {
  46. _temp[topicWeighs[topicWeighs.length - i]] = i;
  47. }
  48. }
  49. topicWeighs = _temp;
  50. }
  51. let topicList = Object.keys(topicWeighs)
  52. // topic map 得分
  53. const topicScores = topicList.map(topic => {
  54. let topicScore = 0; // 得分
  55. const currentTopicScore = topicWeighs[topic]; // 当前topic“个数”
  56. const overlapLengthBlocksMap = findOverlapBlocks(keyword, topic)
  57. const overlapLengthKeys = Object.keys(overlapLengthBlocksMap);
  58. for (let overlapLengthKey of overlapLengthKeys) {
  59. const currentLengthBlocks = overlapLengthBlocksMap[overlapLengthKey];
  60. topicScore = Math.pow(currentTopicScore, overlapLengthKey) * currentLengthBlocks.length;
  61. }
  62. return topicScore;
  63. })
  64. // 算总分返回
  65. return topicScores.reduce((a, b) => a + b, 0)
  66. }
  67.  
  68. /**
  69. * 查找重叠匹配块(入口函数)
  70. * @param {*} str1
  71. * @param {*} str2
  72. * @returns 返回重叠块 如:{"2": ["好用","推荐"],"3": ["好用推荐"]}
  73. * 算法核心思想:
  74. * -----------------------------------------------------
  75. * sumatrapdf* | sumatrapdf* | sumatrapdf*
  76. * pdf- | pdf- | pdf-
  77. * ------------------------------------------------------
  78. */
  79. function findOverlapBlocks(str1 = "", str2 = "") {
  80. let maxStr2OffsetLength = str1.length - 1;
  81. let minStr2OffsetLength = -(str2.length - 1);
  82. const alignmentHub = { /* "3": ["好用","推荐"] */ }
  83. for (let currentStr2OffsetLength = maxStr2OffsetLength; currentStr2OffsetLength >= minStr2OffsetLength; currentStr2OffsetLength--) {
  84. let str1EndIndex = str1.length - 1;
  85. let str2EndIndex = currentStr2OffsetLength + (str2.length - 1);
  86.  
  87. let overlappingStart = currentStr2OffsetLength >= 0 ? currentStr2OffsetLength : 0; // 重叠闭区间开始 [
  88. let overlappingEnd = str2EndIndex < str1EndIndex ? str2EndIndex : str1EndIndex; // 重叠闭区间结束 ]
  89.  
  90. // 对齐
  91. const alignmentContent = alignment(str1.substring(overlappingStart, overlappingEnd + 1), str2.substring(overlappingStart - currentStr2OffsetLength, overlappingEnd - currentStr2OffsetLength + 1));
  92. // 长重叠 覆盖 短重叠
  93. longOverlappingCoverage(alignmentHub, alignmentContent);
  94. }
  95. return alignmentHub;
  96. }
  97. function longOverlappingCoverage(alignmentHub = {}, alignmentContent = {}) {
  98. const modifiedCols = Object.keys(alignmentContent)
  99. const maxmodifiedCols = Math.max(...modifiedCols)
  100. const minmodifiedCols = Math.min(...modifiedCols)
  101. // 合并到对应的列上
  102. modifiedCols.forEach(col => {
  103. alignmentHub[col] = alignmentHub[col] ? Array.from(new Set(alignmentHub[col].concat(alignmentContent[col]))) : alignmentContent[col];
  104. })
  105. const alignmentHubCol = Object.keys(alignmentHub)
  106. const gtmodifiedCols = alignmentHubCol.filter(col => parseFloat(col) > maxmodifiedCols);
  107. const ltmodifiedCols = alignmentHubCol.filter(col => parseFloat(col) < minmodifiedCols);
  108. // 重新计算各列,过滤掉在modifiedCols的`大于modifiedCols的列`的子串,过滤掉`小于modifiedCols的列`在modifiedCols的子串
  109. // - 过滤掉在modifiedCols的`大于modifiedCols的列`的子串
  110. colElementFilter(alignmentHub, modifiedCols, gtmodifiedCols);
  111. // - 过滤掉`小于modifiedCols的列`在modifiedCols的子串
  112. colElementFilter(alignmentHub, ltmodifiedCols, modifiedCols);
  113.  
  114. }
  115. // 列元素过滤
  116. function colElementFilter(alignmentHub = {}, cols = [], gtCols = []) {
  117. if (cols.length == 0 || gtCols.length == 0) return;
  118. gtCols.forEach(gtCol => {
  119. const gtOverlappingBlocks = alignmentHub[gtCol];
  120. for (let [index, col] of cols.entries()) {
  121. const overlappingBlocks = alignmentHub[col];
  122. if (gtOverlappingBlocks == undefined || overlappingBlocks == undefined) continue;;
  123. alignmentHub[col] = overlappingBlocks.filter(overlappingBlock => {
  124. for (let gtOverlappingBlock of gtOverlappingBlocks) {
  125. if (gtOverlappingBlock.includes(overlappingBlock)) {
  126. return false
  127. }
  128. }
  129. return true;
  130. })
  131. }
  132. })
  133. // 清理掉value为空数组项
  134. // {1: [],2:["好用"]} -to-> {2:["好用"]}
  135. Object.keys(alignmentHub).forEach(key => { if (alignmentHub[key].length == 0) delete alignmentHub[key] });
  136. }
  137. // 对齐
  138. function alignment(str1 = "", str2 = "") {
  139. // 返回 {"1",["我","用"],"2":["推荐","可以"]}
  140. if (str1.length != str2.length) {
  141. throw new Error("算法错误,对齐字符串长度不一致");
  142. }
  143. const overlappingBlocks = {}
  144. let currentOverlappingBlock = "";
  145. for (let i = str1.length - 1; i >= 0; i--) {
  146. if (str1[i] == str2[i]) {
  147. currentOverlappingBlock = str1[i] + currentOverlappingBlock;
  148. if (i - 1 >= 0) continue;
  149. }
  150. if (currentOverlappingBlock.length == 0) {
  151. continue;
  152. } else {
  153. // 块收集
  154. let currentOverlappingBlockContainer = overlappingBlocks[currentOverlappingBlock.length]
  155. if (currentOverlappingBlockContainer == undefined) {
  156. currentOverlappingBlockContainer = overlappingBlocks[currentOverlappingBlock.length] = [currentOverlappingBlock]
  157. } else if (!currentOverlappingBlockContainer.includes(currentOverlappingBlock)) {
  158. currentOverlappingBlockContainer.push(currentOverlappingBlock)
  159. }
  160. }
  161. currentOverlappingBlock = "";
  162. }
  163. return overlappingBlocks;
  164. }
  165.  
  166. // 【同步算法-堆实现】
  167. function sortAndSync(arr1, arr2, order = 'desc') {
  168. if (arr1.length !== arr2.length) {
  169. throw new Error("Both arrays must have the same length");
  170. }
  171. function swap(arr, i, j) {
  172. [arr[i], arr[j]] = [arr[j], arr[i]];
  173. }
  174. function heapify(arr1, arr2, n, i, order) {
  175. let largest = i;
  176. let left = 2 * i + 1;
  177. let right = 2 * i + 2;
  178.  
  179. if (order === 'asc') {
  180. if (left < n && arr1[left] > arr1[largest]) {
  181. largest = left;
  182. }
  183. if (right < n && arr1[right] > arr1[largest]) {
  184. largest = right;
  185. }
  186. } else {
  187.  
  188. if (left < n && arr1[left] < arr1[largest]) {
  189. largest = left;
  190. }
  191. if (right < n && arr1[right] < arr1[largest]) {
  192. largest = right;
  193. }
  194. }
  195.  
  196. if (largest !== i) {
  197. swap(arr1, i, largest);
  198. swap(arr2, i, largest);
  199. heapify(arr1, arr2, n, largest, order);
  200. }
  201. }
  202. function buildHeap(arr1, arr2, n, order) {
  203. for (let i = Math.floor(n / 2) - 1; i >= 0; i--) {
  204. heapify(arr1, arr2, n, i, order);
  205. }
  206. }
  207. function heapSort(arr1, arr2, order) {
  208. let n = arr1.length;
  209. buildHeap(arr1, arr2, n, order);
  210.  
  211. for (let i = n - 1; i > 0; i--) {
  212. swap(arr1, 0, i);
  213. swap(arr2, 0, i);
  214. heapify(arr1, arr2, i, 0, order);
  215. }
  216. }
  217. heapSort(arr1, arr2, order);
  218. }
  219.  
  220. // 【算法测试1】
  221. // console.log("-- 算法测试开始 --")
  222. // console.log(findOverlapBlocks("[推荐]sumatrapdf非常好用","pdf 推荐"))
  223. // console.log("-- 算法测试结束 --")
  224.  
  225. // 【算法测试2】
  226. // console.log("匹配度:", overlapMatchingDegree("好用的pdf工具", { "sumatrapdf": 10, "小而好用的pdf阅读器": 8, "https://www.sumatrapdfreader.org/downloadafter": 3 }));
  227.