string-overlap-matching-degree

计算字符串重叠/匹配度计算

目前为 2024-08-05 提交的版本。查看 最新版本

此脚本不应直接安装,它是一个供其他脚本使用的外部库。如果您需要使用该库,请在脚本元属性加入:// @require https://update.cn-greasyfork.org/scripts/501646/1422258/string-overlap-matching-degree.js

  1. /**
  2. * 重叠匹配度
  3. * @author: zhuangjie
  4. * @date: 2024-07-23
  5. */
  6. function overlapMatchingDegreeForObjectArray(keyword = "", objArr = [], fun = (obj) => [], {sort = "desc",onlyHasScope =false,scopeForObjArrContainer} = {}) {
  7. const scopeForData = objArr.map(item => overlapMatchingDegree(keyword, fun(item), sort));
  8. // scope与 objArr 同步排序
  9. sortAndSync(scopeForData, objArr)
  10. if(Array.isArray(scopeForObjArrContainer)) {
  11. // 说明需要分数,倒给
  12. scopeForObjArrContainer.push(...scopeForData)
  13. }
  14. return onlyHasScope ? filterMismatchItem(scopeForData,objArr) : objArr;
  15. }
  16.  
  17. // 根据scopeForData得到新数组objArr
  18. function filterMismatchItem(scopeForData,objArr) {
  19. const result = []
  20. for(let [scope,index] of scopeForData.entries()) {
  21. if(scope != 0) {
  22. result.push(objArr[index])
  23. }
  24. }
  25. return result
  26. }
  27.  
  28. /**
  29. * 计算匹配度外层封装工具
  30. * @param {string} keyword - 匹配字符串1
  31. * @param {Object | Arrayy} topicWeighs - 匹配字符串2与它的权重
  32. * @returns {number} 匹配度分数
  33. */
  34. function overlapMatchingDegree(keyword, topicWeighs = {}, sort = "desc") {
  35. // 兼容topicWeighs为topic数组,元素越靠前权重越高
  36. if (Array.isArray(topicWeighs)) {
  37. const _temp = {}
  38. if (sort === "asc") {
  39. for (let i = 1; i <= topicWeighs.length; i++) {
  40. _temp[topicWeighs[i - 1]] = i;
  41. }
  42. } else {
  43. // desc
  44. for (let i = topicWeighs.length; i > 0; i--) {
  45. _temp[topicWeighs[topicWeighs.length - i]] = i;
  46. }
  47. }
  48. topicWeighs = _temp;
  49. }
  50. let topicList = Object.keys(topicWeighs)
  51. // topic map 得分
  52. const topicScores = topicList.map(topic => {
  53. let topicScore = 0; // 得分
  54. const currentTopicScore = topicWeighs[topic]; // 当前topic“个数”
  55. const overlapLengthBlocksMap = findOverlapBlocks(keyword, topic)
  56. const overlapLengthKeys = Object.keys(overlapLengthBlocksMap);
  57. for (let overlapLengthKey of overlapLengthKeys) {
  58. const currentLengthBlocks = overlapLengthBlocksMap[overlapLengthKey];
  59. topicScore = Math.pow(currentTopicScore, overlapLengthKey) * currentLengthBlocks.length;
  60. }
  61. return topicScore;
  62. })
  63. // 算总分返回
  64. return topicScores.reduce((a, b) => a + b, 0)
  65. }
  66.  
  67. /**
  68. * 查找重叠匹配块(入口函数)
  69. * @param {*} str1
  70. * @param {*} str2
  71. * @returns 返回重叠块 如:{"2": ["好用","推荐"],"3": ["好用推荐"]}
  72. * 算法核心思想:
  73. * -----------------------------------------------------
  74. * sumatrapdf* | sumatrapdf* | sumatrapdf*
  75. * pdf- | pdf- | pdf-
  76. * ------------------------------------------------------
  77. */
  78. function findOverlapBlocks(str1 = "", str2 = "") {
  79. let maxStr2OffsetLength = str1.length - 1;
  80. let minStr2OffsetLength = -(str2.length - 1);
  81. const alignmentHub = { /* "3": ["好用","推荐"] */ }
  82. for (let currentStr2OffsetLength = maxStr2OffsetLength; currentStr2OffsetLength >= minStr2OffsetLength; currentStr2OffsetLength--) {
  83. let str1EndIndex = str1.length - 1;
  84. let str2EndIndex = currentStr2OffsetLength + (str2.length - 1);
  85.  
  86. let overlappingStart = currentStr2OffsetLength >= 0 ? currentStr2OffsetLength : 0; // 重叠闭区间开始 [
  87. let overlappingEnd = str2EndIndex < str1EndIndex ? str2EndIndex : str1EndIndex; // 重叠闭区间结束 ]
  88.  
  89. // 对齐
  90. const alignmentContent = alignment(str1.substring(overlappingStart, overlappingEnd + 1), str2.substring(overlappingStart - currentStr2OffsetLength, overlappingEnd - currentStr2OffsetLength + 1));
  91. // 长重叠 覆盖 短重叠
  92. longOverlappingCoverage(alignmentHub, alignmentContent);
  93. }
  94. return alignmentHub;
  95. }
  96. function longOverlappingCoverage(alignmentHub = {}, alignmentContent = {}) {
  97. const modifiedCols = Object.keys(alignmentContent)
  98. const maxmodifiedCols = Math.max(...modifiedCols)
  99. const minmodifiedCols = Math.min(...modifiedCols)
  100. // 合并到对应的列上
  101. modifiedCols.forEach(col => {
  102. alignmentHub[col] = alignmentHub[col] ? Array.from(new Set(alignmentHub[col].concat(alignmentContent[col]))) : alignmentContent[col];
  103. })
  104. const alignmentHubCol = Object.keys(alignmentHub)
  105. const gtmodifiedCols = alignmentHubCol.filter(col => parseFloat(col) > maxmodifiedCols);
  106. const ltmodifiedCols = alignmentHubCol.filter(col => parseFloat(col) < minmodifiedCols);
  107. // 重新计算各列,过滤掉在modifiedCols的`大于modifiedCols的列`的子串,过滤掉`小于modifiedCols的列`在modifiedCols的子串
  108. // - 过滤掉在modifiedCols的`大于modifiedCols的列`的子串
  109. colElementFilter(alignmentHub, modifiedCols, gtmodifiedCols);
  110. // - 过滤掉`小于modifiedCols的列`在modifiedCols的子串
  111. colElementFilter(alignmentHub, ltmodifiedCols, modifiedCols);
  112.  
  113. }
  114. // 列元素过滤
  115. function colElementFilter(alignmentHub = {}, cols = [], gtCols = []) {
  116. if (cols.length == 0 || gtCols.length == 0) return;
  117. gtCols.forEach(gtCol => {
  118. const gtOverlappingBlocks = alignmentHub[gtCol];
  119. for (let [index, col] of cols.entries()) {
  120. const overlappingBlocks = alignmentHub[col];
  121. if (gtOverlappingBlocks == undefined || overlappingBlocks == undefined) continue;;
  122. alignmentHub[col] = overlappingBlocks.filter(overlappingBlock => {
  123. for (let gtOverlappingBlock of gtOverlappingBlocks) {
  124. if (gtOverlappingBlock.includes(overlappingBlock)) {
  125. return false
  126. }
  127. }
  128. return true;
  129. })
  130. }
  131. })
  132. // 清理掉value为空数组项
  133. // {1: [],2:["好用"]} -to-> {2:["好用"]}
  134. Object.keys(alignmentHub).forEach(key => { if (alignmentHub[key].length == 0) delete alignmentHub[key] });
  135. }
  136. // 对齐
  137. function alignment(str1 = "", str2 = "") {
  138. // 返回 {"1",["我","用"],"2":["推荐","可以"]}
  139. if (str1.length != str2.length) {
  140. throw new Error("算法错误,对齐字符串长度不一致");
  141. }
  142. const overlappingBlocks = {}
  143. let currentOverlappingBlock = "";
  144. for (let i = str1.length - 1; i >= 0; i--) {
  145. if (str1[i] == str2[i]) {
  146. currentOverlappingBlock = str1[i] + currentOverlappingBlock;
  147. if (i - 1 >= 0) continue;
  148. }
  149. if (currentOverlappingBlock.length == 0) {
  150. continue;
  151. } else {
  152. // 块收集
  153. let currentOverlappingBlockContainer = overlappingBlocks[currentOverlappingBlock.length]
  154. if (currentOverlappingBlockContainer == undefined) {
  155. currentOverlappingBlockContainer = overlappingBlocks[currentOverlappingBlock.length] = [currentOverlappingBlock]
  156. } else if (!currentOverlappingBlockContainer.includes(currentOverlappingBlock)) {
  157. currentOverlappingBlockContainer.push(currentOverlappingBlock)
  158. }
  159. }
  160. currentOverlappingBlock = "";
  161. }
  162. return overlappingBlocks;
  163. }
  164.  
  165. // 【同步算法-堆实现】
  166. function sortAndSync(arr1, arr2, order = 'desc') {
  167. if (arr1.length !== arr2.length) {
  168. throw new Error("Both arrays must have the same length");
  169. }
  170. function swap(arr, i, j) {
  171. [arr[i], arr[j]] = [arr[j], arr[i]];
  172. }
  173. function heapify(arr1, arr2, n, i, order) {
  174. let largest = i;
  175. let left = 2 * i + 1;
  176. let right = 2 * i + 2;
  177.  
  178. if (order === 'asc') {
  179. if (left < n && arr1[left] > arr1[largest]) {
  180. largest = left;
  181. }
  182. if (right < n && arr1[right] > arr1[largest]) {
  183. largest = right;
  184. }
  185. } else {
  186.  
  187. if (left < n && arr1[left] < arr1[largest]) {
  188. largest = left;
  189. }
  190. if (right < n && arr1[right] < arr1[largest]) {
  191. largest = right;
  192. }
  193. }
  194.  
  195. if (largest !== i) {
  196. swap(arr1, i, largest);
  197. swap(arr2, i, largest);
  198. heapify(arr1, arr2, n, largest, order);
  199. }
  200. }
  201. function buildHeap(arr1, arr2, n, order) {
  202. for (let i = Math.floor(n / 2) - 1; i >= 0; i--) {
  203. heapify(arr1, arr2, n, i, order);
  204. }
  205. }
  206. function heapSort(arr1, arr2, order) {
  207. let n = arr1.length;
  208. buildHeap(arr1, arr2, n, order);
  209.  
  210. for (let i = n - 1; i > 0; i--) {
  211. swap(arr1, 0, i);
  212. swap(arr2, 0, i);
  213. heapify(arr1, arr2, i, 0, order);
  214. }
  215. }
  216. heapSort(arr1, arr2, order);
  217. }
  218.  
  219. // 【算法测试1】
  220. // console.log("-- 算法测试开始 --")
  221. // console.log(findOverlapBlocks("[推荐]sumatrapdf非常好用","pdf 推荐"))
  222. // console.log("-- 算法测试结束 --")
  223.  
  224. // 【算法测试2】
  225. // console.log("匹配度:", overlapMatchingDegree("好用的pdf工具", { "sumatrapdf": 10, "小而好用的pdf阅读器": 8, "https://www.sumatrapdfreader.org/downloadafter": 3 }));