string-overlap-matching-degree

计算字符串重叠/匹配度计算

当前为 2024-07-24 提交的版本,查看 最新版本

此脚本不应直接安装。它是供其他脚本使用的外部库,要使用该库请加入元指令 // @require https://update.cn-greasyfork.org/scripts/501646/1416061/string-overlap-matching-degree.js

  1. // ==UserScript==
  2. // @name string-overlap-matching-degree
  3. // @namespace http://tampermonkey.net/
  4. // @version 1.0.0
  5. // @description 计算字符串重叠/匹配度计算
  6. // @license Apache License 2.0
  7. // @author zhuangjie
  8. // ==/UserScript==
  9. // commented by zhuangjie
  10.  
  11. /**
  12. * 重叠匹配度
  13. * @author: zhuangjie
  14. * @date: 2024-07-23
  15. */
  16. function overlapMatchingDegreeForObjectArray(keyword = "", objArr = [], fun = (obj) => [], {sort = "desc",onlyHasScope =false,scopeForObjArrContainer} = {}) {
  17. const scopeForData = objArr.map(item => overlapMatchingDegree(keyword, fun(item), sort));
  18. // scope与 objArr 同步排序
  19. sortAndSync(scopeForData, objArr)
  20. if(Array.isArray(scopeForObjArrContainer)) {
  21. // 说明需要分数,倒给
  22. scopeForObjArrContainer.push(...scopeForData)
  23. }
  24. return onlyHasScope ? filterMismatchItem(scopeForData,objArr) : objArr;
  25. }
  26.  
  27. // 根据scopeForData得到新数组objArr
  28. function filterMismatchItem(scopeForData,objArr) {
  29. const result = []
  30. for(let [scope,index] of scopeForData.entries()) {
  31. if(scope != 0) {
  32. result.push(objArr[index])
  33. }
  34. }
  35. return result
  36. }
  37.  
  38. /**
  39. * 计算匹配度外层封装工具
  40. * @param {string} keyword - 匹配字符串1
  41. * @param {Object | Arrayy} topicWeighs - 匹配字符串2与它的权重
  42. * @returns {number} 匹配度分数
  43. */
  44. function overlapMatchingDegree(keyword, topicWeighs = {}, sort = "desc") {
  45. // 兼容topicWeighs为topic数组,元素越靠前权重越高
  46. if (Array.isArray(topicWeighs)) {
  47. const _temp = {}
  48. if (sort === "asc") {
  49. for (let i = 1; i <= topicWeighs.length; i++) {
  50. _temp[topicWeighs[i - 1]] = i;
  51. }
  52. } else {
  53. // desc
  54. for (let i = topicWeighs.length; i > 0; i--) {
  55. _temp[topicWeighs[topicWeighs.length - i]] = i;
  56. }
  57. }
  58. topicWeighs = _temp;
  59. }
  60. let topicList = Object.keys(topicWeighs)
  61. // topic map 得分
  62. const topicScores = topicList.map(topic => {
  63. let topicScore = 0; // 得分
  64. const currentTopicScore = topicWeighs[topic]; // 当前topic“个数”
  65. const overlapLengthBlocksMap = findOverlapBlocks(keyword, topic)
  66. const overlapLengthKeys = Object.keys(overlapLengthBlocksMap);
  67. for (let overlapLengthKey of overlapLengthKeys) {
  68. const currentLengthBlocks = overlapLengthBlocksMap[overlapLengthKey];
  69. topicScore = Math.pow(currentTopicScore, overlapLengthKey) * currentLengthBlocks.length;
  70. }
  71. return topicScore;
  72. })
  73. // 算总分返回
  74. return topicScores.reduce((a, b) => a + b, 0)
  75. }
  76.  
  77. /**
  78. * 查找重叠匹配块(入口函数)
  79. * @param {*} str1
  80. * @param {*} str2
  81. * @returns 返回重叠块 如:{"2": ["好用","推荐"],"3": ["好用推荐"]}
  82. * 算法核心思想:
  83. * -----------------------------------------------------
  84. * sumatrapdf* | sumatrapdf* | sumatrapdf*
  85. * pdf- | pdf- | pdf-
  86. * ------------------------------------------------------
  87. */
  88. function findOverlapBlocks(str1 = "", str2 = "") {
  89. let maxStr2OffsetLength = str1.length - 1;
  90. let minStr2OffsetLength = -(str2.length - 1);
  91. const alignmentHub = { /* "3": ["好用","推荐"] */ }
  92. for (let currentStr2OffsetLength = maxStr2OffsetLength; currentStr2OffsetLength >= minStr2OffsetLength; currentStr2OffsetLength--) {
  93. let str1EndIndex = str1.length - 1;
  94. let str2EndIndex = currentStr2OffsetLength + (str2.length - 1);
  95.  
  96. let overlappingStart = currentStr2OffsetLength >= 0 ? currentStr2OffsetLength : 0; // 重叠闭区间开始 [
  97. let overlappingEnd = str2EndIndex < str1EndIndex ? str2EndIndex : str1EndIndex; // 重叠闭区间结束 ]
  98.  
  99. // 对齐
  100. const alignmentContent = alignment(str1.substring(overlappingStart, overlappingEnd + 1), str2.substring(overlappingStart - currentStr2OffsetLength, overlappingEnd - currentStr2OffsetLength + 1));
  101. // 长重叠 覆盖 短重叠
  102. longOverlappingCoverage(alignmentHub, alignmentContent);
  103. }
  104. return alignmentHub;
  105. }
  106. function longOverlappingCoverage(alignmentHub = {}, alignmentContent = {}) {
  107. const modifiedCols = Object.keys(alignmentContent)
  108. const maxmodifiedCols = Math.max(...modifiedCols)
  109. const minmodifiedCols = Math.min(...modifiedCols)
  110. // 合并到对应的列上
  111. modifiedCols.forEach(col => {
  112. alignmentHub[col] = alignmentHub[col] ? Array.from(new Set(alignmentHub[col].concat(alignmentContent[col]))) : alignmentContent[col];
  113. })
  114. const alignmentHubCol = Object.keys(alignmentHub)
  115. const gtmodifiedCols = alignmentHubCol.filter(col => parseFloat(col) > maxmodifiedCols);
  116. const ltmodifiedCols = alignmentHubCol.filter(col => parseFloat(col) < minmodifiedCols);
  117. // 重新计算各列,过滤掉在modifiedCols的`大于modifiedCols的列`的子串,过滤掉`小于modifiedCols的列`在modifiedCols的子串
  118. // - 过滤掉在modifiedCols的`大于modifiedCols的列`的子串
  119. colElementFilter(alignmentHub, modifiedCols, gtmodifiedCols);
  120. // - 过滤掉`小于modifiedCols的列`在modifiedCols的子串
  121. colElementFilter(alignmentHub, ltmodifiedCols, modifiedCols);
  122.  
  123. }
  124. // 列元素过滤
  125. function colElementFilter(alignmentHub = {}, cols = [], gtCols = []) {
  126. if (cols.length == 0 || gtCols.length == 0) return;
  127. gtCols.forEach(gtCol => {
  128. const gtOverlappingBlocks = alignmentHub[gtCol];
  129. for (let [index, col] of cols.entries()) {
  130. const overlappingBlocks = alignmentHub[col];
  131. if (gtOverlappingBlocks == undefined || overlappingBlocks == undefined) continue;;
  132. alignmentHub[col] = overlappingBlocks.filter(overlappingBlock => {
  133. for (let gtOverlappingBlock of gtOverlappingBlocks) {
  134. if (gtOverlappingBlock.includes(overlappingBlock)) {
  135. return false
  136. }
  137. }
  138. return true;
  139. })
  140. }
  141. })
  142. // 清理掉value为空数组项
  143. // {1: [],2:["好用"]} -to-> {2:["好用"]}
  144. Object.keys(alignmentHub).forEach(key => { if (alignmentHub[key].length == 0) delete alignmentHub[key] });
  145. }
  146. // 对齐
  147. function alignment(str1 = "", str2 = "") {
  148. // 返回 {"1",["我","用"],"2":["推荐","可以"]}
  149. if (str1.length != str2.length) {
  150. throw new Error("算法错误,对齐字符串长度不一致");
  151. }
  152. const overlappingBlocks = {}
  153. let currentOverlappingBlock = "";
  154. for (let i = str1.length - 1; i >= 0; i--) {
  155. if (str1[i] == str2[i]) {
  156. currentOverlappingBlock = str1[i] + currentOverlappingBlock;
  157. if (i - 1 >= 0) continue;
  158. }
  159. if (currentOverlappingBlock.length == 0) {
  160. continue;
  161. } else {
  162. // 块收集
  163. let currentOverlappingBlockContainer = overlappingBlocks[currentOverlappingBlock.length]
  164. if (currentOverlappingBlockContainer == undefined) {
  165. currentOverlappingBlockContainer = overlappingBlocks[currentOverlappingBlock.length] = [currentOverlappingBlock]
  166. } else if (!currentOverlappingBlockContainer.includes(currentOverlappingBlock)) {
  167. currentOverlappingBlockContainer.push(currentOverlappingBlock)
  168. }
  169. }
  170. currentOverlappingBlock = "";
  171. }
  172. return overlappingBlocks;
  173. }
  174.  
  175. // 【同步算法-堆实现】
  176. function sortAndSync(arr1, arr2, order = 'desc') {
  177. if (arr1.length !== arr2.length) {
  178. throw new Error("Both arrays must have the same length");
  179. }
  180. function swap(arr, i, j) {
  181. [arr[i], arr[j]] = [arr[j], arr[i]];
  182. }
  183. function heapify(arr1, arr2, n, i, order) {
  184. let largest = i;
  185. let left = 2 * i + 1;
  186. let right = 2 * i + 2;
  187.  
  188. if (order === 'asc') {
  189. if (left < n && arr1[left] > arr1[largest]) {
  190. largest = left;
  191. }
  192. if (right < n && arr1[right] > arr1[largest]) {
  193. largest = right;
  194. }
  195. } else {
  196.  
  197. if (left < n && arr1[left] < arr1[largest]) {
  198. largest = left;
  199. }
  200. if (right < n && arr1[right] < arr1[largest]) {
  201. largest = right;
  202. }
  203. }
  204.  
  205. if (largest !== i) {
  206. swap(arr1, i, largest);
  207. swap(arr2, i, largest);
  208. heapify(arr1, arr2, n, largest, order);
  209. }
  210. }
  211. function buildHeap(arr1, arr2, n, order) {
  212. for (let i = Math.floor(n / 2) - 1; i >= 0; i--) {
  213. heapify(arr1, arr2, n, i, order);
  214. }
  215. }
  216. function heapSort(arr1, arr2, order) {
  217. let n = arr1.length;
  218. buildHeap(arr1, arr2, n, order);
  219.  
  220. for (let i = n - 1; i > 0; i--) {
  221. swap(arr1, 0, i);
  222. swap(arr2, 0, i);
  223. heapify(arr1, arr2, i, 0, order);
  224. }
  225. }
  226. heapSort(arr1, arr2, order);
  227. }
  228.  
  229. // 【算法测试1】
  230. // console.log("-- 算法测试开始 --")
  231. // console.log(findOverlapBlocks("[推荐]sumatrapdf非常好用","pdf 推荐"))
  232. // console.log("-- 算法测试结束 --")
  233.  
  234. // 【算法测试2】
  235. // console.log("匹配度:", overlapMatchingDegree("好用的pdf工具", { "sumatrapdf": 10, "小而好用的pdf阅读器": 8, "https://www.sumatrapdfreader.org/downloadafter": 3 }));
  236.