Text Frequency Analyzer with ASIN Page Support

Extract text from specific spans based on page URL, remove stop words, and do word frequency analysis.

// ==UserScript==
// @name Text Frequency Analyzer with ASIN Page Support
// @namespace http://tampermonkey.net/
// @version 1.10
// @description Extract text from specific spans based on page URL, remove stop words, and do word frequency analysis.
// @author Your Name
// @match https://www.amazon.com/gp/bestsellers*
// @match https://www.amazon.com/*/dp/*
// @match https://www.amazon.com/dp/*
// @match https://www.amazon.com/s?k=*
// @match https://www.amazon.com/s?*
// @license MIT
// ==/UserScript==
  15.  
  16. (function () {
  17. 'use strict';
  18.  
  // Lower-case words that getWords() drops when stop-word removal is enabled.
  // NOTE: the published listing elides most of the list; only the entries
  // visible here are included.
  const stopWords = new Set([
    'with', 'of', 'for', 'and', 'at', 'if', 'to', 'on', 'by', 'from', 'as', 'than', 'too',
    // ... remaining stop words omitted
    'now',
  ]);
  24.  
  /**
   * Collect the text to analyse from the current page, chosen by URL.
   *
   * - Best-seller pages: text of every line-clamped title element.
   * - Search pages: text of every result-title element. Any `/s?` URL is
   *   treated as a search page so that every URL the script's `@match`
   *   rules accept (`/s?*`) is handled, not only those starting `/s?k=`.
   * - Product (`/dp/`) pages: the product title followed by the bullet items.
   *
   * On any other page an alert is shown and an empty string is returned.
   *
   * @returns {string} The collected text, pieces joined by single spaces and
   *   trimmed; '' when nothing is configured for this page.
   */
  function extractText() {
    const url = window.location.href;
    const joinText = (elements) =>
      Array.from(elements, (el) => el.innerText).join(' ').trim();

    if (url.includes('/gp/bestsellers')) {
      return joinText(document.querySelectorAll('._cDEzb_p13n-sc-css-line-clamp-3_g3dy1'));
    }

    if (url.includes('/s?')) {
      return joinText(
        document.querySelectorAll(
          '.a-size-base-plus.a-color-base.a-text-normal, .a-size-medium.a-color-base.a-text-normal'
        )
      );
    }

    if (url.includes('/dp/')) {
      const titleElement = document.getElementById('productTitle');
      const miniElements = document.querySelectorAll(
        '.a-unordered-list.a-vertical.a-spacing-mini .a-spacing-mini'
      );
      const parts = [
        titleElement ? titleElement.innerText : '',
        ...Array.from(miniElements, (el) => el.innerText),
      ];
      return parts.join(' ').trim();
    }

    alert('This script is not configured for this page.');
    return '';
  }
  52.  
  /**
   * Normalise raw page text for tokenising.
   *
   * Lower-cases the text, converts curly quotes to straight ones, replaces
   * every character other than a-z, 0-9, whitespace and / " ' . - with a
   * space, then collapses whitespace runs and trims.
   *
   * Disallowed characters become a space rather than being deleted so that
   * words separated only by punctuation ("cable,6ft") do not fuse together.
   *
   * @param {string} text - Raw text.
   * @returns {string} Cleaned, single-spaced, lower-case text.
   */
  function cleanText(text) {
    return text
      .toLowerCase()
      .replace(/[\u2018\u2019]/g, "'")
      .replace(/[\u201C\u201D]/g, '"')
      .replace(/[^a-z0-9\s\/"'.-]/g, ' ') // keep only the listed symbols
      .replace(/\s+/g, ' ')
      .trim();
  }
  59.  
  /**
   * Split text into cleaned word tokens.
   *
   * The text is passed through cleanText() and split on whitespace. Tokens
   * that contain no letter or digit (for example a lone "-" or ".") are
   * dropped so that stray punctuation is never counted as a word.
   *
   * @param {string} text - Raw text to tokenise.
   * @param {boolean} [removeStopWords=true] - When true, tokens present in
   *   the stopWords set are removed as well.
   * @returns {string[]} Tokens in their original order.
   */
  function getWords(text, removeStopWords = true) {
    const hasAlphanumeric = /[a-z0-9]/;
    const words = cleanText(text)
      .split(/\s+/)
      .filter((word) => hasAlphanumeric.test(word));

    return removeStopWords ? words.filter((word) => !stopWords.has(word)) : words;
  }
  67.  
  /**
   * Count how often each run of `n` consecutive words occurs.
   *
   * @param {string[]} words - Tokens in reading order.
   * @param {number} n - Words per phrase; must be a positive integer,
   *   otherwise an empty list is returned (n = 0 used to yield a single
   *   empty-string "phrase").
   * @param {number} [limit=10] - Maximum number of entries to return.
   * @returns {Array<[string, number]>} `[phrase, count]` pairs sorted by
   *   descending count; ties keep first-seen order.
   */
  function countFrequencies(words, n, limit = 10) {
    if (!Number.isInteger(n) || n < 1) {
      return [];
    }

    const freqMap = new Map();
    for (let start = 0; start + n <= words.length; start++) {
      const phrase = words.slice(start, start + n).join(' ');
      freqMap.set(phrase, (freqMap.get(phrase) || 0) + 1);
    }

    return [...freqMap.entries()]
      .sort((a, b) => b[1] - a[1])
      .slice(0, Math.max(0, limit));
  }
  76.  
  /**
   * Undo earlier highlighting by replacing every `.highlight` element with a
   * plain text node holding its text.
   *
   * A text node is used instead of assigning to `outerHTML`, because
   * assigning text to `outerHTML` re-parses it as markup and corrupts
   * content containing `&` or `<`. The parent is normalised afterwards so
   * the text fragments produced by highlighting merge back together.
   */
  function removePreviousHighlights() {
    document.querySelectorAll('.highlight').forEach((el) => {
      const parent = el.parentNode;
      el.replaceWith(document.createTextNode(el.textContent));
      if (parent) {
        parent.normalize();
      }
    });
  }
  83.  
  /**
   * Highlight every occurrence of `phrase` in the elements the analysis
   * reads from, after clearing any earlier highlights.
   *
   * Matching is case-insensitive and runs on text nodes only, so the phrase
   * can never match inside tag names or attribute values, and existing
   * element nodes (and their listeners) are left in place. Each match is
   * wrapped in `<span class="highlight">`.
   *
   * @param {string} phrase - Phrase to highlight; regex metacharacters are
   *   escaped, and each space matches any run of whitespace.
   */
  function highlightText(phrase) {
    removePreviousHighlights(); // always clear the previous selection first

    if (!phrase) {
      return; // an empty pattern would match between every character
    }

    const pattern = phrase
      .replace(/[.*+?^${}()|[\]\\]/g, '\\$&')
      .replace(/ +/g, '\\s+');
    const regex = new RegExp(pattern, 'gi');

    // Wrap every match found in the text nodes below `root`.
    const wrapMatches = (root) => {
      const walker = document.createTreeWalker(root, NodeFilter.SHOW_TEXT);
      const textNodes = [];
      // Collect first: replacing nodes while walking would disturb the walker.
      while (walker.nextNode()) {
        textNodes.push(walker.currentNode);
      }

      textNodes.forEach((node) => {
        const parent = node.parentElement;
        // Skip text already wrapped (target elements may be nested in each other).
        if (!parent || parent.closest('.highlight')) {
          return;
        }

        const text = node.nodeValue;
        const fragment = document.createDocumentFragment();
        let cursor = 0;
        let match;
        regex.lastIndex = 0;
        while ((match = regex.exec(text)) !== null) {
          fragment.appendChild(document.createTextNode(text.slice(cursor, match.index)));
          const mark = document.createElement('span');
          mark.className = 'highlight';
          mark.textContent = match[0];
          fragment.appendChild(mark);
          cursor = regex.lastIndex;
        }

        if (cursor === 0) {
          return; // no match in this text node
        }
        fragment.appendChild(document.createTextNode(text.slice(cursor)));
        parent.replaceChild(fragment, node);
      });
    };

    const selector = window.location.href.includes('/dp/')
      ? '#productTitle, .a-unordered-list.a-vertical.a-spacing-mini .a-spacing-mini'
      : [
          '._cDEzb_p13n-sc-css-line-clamp-3_g3dy1',
          '.a-size-base-plus.a-color-base.a-text-normal',
          '.a-size-medium.a-color-base.a-text-normal',
        ].join(', ');

    document.querySelectorAll(selector).forEach((root) => wrapMatches(root));
  }
  113.  
  /**
   * Render the analysis results in a fixed panel at the top-right of the page.
   *
   * Any panel left by an earlier call is removed first, so repeated analyses
   * replace the panel instead of stacking new ones on top of it. Clicking a
   * list entry calls highlightText() with that entry's phrase.
   *
   * @param {Array<[string, Array<[string, number]>]>} results - Sections as
   *   `[label, entries]`, where each entry is `[phrase, count]`.
   */
  function displayResults(results) {
    const PANEL_ID = 'text-frequency-results';

    const previous = document.getElementById(PANEL_ID);
    if (previous) {
      previous.remove();
    }

    const resultDiv = document.createElement('div');
    resultDiv.id = PANEL_ID;
    Object.assign(resultDiv.style, {
      position: 'fixed',
      top: '10px',
      right: '10px',
      backgroundColor: 'white',
      border: '1px solid black',
      padding: '10px',
      zIndex: '10000',
      maxHeight: '90vh',
      overflowY: 'auto',
    });

    const heading = document.createElement('h2');
    heading.textContent = 'Word Frequency Analysis';
    resultDiv.appendChild(heading);

    results.forEach(([label, data]) => {
      const title = document.createElement('h3');
      title.textContent = label;
      resultDiv.appendChild(title);

      const list = document.createElement('ul');
      data.forEach(([phrase, count]) => {
        const listItem = document.createElement('li');
        listItem.textContent = `${phrase}: ${count}`;
        listItem.style.cursor = 'pointer'; // signal that the entry is clickable
        listItem.addEventListener('click', () => highlightText(phrase));
        list.appendChild(listItem);
      });
      resultDiv.appendChild(list);
    });

    document.body.appendChild(resultDiv);
  }
  143.  
  /**
   * Click handler for the analyse button: extract the page text, rank single
   * words (stop words removed) and 2-, 3- and 4-word phrases (stop words
   * kept), then show the rankings. Alerts and stops when no text was found.
   */
  function analyzeText() {
    const text = extractText();
    if (!text) {
      alert('No text found in the specified spans.');
      return;
    }

    const contentWords = getWords(text);
    const allWords = getWords(text, false);

    // [section heading, token list, words per phrase]
    const sections = [
      ['Top 10 Single Words', contentWords, 1],
      ['Top 10 Two-Word Phrases', allWords, 2],
      ['Top 10 Three-Word Phrases', allWords, 3],
      ['Top 10 Four-Word Phrases', allWords, 4],
    ];

    displayResults(
      sections.map(([heading, tokens, size]) => [heading, countFrequencies(tokens, size)])
    );
  }
  163.  
  // Inject the stylesheet used by the highlight spans.
  const highlightStyle = document.createElement('style');
  highlightStyle.innerHTML = [
    '',
    '.highlight {',
    'background-color: yellow;',
    'font-weight: bold;',
    '}',
    '',
  ].join('\n');
  document.head.appendChild(highlightStyle);

  // Floating button, fixed to the bottom-right corner, that starts the analysis.
  const analyzeButton = document.createElement('button');
  analyzeButton.textContent = 'Analyze Text Frequency';
  Object.assign(analyzeButton.style, {
    position: 'fixed',
    bottom: '10px',
    right: '10px',
    zIndex: '10000',
    padding: '10px 20px',
    backgroundColor: '#007bff',
    color: 'white',
    border: 'none',
    borderRadius: '5px',
    cursor: 'pointer',
  });

  analyzeButton.addEventListener('click', analyzeText);

  document.body.appendChild(analyzeButton);
  189. })();