Text Extraction - navigate

Extracts tweets into a real-time board and saves them as JSON.

  1. // ==UserScript==
  2. // @name Text Extraction - navigate
  3. // @namespace http://tampermonkey.net/
  4. // @version 1.4.1
  5. // @description Extracts tweets with a real-time board, save as JSON.
  6. // @match https://x.com/*
  7. // @license MIT
  8. // @grant none
  9. // ==/UserScript==
  10.  
  11. /* MIT License
  12. *
  13. * Copyright (c) 2024 [955whynot]
  14. *
  15. * Permission is hereby granted, free of charge, to any person obtaining a copy
  16. * of this software and associated documentation files (the "Software"), to deal
  17. * in the Software without restriction, including without limitation the rights
  18. * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  19. * copies of the Software, and to permit persons to whom the Software is
  20. * furnished to do so, subject to the following conditions:
  21. *
  22. * The above copyright notice and this permission notice shall be included in all
  23. * copies or substantial portions of the Software.
  24. *
  25. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  26. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  27. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  28. * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  29. * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  30. * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  31. * SOFTWARE.
  32. */
  33.  
  34. (function () {
  35. 'use strict';
  36.  
  /**
   * Loads the items persisted under the `extractedData` localStorage key.
   *
   * @returns {string[]} The stored items, or an empty array when nothing is
   *   stored, reading/parsing fails, or the stored value is not an array.
   */
  function safelyGetFromStorage() {
    try {
      const parsed = JSON.parse(localStorage.getItem('extractedData'));

      // Valid JSON is not necessarily an array: an object or string stored
      // under this key would break the push()/map() calls made on the result.
      if (!Array.isArray(parsed)) {
        return [];
      }

      // Entries are rendered and de-duplicated as text, so keep strings only.
      return parsed.filter((item) => typeof item === 'string');
    } catch (e) {
      console.error('Error reading from localStorage:', e);
      return [];
    }
  }
  46.  
  // Items collected so far; starts from whatever safelyGetFromStorage() restores.
  const extractedData = safelyGetFromStorage();

  let observer; // To hold the MutationObserver instance

  // Floating panel pinned to the bottom-right corner; parent of every control below.
  const controls = document.createElement('div');
  Object.assign(controls.style, {
    position: 'fixed',
    bottom: '10px',
    right: '10px',
    backgroundColor: 'rgba(0, 0, 0, 0.8)',
    color: 'white',
    padding: '10px',
    fontSize: '14px',
    zIndex: '9999',
  });
  document.body.appendChild(controls);

  // Button that empties the collected list.
  const clearButton = document.createElement('button');
  clearButton.textContent = 'Clear';
  Object.assign(clearButton.style, {
    backgroundColor: 'maroon',
    color: 'white',
    fontWeight: '700',
    padding: '6px',
    marginBottom: '10px',
    marginLeft: '8px',
  });
  controls.appendChild(clearButton);

  // Scrollable board listing the extracted items.
  const display = document.createElement('div');
  Object.assign(display.style, {
    maxHeight: '200px',
    overflowY: 'auto',
    border: '1px solid white',
    padding: '5px',
    marginTop: '10px',
  });
  display.innerHTML = '<strong>Extracted Data:</strong><br>';
  controls.appendChild(display);

  // Button that downloads the collected list as a JSON file.
  const saveButton = document.createElement('button');
  saveButton.textContent = 'Save as JSON';
  Object.assign(saveButton.style, {
    marginTop: '10px',
    backgroundColor: '#FF8C00',
    color: 'white',
    fontWeight: '700',
    padding: '5px',
  });
  controls.appendChild(saveButton);

  // Item counter, placed directly above the board.
  const counter = document.createElement('div');
  counter.style.marginTop = '5px';
  counter.style.fontWeight = 'bold';
  counter.textContent = `Total Items: ${extractedData.length}`;
  controls.insertBefore(counter, display);

  // Render restored items right away; otherwise the counter reports them
  // while the board stays empty until the next new extraction.
  updateDisplay();
  97.  
  /**
   * Re-renders the board from `extractedData`, refreshes the item counter and
   * persists the list under the `extractedData` localStorage key.
   *
   * Items are text scraped from the page, so they are inserted as text nodes
   * rather than through innerHTML: markup inside an item must never be parsed
   * as HTML. Failures are logged and swallowed so a rendering or storage
   * problem does not interrupt extraction.
   */
  function updateDisplay() {
    try {
      const content = document.createDocumentFragment();

      const heading = document.createElement('strong');
      heading.textContent = 'Extracted Data:';
      content.appendChild(heading);
      content.appendChild(document.createElement('br'));

      extractedData.forEach((item, index) => {
        const row = document.createElement('div');
        const label = document.createElement('strong');
        label.textContent = `Div ${index + 1}:`;
        row.appendChild(label);
        row.appendChild(document.createTextNode(` ${item}`));
        content.appendChild(row);
      });

      // Drop the previous rows, then attach the new ones in a single insert.
      display.textContent = '';
      display.appendChild(content);

      counter.textContent = `Total Items: ${extractedData.length}`;
      localStorage.setItem('extractedData', JSON.stringify(extractedData));
    } catch (e) {
      console.error('Error updating display:', e);
    }
  }
  109.  
  /**
   * Flattens the direct children of `div` into one string, in DOM order.
   *
   * - Text nodes and SPAN elements contribute their trimmed text.
   * - IMG elements contribute their `alt` attribute, appended as-is.
   * - A elements with role="link" contribute their trimmed text
   *   (for example a hashtag).
   * - Every other node is ignored.
   *
   * @param {Element} div - Container whose child nodes are read.
   * @returns {string} The combined text, trimmed at both ends.
   */
  function extractFromDiv(div) {
    let collected = '';

    // Text pieces are separated from what came before by a single space,
    // unless the collected string already ends with one.
    const appendSpaced = (piece) => {
      const separator = collected.endsWith(' ') ? '' : ' ';
      collected += separator + piece;
    };

    for (const node of div.childNodes) {
      if (node.nodeType === Node.TEXT_NODE) {
        const text = node.textContent.trim();
        if (text) {
          appendSpaced(text);
        }
        continue;
      }

      if (node.nodeType !== Node.ELEMENT_NODE) {
        continue;
      }

      switch (node.tagName) {
        case 'SPAN': {
          const text = node.innerText.trim();
          if (text) {
            appendSpaced(text);
          }
          break;
        }
        case 'IMG': {
          // The alt text is glued on without a separator.
          const alt = node.getAttribute('alt');
          if (alt) {
            collected += alt;
          }
          break;
        }
        case 'A': {
          if (node.getAttribute('role') !== 'link') {
            break;
          }
          const linkText = node.innerText.trim();
          if (linkText) {
            appendSpaced(linkText);
          }
          break;
        }
        default:
          break;
      }
    }

    return collected.trim();
  }
  143.  
  /**
   * Collects the text of every `div[data-testid="tweetText"]` currently in
   * the document and appends the entries not seen before to `extractedData`.
   *
   * Does nothing except log a message when the current path is not under
   * /search. The board is refreshed (and the list persisted) at most once per
   * call, and only when at least one new entry was added.
   */
  function extractAll() {
    if (!window.location.pathname.startsWith('/search')) {
      console.log('Extraction skipped. Not on /search.');
      return;
    }

    // This runs on every DOM mutation, so build the lookup once per pass
    // instead of scanning the whole array for each tweet.
    const seen = new Set(extractedData);
    let hasNewItems = false;

    document.querySelectorAll('div[data-testid="tweetText"]').forEach((div) => {
      const divData = extractFromDiv(div);

      if (divData && !seen.has(divData)) {
        seen.add(divData);
        extractedData.push(divData);
        hasNewItems = true;
      }
    });

    // One re-render and one localStorage write per pass, not one per new item.
    if (hasNewItems) {
      updateDisplay();
    }
  }
  160.  
  /**
   * Runs an immediate extraction pass and then keeps extracting whenever the
   * page's DOM changes, by observing child-list mutations anywhere under
   * `document.body`.
   *
   * Safe to call repeatedly: an observer left from an earlier call is
   * disconnected first, so only one observer is ever active.
   */
  function enableExtraction() {
    // Navigation events can trigger this more than once; without this, each
    // call would stack another live observer on top of the previous ones.
    if (observer) {
      observer.disconnect();
    }

    extractAll();

    observer = new MutationObserver(() => {
      // Checked here as well so mutations on other pages do not make
      // extractAll() log its "skipped" message over and over.
      if (window.location.pathname.startsWith('/search')) {
        extractAll();
      }
    });
    observer.observe(document.body, { childList: true, subtree: true });
    console.log('Extraction enabled.');
  }
  172.  
  /**
   * Stops reacting to DOM changes by disconnecting the active
   * MutationObserver, if there is one.
   */
  function disableExtraction() {
    if (observer) {
      observer.disconnect();
      // Drop the reference so the disconnected observer is not kept around
      // and is not disconnected again on later calls.
      observer = undefined;
    }
    console.log('Extraction disabled.');
  }
  178.  
  /**
   * Switches extraction on or off to match the current location: on when the
   * path starts with /search, off everywhere else.
   */
  function monitorNavigation() {
    const isOnSearch = window.location.pathname.startsWith('/search');

    if (!isOnSearch) {
      disableExtraction();
      return;
    }

    enableExtraction();
  }
  187.  
  // Wire the panel buttons to their actions.

  // "Clear": empty the collected list in place, then let updateDisplay()
  // refresh the board, the counter and the stored copy.
  clearButton.onclick = () => {
    extractedData.splice(0, extractedData.length);
    updateDisplay();
  };

  // "Save as JSON": serialise the list (2-space indent) and hand it to the
  // browser as a download through a temporary, immediately removed link.
  saveButton.onclick = () => {
    const json = JSON.stringify(extractedData, null, 2);
    const objectUrl = URL.createObjectURL(new Blob([json], { type: 'application/json' }));

    const link = Object.assign(document.createElement('a'), {
      href: objectUrl,
      download: 'extracted_data.json',
    });

    document.body.appendChild(link);
    link.click();
    document.body.removeChild(link);
    URL.revokeObjectURL(objectUrl);
  };
  205.  
  // Monitor navigation dynamically.
  //
  // popstate and hashchange only cover back/forward and fragment changes.
  // Navigation performed through history.pushState()/replaceState(), as
  // single-page apps typically do, fires neither event, so both methods are
  // wrapped to report location changes as well.
  let wasOnSearch = window.location.pathname.startsWith('/search');

  // Re-evaluates extraction only when the page enters or leaves /search, so
  // repeated navigation within the same area does not restart it needlessly.
  function handleLocationChange() {
    try {
      const isOnSearch = window.location.pathname.startsWith('/search');
      if (isOnSearch === wasOnSearch) {
        return;
      }
      wasOnSearch = isOnSearch;
      monitorNavigation();
    } catch (e) {
      // This runs inside the page's own history calls; never let a failure
      // here break the site's navigation.
      console.error('Error handling navigation:', e);
    }
  }

  ['pushState', 'replaceState'].forEach((method) => {
    const original = window.history[method];
    window.history[method] = function (...args) {
      const result = original.apply(this, args);
      handleLocationChange();
      return result;
    };
  });

  window.addEventListener('popstate', handleLocationChange);
  window.addEventListener('hashchange', handleLocationChange);

  // Initial check for current page
  monitorNavigation();
  212. })();