Twitter Direct

Remove t.co tracking links from Twitter

当前为 2021-03-03 提交的版本,查看 最新版本

  1. // ==UserScript==
  2. // @name Twitter Direct
  3. // @description Remove t.co tracking links from Twitter
  4. // @author chocolateboy
  5. // @copyright chocolateboy
  6. // @version 1.4.2
  7. // @namespace https://github.com/chocolateboy/userscripts
  8. // @license GPL: https://www.gnu.org/copyleft/gpl.html
  9. // @include https://twitter.com/
  10. // @include https://twitter.com/*
  11. // @include https://mobile.twitter.com/
  12. // @include https://mobile.twitter.com/*
  13. // @require https://unpkg.com/@chocolateboy/uncommonjs@3.1.2/dist/polyfill.iife.min.js
  14. // @require https://unpkg.com/get-wild@1.4.1/dist/index.umd.min.js
  15. // @require https://unpkg.com/gm-compat@1.1.0/dist/index.iife.min.js
  16. // @require https://unpkg.com/just-safe-set@2.1.0/index.js
  17. // @run-at document-start
  18. // ==/UserScript==
  19.  
  /*
   * matches the Content-Type header of the responses we inspect, i.e. values
   * which begin with "application/json", with or without trailing parameters
   * such as "; charset=utf-8"
   */
  const CONTENT_TYPE = /^application\/json\b/
  25.  
  /*
   * the smallest document size we consider "not small"
   *
   * when no URL has ever been found/replaced in a document type and the
   * document is at least this big, the miss is logged to the console
   *
   * repeated misses in large documents hint that they could be blacklisted to
   * save work, at least in theory
   *
   * (in practice, URL data is optional in most of the matched document types
   * (it lives in arrays which may be empty), so a document without URLs today
   * may still contain them tomorrow...)
   */
  const LOG_THRESHOLD = 1024
  40.  
  /*
   * an immutable, empty array used in various places to indicate "no values".
   * a single shared instance avoids unnecessary allocations.
   *
   * the array is frozen so that an accidental write (e.g. a push) fails loudly
   * rather than silently corrupting every default which shares this instance.
   * non-mutating uses such as NONE.concat(...) are unaffected and still return
   * a new, ordinary array.
   */
  const NONE = Object.freeze([])
  46.  
  /*
   * bookkeeping for the router: records which query roots have (or haven't)
   * produced replacements for which URIs, and vice versa
   */
  const STATS = {
      root: {},
      uri: {},
  }
  52.  
  /*
   * the hostname which intercepted (tracked) links are routed through
   *
   * some links are not intercepted, e.g. links to twitter itself
   * (https://twitter.com) and card URIs (card://123456)
   */
  const TRACKING_DOMAIN = 't.co'
  60.  
  /*
   * matches the hostname(s) we expect JSON data to be served from:
   * twitter.com, api.twitter.com and mobile.twitter.com. responses from any
   * other host are ignored.
   */
  const TWITTER_API = /^(?:(?:api|mobile)\.)?twitter\.com$/
  66.  
  /*
   * the locations inside a tweet node which are probed by default for URL
   * metadata (arrays of objects)
   */
  const TWEET_PATHS = [
      'entities.media',
      'entities.urls',
      'extended_entities.media',
      'extended_entities.urls',
  ]
  77.  
  /*
   * the locations inside a user/profile node which are probed by default for
   * URL metadata (arrays of objects)
   */
  const USER_PATHS = [
      'entities.description.urls',
      'entities.url.urls',
  ]
  86.  
  /*
   * a router which matches URIs (pathnames) to queries. each query contains a
   * root path (required) and some additional options which specify the
   * locations under the root path to substitute URLs in.
   *
   * implemented as an array of pairs with URI-pattern keys (string(s) or
   * regexp(s)) and one or more queries as the value. if a query is a path
   * (string or array) it is converted into an object with the path as its
   * `root` property.
   *
   * every pattern is anchored to the end of the pathname so that the patterns
   * stay disjoint, i.e. at most one entry can match a given URI.
   *
   * options:
   *
   * - root (required): a path (string or array of steps) into the document
   *   under which to begin searching
   *
   * - collect (default: Object.values): a function which takes a root node and
   *   turns it into an array of context nodes to scan for URL data
   *
   * - scan (default: USER_PATHS): an array of paths to probe for arrays of
   *   { url, expanded_url } pairs in a context node
   *
   * - targets (default: NONE): an array of paths to standalone URLs (URLs that
   *   don't have an accompanying expansion), e.g. for URLs in cards embedded
   *   in tweets. these URLs are replaced by expanded URLs gathered during the
   *   scan.
   *
   *   target paths can point directly to a URL node (string) or to an array
   *   of objects. in the latter case, we find the URL object in the array
   *   (obj.key === "card_url") and replace its URL node
   *   (obj.value.string_value)
   *
   *   if a target path is an object containing a
   *   { url: path, expanded_url: path } pair, the URL is expanded directly in
   *   the same way as scanned paths.
   */
  const MATCH = [
      [
          // e.g. '/1.1/users/lookup.json',
          /\/lookup\.json$/, {
              root: NONE, // returns self
          },
      ],
      [
          /\/Conversation$/, [
              'data.conversation_timeline.instructions.*.moduleItems.*.item.itemContent.tweet.core.user.legacy',
              'data.conversation_timeline.instructions.*.entries.*.content.items.*.item.itemContent.tweet.core.user.legacy',
              {
                  root: 'data.conversation_timeline.instructions.*.moduleItems.*.item.itemContent.tweet.legacy',
                  scan: TWEET_PATHS,
                  targets: ['card.binding_values', 'card.url'],
              },
              {
                  root: 'data.conversation_timeline.instructions.*.entries.*.content.items.*.item.itemContent.tweet.legacy',
                  scan: TWEET_PATHS,
                  targets: ['card.binding_values', 'card.url'],
              },
          ],
      ],
      [
          /\/Favoriters$/,
          'data.favoriters_timeline.timeline.instructions.*.entries.*.content.itemContent.user.legacy',
      ],
      [
          /\/Following$/,
          'data.user.following_timeline.timeline.instructions.*.entries.*.content.itemContent.user.legacy',
      ],
      [
          /\/Followers$/,
          'data.user.followers_timeline.timeline.instructions.*.entries.*.content.itemContent.user.legacy',
      ],
      [
          /\/FollowersYouKnow$/,
          'data.user.friends_following_timeline.timeline.instructions.*.entries.*.content.itemContent.user.legacy',
      ],
      [
          /\/ListMembers$/,
          'data.list.members_timeline.timeline.instructions.*.entries.*.content.itemContent.user.legacy',
      ],
      [
          /\/ListSubscribers$/,
          'data.list.subscribers_timeline.timeline.instructions.*.entries.*.content.itemContent.user.legacy',
      ],
      [
          // anchored like its siblings so that it can't match a longer
          // endpoint name which merely starts with "Retweeters"
          /\/Retweeters$/,
          'data.retweeters_timeline.timeline.instructions.*.entries.*.content.itemContent.user.legacy',
      ],
      [
          // used for hovercard data
          /\/UserByScreenName$/, {
              root: 'data.user.legacy',
              collect: Array.of,
          },
      ],
      [
          // DMs
          // e.g. '/1.1/dm/inbox_initial_state.json' and '/1.1/dm/user_updates.json'
          /\/(?:inbox_initial_state|user_updates)\.json$/, {
              root: 'inbox_initial_state.entries.*.message.message_data',
              scan: TWEET_PATHS,
              targets: [
                  'attachment.card.binding_values.card_url.string_value',
                  'attachment.card.url',
              ],
          },
      ],
      [
          // e.g. '/1.1/friends/following/list.json',
          /\/list\.json$/,
          'users.*',
      ],
  ]
  196.  
  /*
   * the catch-all router entry: a single { pattern => queries } pair whose
   * pattern matches every (non-empty) URI
   */
  const WILDCARD = [
      /./,
      [
          {
              root: 'globalObjects.tweets',
              scan: TWEET_PATHS,
              targets: [
                  {
                      url: 'card.binding_values.website_shortened_url.string_value',
                      expanded_url: 'card.binding_values.website_url.string_value',
                  },
                  'card.binding_values.card_url.string_value',
                  'card.url',
              ],
          },
          'globalObjects.tweets.*.card.users.*',
          'globalObjects.users',
      ],
  ]
  218.  
  /*
   * our own `get` function, built with the `getter` factory found on
   * `exports` (presumably supplied by the get-wild @require). paths are split
   * on "." only, which is simpler/faster than the default path parser and
   * sufficient here because the extended path syntax isn't used
   */
  const get = exports.getter({ split: '.' })
  224.  
  /*
   * returns true if the supplied value is a plain object, false otherwise.
   *
   * the check relies on the value's Object.prototype.toString tag. it is only
   * ever applied to JSON data, so it doesn't need to be foolproof
   */
  const isPlainObject = (() => {
      // captured once so that each call is a single method invocation
      const { toString: describe } = Object.prototype

      return value => describe.call(value) === '[object Object]'
  })()
  234.  
  /*
   * calls `fn` with each value of the supplied iterable, skipping values which
   * are missing. note that "missing" is decided by truthiness, so every falsy
   * value (undefined, null, 0, "" etc.) is skipped.
   *
   * filtering and visiting happen in a single pass (rather than filter + map)
   * because there can be dozens or even hundreds of values, e.g. the contexts
   * (tweet/user objects) under a root node
   */
  function eachDefined (iterable, fn) {
      for (const candidate of iterable) {
          if (!candidate) {
              continue
          }

          fn(candidate)
      }
  }
  248.  
  /**
   * a helper function which returns true if the supplied URL is tracked by
   * Twitter (i.e. its hostname is the tracking domain), false otherwise.
   *
   * values which can't be parsed as an absolute URL (e.g. an empty string, a
   * relative path or malformed data) are reported as untracked rather than
   * raising an exception: this function runs inside the response handler, so
   * an uncaught error here would abort the processing of the whole document.
   *
   * @param {string} url the URL to inspect
   * @returns {boolean} true if the URL's hostname is TRACKING_DOMAIN
   */
  function isTracked (url) {
      let hostname

      try {
          hostname = new URL(url).hostname
      } catch (e) {
          // not an absolute URL, so it can't be a tracking link
          return false
      }

      return hostname === TRACKING_DOMAIN
  }
  256.  
  /*
   * a JSON.stringify replacer used when serializing the stats: Sets (which
   * JSON can't represent) are written out as arrays of their members; all
   * other values pass through untouched
   */
  function replacer (_key, value) {
      if (value instanceof Set) {
          return [...value]
      }

      return value
  }
  263.  
  /*
   * an iterator which returns { pattern => queries } pairs where patterns
   * are strings/regexps which match a URI and queries are objects which
   * define substitutions to perform in the matched document.
   *
   * this forms the basis of a simple "router" which tries all URI patterns
   * until one matches (or none match) and then additionally performs a
   * wildcard match which works on all URIs.
   *
   * the URI patterns are disjoint, so there's no need to try them all if one
   * matches: the consumer signals a match by setting `state.matched`, which
   * is checked after each pair has been yielded.
   *
   * in addition to these, some substitutions are non URI-specific, i.e. they
   * work on documents that aren't matched by URI (e.g. profile.json) and
   * documents that are (e.g. list.json). currently the latter all transform
   * locations under obj.globalObjects, so we check for the existence of that
   * property before yielding these catch-all queries.
   *
   * `data` is whatever JSON.parse returned, so it may be null or a primitive
   * (e.g. for the documents `null` or `"ok"`). the `in` operator throws a
   * TypeError for such operands, so they're ruled out before probing for
   * globalObjects.
   */
  function* router (state, data) {
      for (const [key, value] of MATCH) {
          yield [key, value]

          if (state.matched) {
              break
          }
      }

      const isObject = data !== null && typeof data === 'object'

      if (isObject && 'globalObjects' in data) {
          yield WILDCARD
      }
  }
  293.  
  /*
   * a helper class which implements document-specific (MATCH) and generic
   * (WILDCARD) URL substitutions in nodes (subtrees) within a JSON-formatted
   * document returned by the Twitter API.
   *
   * a transformer is instantiated for each query and its methods are passed a
   * context (node within the document tree) and the value of an option from
   * the query, e.g. the `scan` option is handled by the `_scan` method and the
   * `targets` option is processed by the `_assign` method.
   *
   * constructor options:
   *
   * - onReplace: a callback invoked (without arguments) after each replacement
   * - root: the query's root path (only used in diagnostics)
   * - uri: the URI of the document being transformed (only used in
   *   diagnostics)
   *
   * the document is external data, so the methods tolerate unexpected shapes
   * (null in place of an array, null array members, non-string URLs) instead
   * of throwing, which would abort the processing of the whole response.
   */
  class Transformer {
      constructor ({ onReplace, root, uri }) {
          // short URL -> expanded URL mappings gathered while scanning
          this._cache = new Map()
          this._onReplace = onReplace
          this._root = root
          this._uri = uri
      }

      /*
       * expand URLs in context nodes in the locations specified by the
       * query's `scan` and `targets` options
       */
      // @ts-ignore https://github.com/microsoft/TypeScript/issues/14279
      transform (contexts, scan, targets) {
          // scan the context nodes for { url, expanded_url } pairs, replace
          // each t.co URL with its expansion, and add the mappings to the
          // cache
          eachDefined(contexts, context => this._scan(context, scan))

          // do a separate pass for targets because some nested card URLs are
          // expanded in other (earlier) tweets under the same root
          if (targets.length) {
              eachDefined(contexts, context => this._assign(context, targets))
          }
      }

      /*
       * scan the context node for { url, expanded_url } pairs, replace each
       * t.co URL with its expansion, and add the mappings to the cache
       */
      _scan (context, paths) {
          const { _cache: cache, _onReplace: onReplace } = this

          for (const path of paths) {
              const items = get(context, path, NONE)

              // the default only covers a missing location; one which is
              // present but holds null (or anything else that isn't an array)
              // has nothing to scan and must not be iterated
              if (!Array.isArray(items)) {
                  continue
              }

              for (const item of items) {
                  // a null/primitive member is reported below rather than
                  // dereferenced
                  const url = item ? item.url : undefined
                  const expandedUrl = item ? item.expanded_url : undefined

                  if (url && expandedUrl) {
                      if (typeof url === 'string' && isTracked(url)) {
                          cache.set(url, expandedUrl)
                          item.url = expandedUrl
                          onReplace()
                      }
                  } else {
                      console.warn("can't find url/expanded_url pair for:", {
                          uri: this._uri,
                          root: this._root,
                          path,
                          item,
                      })
                  }
              }
          }
      }

      /*
       * replace URLs in the context which weren't substituted during the
       * scan.
       *
       * these are either standalone URLs whose expansion we retrieve from the
       * cache, or URLs whose expansion exists in the context in a location
       * not covered by the scan
       */
      _assign (context, targets) {
          for (const target of targets) {
              if (isPlainObject(target)) {
                  this._assignFromPath(context, target)
              } else {
                  this._assignFromCache(context, target)
              }
          }
      }

      /*
       * replace a short URL in the context with an expanded URL defined in
       * the context.
       *
       * this is similar to the replacements performed during the scan, but
       * rather than using a fixed set of locations/property names, the paths
       * to the short/expanded URLs are supplied as a parameter
       */
      _assignFromPath (context, target) {
          const { url: urlPath, expanded_url: expandedUrlPath } = target
          const url = get(context, urlPath)

          // only strings can be tracking links
          if (typeof url !== 'string' || !url || !isTracked(url)) {
              return
          }

          const expandedUrl = get(context, expandedUrlPath)

          if (expandedUrl) {
              this._cache.set(url, expandedUrl)
              exports.set(context, urlPath, expandedUrl)
              this._onReplace()
          }
      }

      /*
       * pinpoint an isolated URL in the context which doesn't have a
       * corresponding expansion, and replace it using the mappings we
       * collected during the scan
       */
      _assignFromCache (context, path) {
          const node = get(context, path)

          let url
          let $context = context
          let $path = path

          // if the target points to an array rather than a string, locate the
          // URL object within the array automatically
          if (Array.isArray(node)) {
              // members may be null, so check before reading the key
              const entry = node.find(it => it && it.key === 'card_url')

              if (entry) {
                  $context = entry
                  $path = 'value.string_value'
                  url = get($context, $path)
              }
          } else {
              url = node
          }

          if (typeof url === 'string' && isTracked(url)) {
              const expandedUrl = this._cache.get(url)

              if (expandedUrl) {
                  exports.set($context, $path, expandedUrl)
                  this._onReplace()
              } else {
                  console.warn(`can't find expanded URL for ${url} in ${this._uri}`)
              }
          }
      }
  }
  433.  
  /*
   * replace t.co URLs with the original URL in every location of the document
   * which is known to contain URLs.
   *
   * the router is consulted for { pattern => queries } pairs. the queries of
   * each pair whose pattern matches the URI are run against the document, and
   * the replacements they make are tallied (in STATS as well as in the return
   * value).
   *
   * returns the number of replacements made
   */
  function transform (data, uri) {
      let replaced = 0

      if (!STATS.uri[uri]) {
          STATS.uri[uri] = new Set()
      }

      const state = { matched: false }

      for (const [pattern, spec] of router(state, data)) {
          // a key may be a single pattern or an array of them
          const wanted = NONE.concat(pattern)
          const isMatch = wanted.some(want => {
              return (typeof want === 'string') ? (uri === want) : want.test(uri)
          })

          if (!isMatch) {
              // try the next URI pattern, or switch to the wildcard queries
              // if there are no more patterns to match against
              continue
          }

          // stop matching URIs and switch to the wildcard queries
          state.matched = true

          // likewise, a value may be a single query or an array of them
          for (const entry of NONE.concat(spec)) {
              // a bare path is shorthand for a query with that root
              const query = isPlainObject(entry) ? entry : { root: entry }
              const rootPath = query.root

              if (!STATS.root[rootPath]) {
                  STATS.root[rootPath] = new Set()
              }

              const root = get(data, rootPath)

              // may be an array (e.g. lookup.json)
              const isNode = Boolean(root) && typeof root === 'object'

              if (!isNode) {
                  continue
              }

              const {
                  collect = Object.values,
                  scan = USER_PATHS,
                  targets = NONE,
              } = query

              const onReplace = () => {
                  ++replaced
                  STATS.uri[uri].add(rootPath)
                  STATS.root[rootPath].add(uri)
              }

              const contexts = collect(root)
              const transformer = new Transformer({ onReplace, root: rootPath, uri })

              // @ts-ignore https://github.com/microsoft/TypeScript/issues/14279
              transformer.transform(contexts, scan, targets)
          }
      }

      return replaced
  }
  506.  
  /*
   * replacement for Twitter's default response handler. we transform the
   * response if it's a) JSON and b) contains URL data; otherwise, we leave it
   * unchanged.
   *
   * - xhr: the completed request
   * - uri: the (absolute) URL the response was served from
   *
   * when replacements are made, the request's `responseText` property is
   * redefined to return the re-serialized document. nothing is returned.
   */
  function onResponse (xhr, uri) {
      const contentType = xhr.getResponseHeader('Content-Type')

      if (!CONTENT_TYPE.test(contentType)) {
          return
      }

      const url = new URL(uri)

      // exclude e.g. the config-<date>.json file from pbs.twimg.com, which is
      // the second biggest document (~500K) after home_latest.json (~700K)
      if (!TWITTER_API.test(url.hostname)) {
          return
      }

      // responseText can only be read when the response type is the default
      // ("") or "text": for any other type (e.g. "json") the getter throws an
      // InvalidStateError, and a replacement responseText wouldn't be
      // consulted by the page anyway
      const { responseType } = xhr

      if (responseType && responseType !== 'text') {
          return
      }

      const json = xhr.responseText
      const size = json.length

      // fold URIs which differ only in the user ID, e.g.:
      // /2/timeline/profile/1234.json -> /2/timeline/profile.json
      const path = url.pathname.replace(/\/\d+\.json$/, '.json')

      let data

      try {
          data = JSON.parse(json)
      } catch (e) {
          console.error(`Can't parse JSON for ${uri}:`, e)
          return
      }

      // snapshot the stats so we can tell whether this document changed them
      const oldStats = JSON.stringify(STATS, replacer)
      const count = transform(data, path)

      if (!count) {
          // only report misses for document types which have never yielded a
          // replacement and which are big enough to be worth investigating
          if (STATS.uri[path].size === 0 && size >= LOG_THRESHOLD) {
              console.debug(`no replacements in ${path} (${size} B)`)
          }

          return
      }

      // shadow the read-only responseText accessor with the updated document
      const descriptor = { value: JSON.stringify(data) }
      const clone = GMCompat.export(descriptor)

      GMCompat.unsafeWindow.Object.defineProperty(xhr, 'responseText', clone)

      const newStats = JSON.stringify(STATS, replacer)

      if (newStats !== oldStats) {
          const replacements = 'replacement' + (count === 1 ? '' : 's')
          console.debug(`${count} ${replacements} in ${path} (${size} B)`)
          console.log(JSON.parse(newStats))
      }
  }
  566.  
  /*
   * replace the built-in XHR#send method with our custom version which swaps
   * in our custom response handler. once done, we delegate to the original
   * handler (this.onreadystatechange).
   *
   * takes the original `send` method and returns the replacement.
   *
   * the original handler is always called, even if our response handler
   * throws: a failure to expand URLs is logged, and must never prevent the
   * page from receiving its response.
   *
   * note that only a handler which has been assigned to `onreadystatechange`
   * by the time `send` is called is wrapped.
   */
  function hookXHRSend (oldSend) {
      return /** @this {XMLHttpRequest} */ function send (body = null) {
          const oldOnReadyStateChange = this.onreadystatechange

          this.onreadystatechange = function (event) {
              if (this.readyState === this.DONE && this.responseURL && this.status === 200) {
                  try {
                      onResponse(this, this.responseURL)
                  } catch (e) {
                      console.error(`Can't process response for ${this.responseURL}:`, e)
                  }
              }

              if (oldOnReadyStateChange) {
                  oldOnReadyStateChange.call(this, event)
              }
          }

          oldSend.call(this, body)
      }
  }
  589.  
  /*
   * install the hook: swap the page's XHR#send for our wrapper, which scans
   * responses for tweets and expands their URLs
   */
  const xhrProto = GMCompat.unsafeWindow.XMLHttpRequest.prototype
  const originalSend = xhrProto.send

  xhrProto.send = GMCompat.export(hookXHRSend(originalSend))