Twitter Direct

Remove t.co tracking links from Twitter

当前为 2021-04-19 提交的版本,查看 最新版本

  1. // ==UserScript==
  2. // @name Twitter Direct
  3. // @description Remove t.co tracking links from Twitter
  4. // @author chocolateboy
  5. // @copyright chocolateboy
  6. // @version 1.6.0
  7. // @namespace https://github.com/chocolateboy/userscripts
  8. // @license GPL
  9. // @include https://twitter.com/
  10. // @include https://twitter.com/*
  11. // @include https://mobile.twitter.com/
  12. // @include https://mobile.twitter.com/*
  13. // @require https://unpkg.com/@chocolateboy/uncommonjs@3.1.2/dist/polyfill.iife.min.js
  14. // @require https://unpkg.com/get-wild@1.4.1/dist/index.umd.min.js
  15. // @require https://unpkg.com/gm-compat@1.1.0/dist/index.iife.min.js
  16. // @require https://unpkg.com/just-safe-set@2.1.0/index.js
  17. // @run-at document-start
  18. // ==/UserScript==
  19.  
  20. /*
  21. * a pattern which matches the content-type header of responses we scan for
  22. * URLs: "application/json" or "application/json; charset=utf-8"
  23. */
  24. const CONTENT_TYPE = /^application\/json\b/
  25.  
  26. /*
  27. * the minimum size (in bytes) of documents we deem to be "not small"
  28. *
  29. * we log (to the console) misses (i.e. no URLs ever found/replaced) in
  30. * documents whose size is greater than or equal to this value
  31. *
  32. * if we keep failing to find URLs in large documents, we may be able to speed
  33. * things up by blacklisting them, at least in theory
  34. *
  35. * (in practice, URL data is optional in most of the matched document types
  36. * (contained in arrays that can be empty), so an absence of URLs doesn't
  37. * necessarily mean URL data will never be included...)
  38. */
  39. const LOG_THRESHOLD = 1024
  40.  
  41. /*
  42. * an immutable array used in various places as a way to indicate "no values".
  43. * static to avoid unnecessary allocations.
  44. */
  45. const NONE = []
  46.  
  47. /*
  48. * used to keep track of which queries (don't) have matching URIs and which URIs
  49. * (don't) have matching queries
  50. */
  51. const STATS = { root: {}, uri: {} }
  52.  
  53. /*
  54. * the domain intercepted links are routed through
  55. *
  56. * not all links are intercepted. exceptions include links to twitter (e.g.
  57. * https://twitter.com) and card URIs (e.g. card://123456)
  58. */
  59. const TRACKING_DOMAIN = 't.co'
  60.  
  61. /*
  62. * a pattern which matches the domain(s) we expect data (JSON) to come from.
  63. * responses which don't come from a matching domain are ignored.
  64. */
  65. const TWITTER_API = /^(?:(?:api|mobile)\.)?twitter\.com$/
  66.  
  67. /*
  68. * default locations to search for URL metadata (arrays of objects) within tweet
  69. * nodes
  70. */
  71. const TWEET_PATHS = [
  72. 'entities.media',
  73. 'entities.urls',
  74. 'extended_entities.media',
  75. 'extended_entities.urls',
  76. ]
  77.  
  78. /*
  79. * default locations to search for URL metadata (arrays of objects) within
  80. * user/profile nodes
  81. */
  82. const USER_PATHS = [
  83. 'entities.description.urls',
  84. 'entities.url.urls',
  85. ]
  86.  
  87. /*
  88. * a router which matches URIs (pathnames) to queries. each query contains a
  89. * root path (required) and some additional options which specify the locations
  90. * under the root path to substitute URLs in.
  91. *
  92. * implemented as an array of pairs with URI-pattern keys (string(s) or
  93. * regexp(s)) and one or more queries as the value. if a query is a string
  94. * (path), it is converted into an object with the path as its `root`
  95. * property.
  96. *
  97. * options:
  98. *
  99. * - root (required): a path (string or array of steps) into the document
  100. * under which to begin searching
  101. *
  102. * - collect (default: Object.values): a function which takes a root node and
  103. * turns it into an array of context nodes to scan for URL data
  104. *
  105. * - scan (default: USER_PATHS): an array of paths to probe for arrays of
  106. * { url, expanded_url } pairs in a context node
  107. *
  108. * - targets (default: NONE): an array of paths to standalone URLs (URLs that
  109. * don't have an accompanying expansion), e.g. for URLs in cards embedded in
  110. * tweets. these URLs are replaced by expanded URLs gathered during the
  111. * scan.
  112. *
  113. * target paths can point directly to a URL node (string) or to an
  114. * array of objects. in the latter case, we find the URL object in the array
  115. * (obj.key === "card_url") and replace its URL node (obj.value.string_value)
  116. *
  117. * if a target path is an object containing a { url: path, expanded_url: path }
  118. * pair, the URL is expanded directly in the same way as scanned paths.
  119. */
  120. const MATCH = [
  121. [
  122. /\/Conversation$/, [
  123. 'data.conversation_timeline.instructions.*.moduleItems.*.item.itemContent.tweet.core.user.legacy',
  124. 'data.conversation_timeline.instructions.*.entries.*.content.items.*.item.itemContent.tweet.core.user.legacy',
  125. {
  126. root: 'data.conversation_timeline.instructions.*.moduleItems.*.item.itemContent.tweet.legacy',
  127. scan: TWEET_PATHS,
  128. targets: ['card.binding_values', 'card.url'],
  129. },
  130. {
  131. root: 'data.conversation_timeline.instructions.*.entries.*.content.items.*.item.itemContent.tweet.legacy',
  132. scan: TWEET_PATHS,
  133. targets: ['card.binding_values', 'card.url'],
  134. },
  135. ]
  136. ],
  137. [
  138. /\/Favoriters$/,
  139. 'data.favoriters_timeline.timeline.instructions.*.entries.*.content.itemContent.user.legacy',
  140. ],
  141. [
  142. /\/Following$/,
  143. 'data.user.following_timeline.timeline.instructions.*.entries.*.content.itemContent.user.legacy',
  144. ],
  145. [
  146. /\/Followers$/,
  147. 'data.user.followers_timeline.timeline.instructions.*.entries.*.content.itemContent.user.legacy',
  148. ],
  149. [
  150. /\/FollowersYouKnow$/,
  151. 'data.user.friends_following_timeline.timeline.instructions.*.entries.*.content.itemContent.user.legacy',
  152. ],
  153. [
  154. /\/ListMembers$/,
  155. 'data.list.members_timeline.timeline.instructions.*.entries.*.content.itemContent.user.legacy'
  156. ],
  157. [
  158. /\/ListSubscribers$/,
  159. 'data.list.subscribers_timeline.timeline.instructions.*.entries.*.content.itemContent.user.legacy',
  160. ],
  161. [
  162. /\/Retweeters/,
  163. 'data.retweeters_timeline.timeline.instructions.*.entries.*.content.itemContent.user.legacy'
  164. ],
  165. [
  166. // used for hovercard data
  167. /\/UserByScreenName$/, {
  168. root: 'data.user.legacy',
  169. collect: Array.of,
  170. }
  171. ],
  172. [
  173. // e.g. /i/api/graphql/abcd1234/UserMedia
  174. /\/UserMedia$/, [
  175. 'data.user.result.timeline.timeline.instructions.*.entries.*.content.itemContent.tweet.card.legacy.user_refs.*.legacy',
  176. 'data.user.result.timeline.timeline.instructions.*.entries.*.content.itemContent.tweet.core.user.legacy',
  177. 'data.user.result.timeline.timeline.instructions.*.entries.*.content.itemContent.tweet.quoted_status.card.legacy.user_refs.*.legacy',
  178. {
  179. root: 'data.user.result.timeline.timeline.instructions.*.entries.*.content.itemContent.tweet.legacy',
  180. scan: TWEET_PATHS,
  181. },
  182. {
  183. root: 'data.user.result.timeline.timeline.instructions.*.entries.*.content.itemContent.tweet.quoted_status.legacy',
  184. scan: TWEET_PATHS,
  185. },
  186. ]
  187. ],
  188. [
  189. // DMs
  190. // e.g. '/1.1/dm/inbox_initial_state.json' and '/1.1/dm/user_updates.json'
  191. /\/(?:inbox_initial_state|user_updates)\.json$/, {
  192. root: 'inbox_initial_state.entries.*.message.message_data',
  193. scan: TWEET_PATHS,
  194. targets: [
  195. 'attachment.card.binding_values.card_url.string_value',
  196. 'attachment.card.url',
  197. ],
  198. }
  199. ],
  200. [
  201. // e.g. '/1.1/friends/following/list.json',
  202. /\/list\.json$/,
  203. 'users.*'
  204. ],
  205. [
  206. // e.g. '/1.1/users/lookup.json',
  207. /\/lookup\.json$/, {
  208. root: NONE, // the document itself (an array of users) is the root
  209. }
  210. ],
  211. [
  212. // "Who to follow"
  213. // e.g. '/1.1/users/recommendations.json'
  214. /\/recommendations\.json$/, {
  215. root: '*.user',
  216. }
  217. ],
  218. ]
  219.  
  220. /*
  221. * a single { pattern => queries } pair for the router which matches all URIs
  222. */
  223. const WILDCARD = [
  224. /./,
  225. [
  226. {
  227. root: 'globalObjects.tweets',
  228. scan: TWEET_PATHS,
  229. targets: [
  230. {
  231. url: 'card.binding_values.website_shortened_url.string_value',
  232. expanded_url: 'card.binding_values.website_url.string_value',
  233. },
  234. 'card.binding_values.card_url.string_value',
  235. 'card.url',
  236. ],
  237. },
  238. 'globalObjects.tweets.*.card.users.*',
  239. 'globalObjects.users',
  240. ]
  241. ]
  242.  
  243. /*
  244. * a custom version of get-wild's `get` function which uses a simpler/faster
  245. * path parser since we don't use the extended syntax
  246. */
  247. const get = exports.getter({ split: '.' })
  248.  
  249. /*
  250. * a helper function which returns true if the supplied value is a plain object,
  251. * false otherwise
  252. */
  253. const isPlainObject = (function () {
  254. const toString = {}.toString
  255. // only used with JSON data, so we don't need this to be foolproof
  256. return value => toString.call(value) === '[object Object]'
  257. })()
  258.  
  259. /*
  260. * a helper function which iterates over the supplied iterable, filtering out
  261. * missing (undefined) values.
  262. *
  263. * this is done in one pass (rather than map + filter) as there may potentially
  264. * be dozens or even hundreds of values, e.g. contexts (tweet/user objects)
  265. * under a root node
  266. */
  267. function eachDefined (iterable, fn) {
  268. for (const value of iterable) {
  269. if (value) fn(value)
  270. }
  271. }
  272.  
  273. /**
  274. * a helper function which returns true if the supplied URL is tracked by
  275. * Twitter, false otherwise
  276. */
  277. function isTracked (url) {
  278. return (new URL(url)).hostname === TRACKING_DOMAIN
  279. }
  280.  
  281. /*
  282. * JSON.stringify helper used to serialize stats data
  283. */
  284. function replacer (_key, value) {
  285. return (value instanceof Set) ? Array.from(value) : value
  286. }
  287.  
  288. /*
  289. * a generator which returns { pattern => queries } pairs where patterns
  290. * are strings/regexps which match a URI and queries are objects which
  291. * define substitutions to perform in the matched document.
  292. *
  293. * this forms the basis of a simple "router" which tries all URI patterns
  294. * until one matches (or none match) and then additionally performs a
  295. * wildcard match which works on all URIs.
  296. *
  297. * the URI patterns are disjoint, so there's no need to try them all if one
  298. * matches. in addition to these, some substitutions are non URI-specific,
  299. * i.e. they work on documents that aren't matched by URI (e.g.
  300. * profile.json) and documents that are (e.g. list.json). currently the
  301. * latter all transform locations under obj.globalObjects, so we check for
  302. * the existence of that property before yielding these catch-all queries
  303. */
  304. function* router (data, state) {
  305. for (const [key, value] of MATCH) {
  306. yield [key, value]
  307.  
  308. if (state.matched) {
  309. break
  310. }
  311. }
  312.  
  313. if ('globalObjects' in data) {
  314. yield WILDCARD
  315. }
  316. }
  317.  
  318. /*
  319. * a helper class which implements document-specific (MATCH) and generic
  320. * (WILDCARD) URL substitutions in nodes (subtrees) within a JSON-formatted
  321. * document returned by the Twitter API.
  322. *
  323. * a transformer is instantiated for each query and its methods are passed a
  324. * context (node within the document tree) and the value of an option from the
  325. * query, e.g. the `scan` option is handled by the `_scan` method and the
  326. * `targets` option is processed by the `_assign` method
  327. */
  328. class Transformer {
  329. constructor ({ onReplace, root, uri }) {
  330. this._cache = new Map()
  331. this._onReplace = onReplace
  332. this._root = root
  333. this._uri = uri
  334. }
  335.  
  336. /*
  337. * expand URLs in context nodes in the locations specified by the query's
  338. * `scan` and `targets` options
  339. */
  340. // @ts-ignore https://github.com/microsoft/TypeScript/issues/14279
  341. transform (contexts, scan, targets) {
  342. // scan the context nodes for { url, expanded_url } pairs, replace
  343. // each t.co URL with its expansion, and add the mappings to the
  344. // cache
  345. eachDefined(contexts, context => this._scan(context, scan))
  346.  
  347. // do a separate pass for targets because some nested card URLs are
  348. // expanded in other (earlier) tweets under the same root
  349. if (targets.length) {
  350. eachDefined(contexts, context => this._assign(context, targets))
  351. }
  352. }
  353.  
  354. /*
  355. * scan the context node for { url, expanded_url } pairs, replace each t.co
  356. * URL with its expansion, and add the mappings to the cache
  357. */
  358. _scan (context, paths) {
  359. const { _cache: cache, _onReplace: onReplace } = this
  360.  
  361. for (const path of paths) {
  362. const items = get(context, path, NONE)
  363.  
  364. for (const item of items) {
  365. if (item.url && item.expanded_url) {
  366. if (isTracked(item.url)) {
  367. cache.set(item.url, item.expanded_url)
  368. item.url = item.expanded_url
  369. onReplace()
  370. }
  371. } else {
  372. console.warn("can't find url/expanded_url pair for:", {
  373. uri: this._uri,
  374. root: this._root,
  375. path,
  376. item,
  377. })
  378. }
  379. }
  380. }
  381. }
  382.  
  383. /*
  384. * replace URLs in the context which weren't substituted during the scan.
  385. *
  386. * these are either standalone URLs whose expansion we retrieve from the
  387. * cache, or URLs whose expansion exists in the context in a location not
  388. * covered by the scan
  389. */
  390. _assign (context, targets) {
  391. for (const target of targets) {
  392. if (isPlainObject(target)) {
  393. this._assignFromPath(context, target)
  394. } else {
  395. this._assignFromCache(context, target)
  396. }
  397. }
  398. }
  399.  
  400. /*
  401. * replace a short URL in the context with an expanded URL defined in the
  402. * context.
  403. *
  404. * this is similar to the replacements performed during the scan, but rather
  405. * than using a fixed set of locations/property names, the paths to the
  406. * short/expanded URLs are supplied as a parameter
  407. */
  408. _assignFromPath (context, target) {
  409. const { url: urlPath, expanded_url: expandedUrlPath } = target
  410.  
  411. let url, expandedUrl
  412.  
  413. if (
  414. (url = get(context, urlPath))
  415. && isTracked(url)
  416. && (expandedUrl = get(context, expandedUrlPath))
  417. ) {
  418. this._cache.set(url, expandedUrl)
  419. exports.set(context, urlPath, expandedUrl)
  420. this._onReplace()
  421. }
  422. }
  423.  
  424. /*
  425. * pinpoint an isolated URL in the context which doesn't have a
  426. * corresponding expansion, and replace it using the mappings we collected
  427. * during the scan
  428. */
  429. _assignFromCache (context, path) {
  430. let url, $context = context, $path = path
  431.  
  432. const node = get(context, path)
  433.  
  434. // if the target points to an array rather than a string, locate the URL
  435. // object within the array automatically
  436. if (Array.isArray(node)) {
  437. if ($context = node.find(it => it.key === 'card_url')) {
  438. $path = 'value.string_value'
  439. url = get($context, $path)
  440. }
  441. } else {
  442. url = node
  443. }
  444.  
  445. if (typeof url === 'string' && isTracked(url)) {
  446. const expandedUrl = this._cache.get(url)
  447.  
  448. if (expandedUrl) {
  449. exports.set($context, $path, expandedUrl)
  450. this._onReplace()
  451. } else {
  452. console.warn(`can't find expanded URL for ${url} in ${this._uri}`)
  453. }
  454. }
  455. }
  456. }
  457.  
  458. /*
  459. * replace t.co URLs with the original URL in all locations in the document
  460. * which contain URLs
  461. */
  462. function transform (data, uri) {
  463. let count = 0
  464.  
  465. if (!STATS.uri[uri]) {
  466. STATS.uri[uri] = new Set()
  467. }
  468.  
  469. const state = { matched: false }
  470. const it = router(data, state)
  471.  
  472. for (const [key, value] of it) {
  473. const uris = NONE.concat(key) // coerce to an array
  474. const queries = NONE.concat(value)
  475. const match = uris.some(want => {
  476. return (typeof want === 'string') ? (uri === want) : want.test(uri)
  477. })
  478.  
  479. if (match) {
  480. // stop matching URIs after this and switch to the wildcard queries
  481. state.matched = true
  482. } else {
  483. // try the next URI pattern, or switch to the wildcard queries if
  484. // there are no more patterns to match against
  485. continue
  486. }
  487.  
  488. for (const $query of queries) {
  489. const query = isPlainObject($query) ? $query : { root: $query }
  490. const { root: rootPath } = query
  491.  
  492. if (!STATS.root[rootPath]) {
  493. STATS.root[rootPath] = new Set()
  494. }
  495.  
  496. const root = get(data, rootPath)
  497.  
  498. // may be an array (e.g. lookup.json)
  499. if (!root || typeof root !== 'object') {
  500. continue
  501. }
  502.  
  503. const {
  504. collect = Object.values,
  505. scan = USER_PATHS,
  506. targets = NONE,
  507. } = query
  508.  
  509. const updateStats = () => {
  510. ++count
  511. STATS.uri[uri].add(rootPath)
  512. STATS.root[rootPath].add(uri)
  513. }
  514.  
  515. const contexts = collect(root)
  516.  
  517. const transformer = new Transformer({
  518. onReplace: updateStats,
  519. root: rootPath,
  520. uri
  521. })
  522.  
  523. // @ts-ignore https://github.com/microsoft/TypeScript/issues/14279
  524. transformer.transform(contexts, scan, targets)
  525. }
  526. }
  527.  
  528. return count
  529. }
  530.  
  531. /*
  532. * replacement for Twitter's default response handler. we transform the response
  533. * if it's a) JSON and b) contains URL data; otherwise, we leave it unchanged
  534. */
  535. function onResponse (xhr, uri) {
  536. const contentType = xhr.getResponseHeader('Content-Type')
  537.  
  538. if (!CONTENT_TYPE.test(contentType)) {
  539. return
  540. }
  541.  
  542. const url = new URL(uri)
  543.  
  544. // exclude e.g. the config-<date>.json file from pbs.twimg.com, which is the
  545. // second biggest document (~500K) after home_latest.json (~700K)
  546. if (!TWITTER_API.test(url.hostname)) {
  547. return
  548. }
  549.  
  550. const json = xhr.responseText
  551. const size = json.length
  552.  
  553. // fold URIs which differ only in the user ID, e.g.:
  554. // /2/timeline/profile/1234.json -> /2/timeline/profile.json
  555. const path = url.pathname.replace(/\/\d+\.json$/, '.json')
  556.  
  557. let data
  558.  
  559. try {
  560. data = JSON.parse(json)
  561. } catch (e) {
  562. console.error(`Can't parse JSON for ${uri}:`, e)
  563. return
  564. }
  565.  
  566. const oldStats = JSON.stringify(STATS, replacer)
  567. const count = transform(data, path)
  568.  
  569. if (!count) {
  570. if (STATS.uri[path].size === 0 && size >= LOG_THRESHOLD) {
  571. console.debug(`no replacements in ${path} (${size} B)`)
  572. }
  573.  
  574. return
  575. }
  576.  
  577. const descriptor = { value: JSON.stringify(data) }
  578. const clone = GMCompat.export(descriptor)
  579.  
  580. GMCompat.unsafeWindow.Object.defineProperty(xhr, 'responseText', clone)
  581.  
  582. const newStats = JSON.stringify(STATS, replacer)
  583.  
  584. if (newStats !== oldStats) {
  585. const replacements = 'replacement' + (count === 1 ? '' : 's')
  586. console.debug(`${count} ${replacements} in ${path} (${size} B)`)
  587. console.log(JSON.parse(newStats))
  588. }
  589. }
  590.  
  591. /*
  592. * replace the built-in XHR#send method with our custom version which swaps in
  593. * our custom response handler. once done, we delegate to the original handler
  594. * (this.onreadystatechange)
  595. */
  596. function hookXHRSend (oldSend) {
  597. return /** @this {XMLHttpRequest} */ function send (body = null) {
  598. const oldOnReadyStateChange = this.onreadystatechange
  599.  
  600. this.onreadystatechange = function (event) {
  601. if (this.readyState === this.DONE && this.responseURL && this.status === 200) {
  602. onResponse(this, this.responseURL)
  603. }
  604.  
  605. if (oldOnReadyStateChange) {
  606. oldOnReadyStateChange.call(this, event)
  607. }
  608. }
  609.  
  610. oldSend.call(this, body)
  611. }
  612. }
  613.  
  614. /*
  615. * replace the default XHR#send with our custom version, which scans responses
  616. * for tweets and expands their URLs
  617. */
  618. const xhrProto = GMCompat.unsafeWindow.XMLHttpRequest.prototype
  619.  
  620. xhrProto.send = GMCompat.export(hookXHRSend(xhrProto.send))