Twitter Direct

Remove t.co tracking links from Twitter

当前为 2020-11-13 提交的版本,查看 最新版本

  1. // ==UserScript==
  2. // @name Twitter Direct
  3. // @description Remove t.co tracking links from Twitter
  4. // @author chocolateboy
  5. // @copyright chocolateboy
  6. // @version 1.3.0
  7. // @namespace https://github.com/chocolateboy/userscripts
  8. // @license GPL: https://www.gnu.org/copyleft/gpl.html
  9. // @include https://twitter.com/
  10. // @include https://twitter.com/*
  11. // @include https://mobile.twitter.com/
  12. // @include https://mobile.twitter.com/*
  13. // @require https://unpkg.com/@chocolateboy/uncommonjs@2.0.1/index.min.js
  14. // @require https://unpkg.com/get-wild@1.2.0/dist/index.umd.min.js
  15. // @require https://unpkg.com/just-safe-set@2.1.0/index.js
  16. // @require https://cdn.jsdelivr.net/gh/chocolateboy/gm-compat@a26896b85770aa853b2cdaf2ff79029d8807d0c0/index.min.js
  17. // @run-at document-start
  18. // @inject-into auto
  19. // ==/UserScript==
  20.  
/*
 * a pattern which matches the content-type header of responses we scan for
 * URLs: "application/json" or "application/json; charset=utf-8"
 *
 * the pattern is anchored at the start and ends on a word boundary, so
 * parameters after the media type are allowed
 */
const CONTENT_TYPE = /^application\/json\b/

/*
 * the minimum size of documents we deem to be "not small"
 *
 * note: the value compared against this threshold is the length of the
 * response text (String#length), which only equals the size in bytes for
 * ASCII-only documents, even though the log messages label it "B"
 *
 * we log (to the console) misses (i.e. no URLs ever found/replaced) in
 * documents whose size is greater than or equal to this value
 *
 * if we keep failing to find URLs in large documents, we may be able to speed
 * things up by blacklisting them, at least in theory
 *
 * (in practice, URL data is optional in most of the matched document types
 * (contained in arrays that can be empty), so an absence of URLs doesn't
 * necessarily mean URL data will never be included...)
 */
const LOG_THRESHOLD = 1024
  41.  
  42. /*
  43. * an immutable array used in various places as a way to indicate "no values".
  44. * static to avoid unnecessary allocations.
  45. */
  46. const NONE = []
  47.  
/*
 * used to keep track of which roots (don't) have matching URIs and which URIs
 * (don't) have matching roots
 *
 * both maps are filled in lazily: `uri` maps a request path to the Set of
 * root paths in which at least one URL was replaced, and `root` maps a root
 * path to the Set of request paths it produced replacements for
 */
const STATS = { root: {}, uri: {} }

/*
 * the domain intercepted links are routed through
 *
 * not all links are intercepted. exceptions include links to twitter (e.g.
 * https://twitter.com) and card URIs (e.g. card://123456)
 */
const TRACKING_DOMAIN = 't.co'
  61.  
  62. /*
  63. * a pattern which matches the domain(s) we expect data (JSON) to come from.
  64. * responses which don't come from a matching domain are ignored.
  65. */
  66. const TWITTER_API = /^(?:api\.)?twitter\.com$/
  67.  
/*
 * default locations to search for URL metadata (arrays of objects) within tweet
 * nodes
 *
 * each entry is a dot-separated path which is resolved relative to a context
 * (tweet) node
 */
const TWEET_PATHS = [
    'entities.media',
    'entities.urls',
    'extended_entities.media',
    'extended_entities.urls',
]

/*
 * default locations to search for URL metadata (arrays of objects) within
 * user/profile nodes
 *
 * this is also the default value of a query's `scan` option
 */
const USER_PATHS = [
    'entities.description.urls',
    'entities.url.urls',
]
  87.  
/*
 * a router which matches URIs (pathnames) to queries. each query contains a
 * root path (required) and some additional options which specify the locations
 * under the root path to substitute URLs in.
 *
 * implemented as an array of pairs with URI-pattern keys (string(s) or
 * regexp(s)) and one or more queries as the value. if a query is a path (string
 * or array) it is converted into an object with the path as its `root`
 * property.
 *
 * options:
 *
 * - root (required): a path (string or array of steps) into the document
 *   under which to begin searching
 *
 * - collect (default: Object.values): a function which takes a root node and
 *   turns it into an array of context nodes to scan for URL data
 *
 * - scan (default: USER_PATHS): an array of paths to probe for arrays of
 *   { url, expanded_url } pairs in a context node
 *
 * - targets (default: NONE): an array of paths to standalone URLs (URLs that
 *   don't have an accompanying expansion), e.g. for URLs in cards embedded in
 *   tweets. these URLs are replaced by expanded URLs gathered during the
 *   scan.
 *
 *   target paths can point directly to a URL node (string) or to an
 *   array of objects. in the latter case, we find the URL object in the array
 *   (obj.key === "card_url") and replace its URL node (obj.value.string_value)
 *
 *   if a target path is an object containing a { url: path, expanded_url: path }
 *   pair, the URL is expanded directly in the same way as scanned paths.
 *
 * the patterns are tried in order and matching stops at the first pattern
 * which matches the request path
 */
const MATCH = [
    [
        // e.g. '/1.1/users/lookup.json',
        /\/lookup\.json$/, {
            root: NONE, // returns self
        }
    ],
    [
        // tweet threads: user (profile) data is scanned with the default
        // paths, tweet data with TWEET_PATHS plus the card targets
        /\/Conversation$/, [
            'data.conversation_timeline.instructions.*.moduleItems.*.item.itemContent.tweet.core.user.legacy',
            'data.conversation_timeline.instructions.*.entries.*.content.items.*.item.itemContent.tweet.core.user.legacy',
            {
                root: 'data.conversation_timeline.instructions.*.moduleItems.*.item.itemContent.tweet.legacy',
                scan: TWEET_PATHS,
                targets: ['card.binding_values', 'card.url'],
            },
            {
                root: 'data.conversation_timeline.instructions.*.entries.*.content.items.*.item.itemContent.tweet.legacy',
                scan: TWEET_PATHS,
                targets: ['card.binding_values', 'card.url'],
            },
        ]
    ],
    [
        /\/Following$/,
        'data.user.following_timeline.timeline.instructions.*.entries.*.content.itemContent.user.legacy',
    ],
    [
        /\/Followers$/,
        'data.user.followers_timeline.timeline.instructions.*.entries.*.content.itemContent.user.legacy',
    ],
    [
        /\/FollowersYouKnow$/,
        'data.user.friends_following_timeline.timeline.instructions.*.entries.*.content.itemContent.user.legacy',
    ],
    [
        /\/ListMembers$/,
        'data.list.members_timeline.timeline.instructions.*.entries.*.content.itemContent.user.legacy'
    ],
    [
        /\/ListSubscribers$/,
        'data.list.subscribers_timeline.timeline.instructions.*.entries.*.content.itemContent.user.legacy',
    ],
    [
        // used for hovercard data
        /\/UserByScreenName$/, {
            root: 'data.user.legacy',
            // the root is a single user node rather than a collection of
            // nodes, so wrap it in an array instead of taking its values
            collect: Array.of,
        }
    ],
    [
        // DMs
        // e.g. '/1.1/dm/inbox_initial_state.json' and '/1.1/dm/user_updates.json'
        /\/(?:inbox_initial_state|user_updates)\.json$/, {
            root: 'inbox_initial_state.entries.*.message.message_data',
            scan: TWEET_PATHS,
            targets: [
                'attachment.card.binding_values.card_url.string_value',
                'attachment.card.url',
            ],
        }
    ],
    [
        // e.g. '/1.1/friends/following/list.json',
        /\/list\.json$/,
        'users.*'
    ],
]
  189.  
/*
 * a single { pattern => queries } pair for the router which matches all URIs
 *
 * every root in these queries lives under `globalObjects`; the router only
 * yields this pair for documents which have that property
 */
const WILDCARD = [
    /./,
    [
        {
            root: 'globalObjects.tweets',
            scan: TWEET_PATHS,
            targets: [
                // a short/expanded pair which both live in the card itself,
                // so it can be expanded without consulting the cache
                {
                    url: 'card.binding_values.website_shortened_url.string_value',
                    expanded_url: 'card.binding_values.website_url.string_value',
                },
                'card.binding_values.card_url.string_value',
                'card.url',
            ],
        },
        'globalObjects.tweets.*.card.users.*',
        'globalObjects.users',
    ]
]
  211.  
/*
 * a custom version of get-wild's `get` function which uses a simpler/faster
 * path parser since we don't use the extended syntax
 *
 * paths are split on "." only. it is called below as get(value, path) and
 * get(value, path, defaultValue)
 *
 * NOTE(review): `exports` is presumably populated by the @require'd
 * libraries (via the uncommonjs shim) - confirm against the metadata block
 */
const get = exports.getter({ split: '.' })
  217.  
  218. /*
  219. * a helper function which returns true if the supplied value is a plain object,
  220. * false otherwise
  221. */
  222. const isPlainObject = (function () {
  223. const toString = {}.toString
  224. return value => toString.call(value) === '[object Object]'
  225. })()
  226.  
  227. /*
  228. * a helper function which iterates over the supplied iterable, filtering out
  229. * missing (undefined) values.
  230. *
  231. * this is done in one pass (rather than map + filter) as there may potentially
  232. * be dozens or even hundreds of values, e.g. contexts (tweet/user objects)
  233. * under a root node
  234. */
  235. function eachDefined (iterable, fn) {
  236. for (const value of iterable) {
  237. if (value) fn(value)
  238. }
  239. }
  240.  
  241. /**
  242. * a helper function which returns true if the supplied URL is tracked by
  243. * Twitter, false otherwise
  244. */
  245. function isTracked (url) {
  246. return (new URL(url)).hostname === TRACKING_DOMAIN
  247. }
  248.  
  249. /*
  250. * JSON.stringify helper used to serialize stats data
  251. */
  252. function replacer (_key, value) {
  253. return (value instanceof Set) ? Array.from(value) : value
  254. }
  255.  
  256. /*
  257. * an iterator which returns { pattern => queries } pairs where patterns
  258. * are strings/regexps which match a URI and queries are objects which
  259. * define substitutions to perform in the matched document.
  260. *
  261. * this forms the basis of a simple "router" which tries all URI patterns
  262. * until one matches (or none match) and then additionally performs a
  263. * wildcard match which works on all URIs.
  264. *
  265. * the URI patterns are disjoint, so there's no need to try them all if one
  266. * matches. in addition to these, some substitutions are non URI-specific,
  267. * i.e. they work on documents that aren't matched by URI (e.g.
  268. * profile.json) and documents that are (e.g. list.json). currently the
  269. * latter all transform locations under obj.globalObjects, so we check for
  270. * the existence of that property before yielding these catch-all queries
  271. */
  272. function* router (state, data) {
  273. for (const [key, value] of MATCH) {
  274. yield [key, value]
  275.  
  276. if (state.matched) {
  277. break
  278. }
  279. }
  280.  
  281. if ('globalObjects' in data) {
  282. yield WILDCARD
  283. }
  284. }
  285.  
  286. /*
  287. * a helper class which implements document-specific (MATCH) and generic
  288. * (WILDCARD) URL substitutions in nodes (subtrees) within a JSON-formatted
  289. * document returned by the Twitter API.
  290. *
  291. * a transformer is instantiated for each query and its methods are passed a
  292. * context (node within the document tree) and the value of an option from the
  293. * query, e.g. the `scan` option is handled by the `scan` method and the
  294. * `targets` option is processed by the `assign` method
  295. */
  296. class Transformer {
  297. constructor ({ onReplace, root, uri }) {
  298. this._cache = new Map()
  299. this._onReplace = onReplace
  300. this._root = root
  301. this._uri = uri
  302. }
  303.  
  304. /*
  305. * expand URLs in context nodes in the locations specified by the query's
  306. * `scan` and `targets` options
  307. */
  308. // @ts-ignore https://github.com/microsoft/TypeScript/issues/14279
  309. transform (contexts, scan, targets) {
  310. // scan the context nodes for { url, expanded_url } pairs, replace
  311. // each t.co URL with its expansion, and add the mappings to the
  312. // cache
  313. eachDefined(contexts, context => this._scan(context, scan))
  314.  
  315. // do a separate pass for targets because some nested card URLs are
  316. // expanded in other (earlier) tweets under the same root
  317. if (targets.length) {
  318. eachDefined(contexts, context => this._assign(context, targets))
  319. }
  320. }
  321.  
  322. /*
  323. * scan the context node for { url, expanded_url } pairs, replace each t.co
  324. * URL with its expansion, and add the mappings to the cache
  325. */
  326. _scan (context, paths) {
  327. const { _cache: cache, _onReplace: onReplace } = this
  328.  
  329. for (const path of paths) {
  330. const items = get(context, path, NONE)
  331.  
  332. for (const item of items) {
  333. if (item.url && item.expanded_url) {
  334. if (isTracked(item.url)) {
  335. cache.set(item.url, item.expanded_url)
  336. item.url = item.expanded_url
  337. onReplace()
  338. }
  339. } else {
  340. console.warn("can't find url/expanded_url pair for:", {
  341. uri: this._uri,
  342. root: this._root,
  343. path,
  344. item,
  345. })
  346. }
  347. }
  348. }
  349. }
  350.  
  351. /*
  352. * replace URLs in the context which weren't substituted during the scan.
  353. *
  354. * these are either standalone URLs whose expansion we retrieve from the
  355. * cache, or URLs whose expansion exists in the context in a location not
  356. * covered by the scan
  357. */
  358. _assign (context, targets) {
  359. for (const target of targets) {
  360. if (isPlainObject(target)) {
  361. this._assignFromPath(context, target)
  362. } else {
  363. this._assignFromCache(context, target)
  364. }
  365. }
  366. }
  367.  
  368. /*
  369. * replace a short URL in the context with an expanded URL defined in the
  370. * context.
  371. *
  372. * this is similar to the replacements performed during the scan, but rather
  373. * than using a fixed set of locations/property names, the paths to the
  374. * short/expanded URLs are supplied as a parameter
  375. */
  376. _assignFromPath (context, target) {
  377. const { url: urlPath, expanded_url: expandedUrlPath } = target
  378.  
  379. let url, expandedUrl
  380.  
  381. if (
  382. (url = get(context, urlPath))
  383. && isTracked(url)
  384. && (expandedUrl = get(context, expandedUrlPath))
  385. ) {
  386. this._cache.set(url, expandedUrl)
  387. exports.set(context, urlPath, expandedUrl)
  388. this._onReplace()
  389. }
  390. }
  391.  
  392. /*
  393. * pinpoint an isolated URL in the context which doesn't have a
  394. * corresponding expansion, and replace it using the mappings we collected
  395. * during the scan
  396. */
  397. _assignFromCache (context, path) {
  398. let url, $context = context, $path = path
  399.  
  400. const node = get(context, path)
  401.  
  402. // if the target points to an array rather than a string, locate the URL
  403. // object within the array automatically
  404. if (Array.isArray(node)) {
  405. if ($context = node.find(it => it.key === 'card_url')) {
  406. $path = 'value.string_value'
  407. url = get($context, $path)
  408. }
  409. } else {
  410. url = node
  411. }
  412.  
  413. if (typeof url === 'string' && isTracked(url)) {
  414. const expandedUrl = this._cache.get(url)
  415.  
  416. if (expandedUrl) {
  417. exports.set($context, $path, expandedUrl)
  418. this._onReplace()
  419. } else {
  420. console.warn(`can't find expanded URL for ${url} in ${this._uri}`)
  421. }
  422. }
  423. }
  424. }
  425.  
  426. /*
  427. * replace t.co URLs with the original URL in all locations in the document
  428. * which contain URLs
  429. */
  430. function transform (data, uri) {
  431. let count = 0
  432.  
  433. if (!STATS.uri[uri]) {
  434. STATS.uri[uri] = new Set()
  435. }
  436.  
  437. const state = { matched: false }
  438. const it = router(state, data)
  439.  
  440. for (const [key, value] of it) {
  441. const uris = NONE.concat(key)
  442. const queries = NONE.concat(value)
  443. const match = uris.some(want => {
  444. return (typeof want === 'string') ? (uri === want) : want.test(uri)
  445. })
  446.  
  447. if (match) {
  448. // stop matching URIs and switch to the wildcard queries
  449. state.matched = true
  450. } else {
  451. // try the next URI pattern, or switch to the wildcard queries if
  452. // there are no more patterns to match against
  453. continue
  454. }
  455.  
  456. for (const $query of queries) {
  457. const query = isPlainObject($query) ? $query : { root: $query }
  458. const { root: rootPath } = query
  459.  
  460. if (!STATS.root[rootPath]) {
  461. STATS.root[rootPath] = new Set()
  462. }
  463.  
  464. const root = get(data, rootPath)
  465.  
  466. // may be an array (e.g. lookup.json)
  467. if (!root || typeof root !== 'object') {
  468. continue
  469. }
  470.  
  471. const {
  472. collect = Object.values,
  473. scan = USER_PATHS,
  474. targets = NONE,
  475. } = query
  476.  
  477. const updateStats = () => {
  478. ++count
  479. STATS.uri[uri].add(rootPath)
  480. STATS.root[rootPath].add(uri)
  481. }
  482.  
  483. const contexts = collect(root)
  484.  
  485. const transformer = new Transformer({
  486. onReplace: updateStats,
  487. root: rootPath,
  488. uri
  489. })
  490.  
  491. // @ts-ignore https://github.com/microsoft/TypeScript/issues/14279
  492. transformer.transform(contexts, scan, targets)
  493. }
  494. }
  495.  
  496. return count
  497. }
  498.  
  499. /*
  500. * replacement for Twitter's default response handler. we transform the response
  501. * if it's a) JSON and b) contains URL data; otherwise, we leave it unchanged
  502. */
  503. function onResponse (xhr, uri) {
  504. const contentType = xhr.getResponseHeader('Content-Type')
  505.  
  506. if (!CONTENT_TYPE.test(contentType)) {
  507. return
  508. }
  509.  
  510. const url = new URL(uri)
  511.  
  512. // exclude e.g. the config-<date>.json file from pbs.twimg.com, which is the
  513. // second biggest document (~500K) after home_latest.json (~700K)
  514. if (!TWITTER_API.test(url.hostname)) {
  515. return
  516. }
  517.  
  518. const json = xhr.responseText
  519. const size = json.length
  520.  
  521. // fold URIs which differ only in the user ID, e.g.:
  522. // /2/timeline/profile/1234.json -> /2/timeline/profile.json
  523. const path = url.pathname.replace(/\/\d+\.json$/, '.json')
  524.  
  525. let data
  526.  
  527. try {
  528. data = JSON.parse(json)
  529. } catch (e) {
  530. console.error(`Can't parse JSON for ${uri}:`, e)
  531. return
  532. }
  533.  
  534. const oldStats = JSON.stringify(STATS, replacer)
  535. const count = transform(data, path)
  536.  
  537. if (!count) {
  538. if (STATS.uri[path].size === 0 && size >= LOG_THRESHOLD) {
  539. console.debug(`no replacements in ${path} (${size} B)`)
  540. }
  541.  
  542. return
  543. }
  544.  
  545. const descriptor = { value: JSON.stringify(data) }
  546. const clone = GMCompat.export(descriptor)
  547.  
  548. GMCompat.unsafeWindow.Object.defineProperty(xhr, 'responseText', clone)
  549.  
  550. const newStats = JSON.stringify(STATS, replacer)
  551.  
  552. if (newStats !== oldStats) {
  553. const replacements = 'replacement' + (count === 1 ? '' : 's')
  554. console.debug(`${count} ${replacements} in ${path} (${size} B)`)
  555. console.log(JSON.parse(newStats))
  556. }
  557. }
  558.  
  559. /*
  560. * replace the built-in XHR#send method with our custom version which swaps in
  561. * our custom response handler. once done, we delegate to the original handler
  562. * (this.onreadystatechange)
  563. */
  564. function hookXHRSend (oldSend) {
  565. return /** @this {XMLHttpRequest} */ function send (body = null) {
  566. // video requests (HLS) use a readystate listener with a custom object
  567. // bound as its `this` value. the responses aren't tweet/user data so we
  568. // don't need to touch them
  569.  
  570. const oldOnReadyStateChange = this.onreadystatechange
  571. const isBound = oldOnReadyStateChange?.toString().includes('[native code]')
  572.  
  573. if (!isBound) {
  574. this.onreadystatechange = function () {
  575. if (this.readyState === this.DONE && this.responseURL && this.status === 200) {
  576. onResponse(this, this.responseURL)
  577. }
  578.  
  579. if (oldOnReadyStateChange) {
  580. // @ts-ignore
  581. oldOnReadyStateChange.call(this)
  582. }
  583. }
  584. }
  585.  
  586. oldSend.call(this, body)
  587. }
  588. }
  589.  
  590. /*
  591. * replace the default XHR#send with our custom version, which scans responses
  592. * for tweets and expands their URLs
  593. */
  594. const xhrProto = GMCompat.unsafeWindow.XMLHttpRequest.prototype
  595.  
  596. xhrProto.send = GMCompat.export(hookXHRSend(xhrProto.send))