Twitter Direct

Remove t.co tracking links from Twitter

目前为 2021-05-19 提交的版本。查看 最新版本

// ==UserScript==
// @name          Twitter Direct
// @description   Remove t.co tracking links from Twitter
// @author        chocolateboy
// @copyright     chocolateboy
// @version       2.1.1
// @namespace     https://github.com/chocolateboy/userscripts
// @license       GPL
// @include       https://twitter.com/
// @include       https://twitter.com/*
// @include       https://mobile.twitter.com/
// @include       https://mobile.twitter.com/*
// @require       https://unpkg.com/gm-compat@1.1.0/dist/index.iife.min.js
// @run-at        document-start
// ==/UserScript==

/*
 * a pattern which matches the Content-Type header of the responses we scan for
 * URLs, e.g. "application/json" or "application/json; charset=utf-8"
 *
 * the match is anchored at the start of the header value and is
 * case-sensitive. responses whose header doesn't match are ignored by
 * +onResponse+
 */
const CONTENT_TYPE = /^application\/json\b/

/*
 * document keys under which t.co URL nodes can be found when the document is a
 * plain object. +transform+ only traverses the subtrees under these keys. not
 * used when the document is an array or has a top-level "id_str" key, in which
 * case the whole document is traversed.
 *
 * some densely-populated top-level paths don't contain t.co URLs, e.g.
 * $.timeline, so they are deliberately not listed here.
 */
const DOCUMENT_ROOTS = [
    'data',
    'globalObjects',
    'inbox_initial_state',
    'users',
]

/*
 * keys of "legacy" objects which URL data is known to be found in/under,
 * e.g. we're interested in legacy.user_refs.*, legacy.retweeted_status.* and
 * legacy.url, but not in legacy.created_at or legacy.reply_count etc.
 *
 * objects under the "legacy" key typically contain dozens of keys, but we only
 * need to probe/traverse a handful to find t.co URLs.
 *
 * typically this reduces the number of keys to iterate in a legacy object from
 * 30 on average (max 39) to 2 or 3
 *
 * NOTE(review): "url" is not in this list, so the filtered copy built by the
 * replacer doesn't carry legacy.url - confirm whether that is intended
 */
const LEGACY_KEYS = [
    'binding_values',
    'entities',
    'extended_entities',
    'quoted_status_permalink',
    'retweeted_status',
    'user_refs',
]

/*
 * the minimum size of documents we deem to be "not small"
 *
 * we log (to the console) misses (i.e. no URLs ever found/replaced) in
 * documents whose size reaches this threshold. smaller documents are skipped
 * silently.
 *
 * the size it is compared with is the length of the response text (a string
 * length), which is reported as "B" in the log messages
 */
const LOG_THRESHOLD = 1024

/*
 * nodes under these keys never contain t.co URLs so we can speed up traversal
 * by pruning (not descending) them
 *
 * the replacer checks every visited key against this set (whatever its depth
 * in the document) and replaces the value of a matching key with a terminal
 */
const PRUNE_KEYS = new Set([
    'advertiser_account_service_levels',
    'card_platform',
    'clientEventInfo',
    'ext',
    'ext_media_color',
    'features',
    'feedbackInfo',
    'hashtags',
    'original_info',
    'player_image_color',
    'profile_banner_extensions',
    'profile_banner_extensions_media_color',
    'profile_image_extensions',
    'profile_image_extensions_media_color',
    'responseObjects',
    'sizes',
    'user_mentions',
    'video_info',
])

/*
 * a map from URI paths (strings) to the replacement count for each path. used
 * to keep a running total of the number of replacements in each document type
 *
 * the keys are the folded paths computed in +onResponse+ (i.e. with user/query
 * IDs removed). a path is added the first time a matching document is
 * processed, even if nothing was replaced in it (count 0)
 */
const STATS = {}

/*
 * a pattern which matches the domain(s) we expect data (JSON) to come from.
 * responses which don't come from a matching domain are ignored.
 *
 * matches exactly three hostnames: twitter.com, api.twitter.com and
 * mobile.twitter.com
 */
const TWITTER_API = /^(?:(?:api|mobile)\.)?twitter\.com$/

/*
 * a list of document URIs (paths) which are known to not contain t.co URLs and
 * which therefore don't need to be processed
 *
 * paths are looked up after they have been folded by +onResponse+, so GraphQL
 * entries are written without the query ID segment
 */
const URL_BLACKLIST = new Set([
    '/i/api/2/badge_count/badge_count.json',
    '/i/api/graphql/articleNudgeDomains',
    '/i/api/graphql/TopicToFollowSidebar',
])

/*
 * object keys whose corresponding values may be t.co URLs
 *
 * the replacer only tests a value against the t.co pattern if its key is in
 * this set
 */
const URL_KEYS = new Set(['url', 'string_value'])

/*
 * validate a candidate URL: hand back the value itself (truthy) if it looks
 * like an http(s) URL, and false if it doesn't
 */
const checkUrl = (() => {
    // a regex is used rather than the URL constructor as the latter is slower
    // (in v8) because it needs to be wrapped in a try/catch block
    const HTTP_URL = /^https?:\/\/\w/i

    // RegExp#test stringifies its argument, so non-string candidates (e.g.
    // undefined) don't need to be coerced or special-cased
    //
    // https://tc39.es/ecma262/#sec-regexp.prototype.test
    return function checkUrl (candidate) {
        return HTTP_URL.test(candidate) ? candidate : false
    }
})()

/*
 * replace the built-in XHR#send method with a custom version which swaps in our
 * custom response handler. once done, we delegate to the original handler
 * (this.onreadystatechange)
 *
 * oldSend: the original XHR#send method
 *
 * returns the replacement +send+ method
 *
 * only the handler assigned to this.onreadystatechange at the time +send+ is
 * called is wrapped. +onResponse+ is only invoked for completed (DONE)
 * requests which have a response URL and a 200 status
 */
const hookXHRSend = oldSend => {
    return /** @this {XMLHttpRequest} */ function send (body = null) {
        const oldOnReadyStateChange = this.onreadystatechange

        this.onreadystatechange = function (event) {
            if (this.readyState === this.DONE && this.responseURL && this.status === 200) {
                // a failure in our handler must never prevent the page's own
                // handler from running, so report the error and carry on
                try {
                    onResponse(this, this.responseURL)
                } catch (e) {
                    console.error(`Can't process response for ${this.responseURL}:`, e)
                }
            }

            if (oldOnReadyStateChange) {
                oldOnReadyStateChange.call(this, event)
            }
        }

        oldSend.call(this, body)
    }
}

/*
 * return true if the supplied value is a non-null object (e.g. an array or
 * plain object), false otherwise
 *
 * always returns a boolean rather than echoing a falsy input (null, 0, ""),
 * so the result matches the documented contract. callers only rely on its
 * truthiness
 */
const isObject = value => value !== null && typeof value === 'object'

/*
 * report whether the supplied value is a plain object (as opposed to an array,
 * a primitive, null etc.) by checking its built-in type tag
 *
 * this is only applied to parsed JSON, so it doesn't try to be watertight
 */
const isPlainObject = value => Object.prototype.toString.call(value) === '[object Object]'

/*
 * report whether the supplied value is a t.co URL, i.e. an http(s) URL on
 * t.co whose path is a single run of word characters
 */
const isTrackedUrl = (() => {
    // a regex is used instead of the URL constructor (plus try/catch) because
    // it's faster in v8
    const TRACKED_URL = /^https?:\/\/t\.co\/\w+$/

    // RegExp#test converts its argument to a string, so there's no need to do
    // that here
    return function isTrackedUrl (candidate) {
        return TRACKED_URL.test(candidate)
    }
})()

/*
 * replacement for Twitter's default handler for XHR requests. we transform the
 * response if it's a) JSON and b) contains URL data; otherwise, we leave it
 * unchanged
 *
 * xhr: the completed XMLHttpRequest
 * uri: the (absolute) URL of its response
 *
 * returns nothing. if any URLs were expanded, the request's responseText
 * property is overridden with the re-serialized document. the per-path
 * totals in STATS are updated for every document we traverse
 */
const onResponse = (xhr, uri) => {
    // responseText is only readable when the response type is "" (the default)
    // or "text": reading it for any other type (e.g. "json") throws an
    // InvalidStateError, so there's nothing for us to rewrite in that case
    const { responseType } = xhr

    if (responseType && responseType !== 'text') {
        return
    }

    const contentType = xhr.getResponseHeader('Content-Type')

    if (!CONTENT_TYPE.test(contentType)) {
        return
    }

    const url = new URL(uri)

    // exclude e.g. the config-<date>.json file from pbs.twimg.com, which is the
    // second biggest document (~500K) after home_latest.json (~700K)
    if (!TWITTER_API.test(url.hostname)) {
        return
    }

    const json = xhr.responseText

    // the length of the text, which is what we report as the size ("B")
    const size = json.length

    // fold paths which differ only in the user or query ID, e.g.:
    //
    //   /2/timeline/profile/1234.json    -> /2/timeline/profile.json
    //   /i/api/graphql/abc123/UserTweets -> /i/api/graphql/UserTweets
    //
    const path = url.pathname
        .replace(/\/\d+\.json$/, '.json')
        .replace(/^(.+?\/graphql\/)[^\/]+\/(.+)$/, '$1$2')

    if (URL_BLACKLIST.has(path)) {
        return
    }

    let data

    try {
        data = JSON.parse(json)
    } catch (e) {
        console.error(`Can't parse JSON for ${uri}:`, e)
        return
    }

    // only objects and arrays can contain URL nodes
    if (!isObject(data)) {
        return
    }

    // record whether this is the first document we've seen for this path
    // before the total is updated
    const newPath = !(path in STATS)
    const count = transform(data, path)

    STATS[path] = (STATS[path] || 0) + count

    if (!count) {
        // only report a miss if nothing has ever been replaced for this path
        // and the document is "not small", i.e. its size is at least
        // LOG_THRESHOLD (inclusive)
        if (!STATS[path] && size >= LOG_THRESHOLD) {
            console.debug(`no replacements in ${path} (${size} B)`)
        }

        return
    }

    // override the (read-only) responseText property on this request with the
    // updated document. the descriptor is exported so that it can be used by
    // the page's Object.defineProperty
    const descriptor = { value: JSON.stringify(data) }
    const clone = GMCompat.export(descriptor)

    GMCompat.unsafeWindow.Object.defineProperty(xhr, 'responseText', clone)

    const replacements = `replacement${count === 1 ? '' : 's'}`

    console.debug(`${count} ${replacements} in ${path} (${size} B)`)

    // dump the running totals whenever a new document type is encountered
    if (newPath) {
        console.log(STATS)
    }
}

/*
 * create the JSON.stringify +replacer+ function used by +transform+ to
 * traverse documents and update their URL nodes in place.
 *
 * the serialized JSON is discarded by the caller: JSON.stringify is only used
 * to walk the tree, so the replacer returns a terminal (0) for every value
 * which doesn't need to be descended into.
 *
 * state is the object shared with +transform+:
 *
 *   - count: the number of URLs replaced so far (incremented here)
 *   - seen: a Map from t.co URLs to their expanded URLs
 *   - unresolved: a Map from t.co URLs whose expansion isn't known (yet) to
 *     the list of { target, key } locations they were found at
 *
 * JSON.stringify invokes the replacer with +this+ set to the object or array
 * which holds the key being visited, which is how nodes are updated in place
 */
const replacerFor = state => /** @this {any} */ function replacer (key, value) {
    // exclude subtrees which never contain t.co URLs
    if (PRUNE_KEYS.has(key)) {
        return 0 // a terminal value to stop traversal
    }

    // we only care about the "card_url" property in binding_values
    // objects/arrays. exclude the other 24 properties
    //
    // arrays are narrowed to the first element whose +key+ is "card_url" and
    // plain objects to their card_url property. any other value falls through
    // to the checks below
    if (key === 'binding_values') {
        if (Array.isArray(value)) {
            const found = value.find(it => it?.key === 'card_url')
            return found ? [found] : 0
        } else if (isPlainObject(value)) {
            return { card_url: (value.card_url || 0) }
        }
    }

    // expand t.co URL nodes in place
    //
    // note this comes before the "legacy" check because legacy.url is a common
    // location and it needs to be modified in place rather than transferred to
    // a new object with a subset of the keys. this doesn't apply to the other
    // legacy keys as they all point to objects/arrays
    //
    // NOTE(review): "url" isn't in LEGACY_KEYS, so the filtered object returned
    // by the "legacy" check below doesn't include legacy.url and it doesn't
    // appear to be visited by this traversal - confirm whether that's intended
    if (URL_KEYS.has(key) && isTrackedUrl(value)) {
        const { seen, unresolved } = state

        let expandedUrl

        if ((expandedUrl = seen.get(value))) {
            // the expansion was already found elsewhere in this document
            this[key] = expandedUrl
            ++state.count
        } else if ((expandedUrl = checkUrl(this.expanded_url || this.expanded))) {
            // the expansion is a sibling of the t.co URL: use it and remember
            // it for other nodes with the same t.co URL
            seen.set(value, expandedUrl)
            this[key] = expandedUrl
            ++state.count
        } else {
            // no expansion is available (yet): record the location so that
            // +transform+ can fill it in if the expansion turns up later
            let targets = unresolved.get(value)

            if (!targets) {
                unresolved.set(value, targets = [])
            }

            targets.push({ target: this, key })
        }

        // the value is a string, so the final return below yields a terminal
    }

    // reduce the keys under this.legacy (typically around 30) to the handful we
    // care about
    //
    // the values are copied by reference, so the nested objects/arrays are
    // still updated in place when the filtered object is traversed
    if (key === 'legacy' && isPlainObject(value)) {
        // we could use an array, but it doesn't appear to be faster (in v8)
        const filtered = {}

        for (let i = 0; i < LEGACY_KEYS.length; ++i) {
            const key = LEGACY_KEYS[i]

            if (key in value) {
                filtered[key] = value[key]
            }
        }

        return filtered
    }

    // shrink terminals (don't waste space/memory in the (discarded) JSON):
    // objects and arrays are returned as-is so that they're descended into
    return isObject(value) ? value : 0
}

/*
 * expand the t.co URLs in a parsed document, updating it in place
 *
 * data: the document (an array or object)
 * path: the (folded) path of the document's URI. only used in log messages
 *
 * the document is walked with JSON.stringify and the replacer created by
 * +replacerFor+. t.co URLs whose expansion wasn't known when they were visited
 * are filled in afterwards if the expansion was found elsewhere in the
 * document. the ones which remain unresolved are reported as a warning
 *
 * returns the number of URLs which were replaced
 */
const transform = (data, path) => {
    const state = { count: 0, seen: new Map(), unresolved: new Map() }
    const visit = replacerFor(state)

    // top-level tweet or user data (e.g. /favorites/create.json) is identified
    // by its "id_str" key. like arrays, it's traversed in full
    const traverseAll = Array.isArray(data) || ('id_str' in data)

    if (traverseAll) {
        JSON.stringify(data, visit)
    } else {
        // only descend into the roots which are known to contain URLs
        for (const root of DOCUMENT_ROOTS) {
            if (!(root in data)) {
                continue
            }

            JSON.stringify(data[root], visit)
        }
    }

    // second pass: resolve the URLs whose expansions were discovered after
    // the URLs themselves were visited
    for (const [trackedUrl, locations] of state.unresolved) {
        const expandedUrl = state.seen.get(trackedUrl)

        if (!expandedUrl) {
            continue
        }

        for (const { target, key } of locations) {
            target[key] = expandedUrl
            state.count += 1
        }

        state.unresolved.delete(trackedUrl)
    }

    if (state.unresolved.size) {
        console.warn(`unresolved URIs (${path}):`, Object.fromEntries(state.unresolved))
    }

    return state.count
}

/*
 * install the hook: swap the page's XHR#send for our wrapper, which scans the
 * responses for tweets and expands their URLs. the wrapper is exported so that
 * it can be called by the page
 */
const xhrProto = GMCompat.unsafeWindow.XMLHttpRequest.prototype
const hookedSend = hookXHRSend(xhrProto.send)

xhrProto.send = GMCompat.export(hookedSend)