AO3: Sample character tag scraper!

Grabs every single char tag used by fics that you page through!

  1. // ==UserScript==
  2. // @name AO3: Sample character tag scraper!
  3. // @description Grabs every single char tag used by fics that you page through!
  4. // @version 1.0.0
  5.  
  6. // @author owlwinter
  7. // @namespace N/A
  8. // @license MIT license
  9.  
  10. // @match *://*.archiveofourown.org/*TwoSet*
  11. // @grant GM_setValue
  12. // @grant GM_getValue
  13. // ==/UserScript==
  14.  
  (function() {
    'use strict';

    // I needed to grab a list of every single character tag used by works in the TwoSet Violin
    // fandom, which had just recently been split off as its own fandom from Vlogging.
    // So I wrote this!
    // Every time I page through a page of TwoSet fics (see the @match in the header), this will
    // grab all the char tags on that page of results and save the unique ones.
    // Then, on the last page of TwoSet fics, it pops up the resulting tag list!
    // So all I had to do was click through the 100ish pages of TwoSet works and then this script
    // did all the rest!

    // A couple of notes:
    // 1) This is not a script I'd want always-on; after scraping the data be sure to disable it again!
    // 2) The data saved from page to page is not "reset" after it's displayed on the last page.
    //    To reset it, click the "Storage" tab at the top of this script in Tampermonkey
    //    (next to the Editor and Settings tabs), and manually change it to be {}
    // 3) If you want to make your own version of this script from scratch, be sure
    //    to include the two @grant lines in the ==UserScript== header. They are easy to
    //    overlook, but this script won't work without them!
    // 4) Also don't forget to change the @match statement to your needs!!
    // 5) Feel free to ping me if you have questions, @owlwinter8 :)

    // Key under which the collected tags are persisted (as a JSON array string).
    const STORAGE_KEY = "twoset_chardict";

    // Characters that cannot appear literally in an AO3 /tags/<name> URL, and the
    // placeholders used for them. "&" -> "*a*" follows the same scheme as the others.
    // NOTE(review): mapping taken from AO3's tag URL convention - confirm if links misbehave.
    const TAG_URL_ESCAPES = {
      "/": "*s*",
      "&": "*a*",
      ".": "*d*",
      "?": "*q*",
      "#": "*h*"
    };

    /**
     * Converts a tag name into the path segment used after "/tags/".
     * Every occurrence of each special character is substituted (not just the first),
     * then the result is percent-encoded so characters such as "%" survive intact.
     * @param {string} tag - The tag name as displayed on the page.
     * @returns {string} The URL-safe form of the tag name.
     */
    function toTagUrlName(tag) {
      const escaped = tag.replace(/[\/&.?#]/g, function(ch) {
        return TAG_URL_ESCAPES[ch];
      });
      return encodeURIComponent(escaped);
    }

    /**
     * Loads the tags saved by earlier page visits.
     * Falls back to an empty set when nothing is saved or the saved value is unusable,
     * so one bad storage entry cannot stop the script from running.
     * @returns {Set<string>} The previously collected tags.
     */
    function loadSavedTags() {
      const raw = GM_getValue(STORAGE_KEY, null);
      if (raw == null) {
        // If there isn't any old data saved, we start fresh!
        return new Set();
      }
      try {
        // The script itself always stores a JSON string; tolerate a hand-edited raw array too.
        const parsed = typeof raw === "string" ? JSON.parse(raw) : raw;
        if (Array.isArray(parsed)) {
          return new Set(parsed);
        }
        console.warn("Tag scraper: saved data is not a list, starting fresh. Saved value was:", raw);
      } catch (err) {
        console.warn("Tag scraper: could not parse saved data, starting fresh. Saved value was:", raw, err);
      }
      return new Set();
    }

    /**
     * Adds every character tag found in the works listed on the current page to the set.
     * If you want to, you could collect multiple tag types into multiple sets!
     * @param {Set<string>} tags - Set to add to; duplicates are ignored by the Set itself.
     */
    function collectCharacterTags(tags) {
      // For each work on the page
      for (const work of Array.from(document.querySelectorAll("li.work"))) {
        // Grabs the character tags
        for (const tagElement of Array.from(work.querySelectorAll(".characters"))) {
          const name = tagElement.innerText.trim();
          if (name !== "") {
            tags.add(name);
          }
        }
      }
    }

    /**
     * Finds the page's flash box, creating one if the page has none.
     * @returns {Element} The element the results should be written into.
     */
    function getFlashBox() {
      // For some reason this seems to always be present on the page, even if there is no content in it
      const existing = document.getElementsByClassName("flash")[0];
      if (existing != null) {
        return existing;
      }
      // Fallback so the results are never lost to a TypeError on an unexpected page layout.
      const created = document.createElement("div");
      created.classList.add("flash");
      // presumably AO3 wraps page content in #main; fall back to <body> if not
      const parent = document.getElementById("main") || document.body;
      parent.insertBefore(created, parent.firstChild);
      return created;
    }

    /**
     * Replaces the flash box content with a comma-separated list of links to each tag.
     * @param {string[]} tagList - The collected tag names, in display order.
     */
    function showCollectedTags(tagList) {
      const flash = getFlashBox();
      flash.textContent = "";
      flash.classList.add("notice");

      flash.appendChild(document.createTextNode("The following tags were collected: "));
      let spacer = "";
      for (const tag of tagList) {
        flash.appendChild(document.createTextNode(spacer + "\u000a"));
        const link = document.createElement("a");
        link.href = "/tags/" + toTagUrlName(tag);
        link.target = "_blank";
        link.textContent = tag;
        flash.appendChild(link);
        spacer = ", ";
      }
    }

    const chartags = loadSavedTags();
    collectCharacterTags(chartags);

    // Saves the tag data across pages
    // https://wiki.greasespot.net/GM.setValue
    // On Tampermonkey, you can see the info saved by opening the script and clicking Storage!
    const arrayeddata = Array.from(chartags);
    GM_setValue(STORAGE_KEY, JSON.stringify(arrayeddata));

    // If on the last page of results (the "next" pagination control is disabled), show them off~!!
    if (document.querySelector(".next .disabled") != null) {
      showCollectedTags(arrayeddata);
    }

  })();