AI Image Description Generator

使用AI生成网页图片描述

当前为 2024-12-03 提交的版本,查看 最新版本

  1. // ==UserScript==
  2. // @name AI Image Description Generator
  3. // @namespace http://tampermonkey.net/
  4. // @version 1.4
  5. // @description 使用AI生成网页图片描述
  6. // @author AlphaCat
  7. // @match *://*/*
  8. // @grant GM_xmlhttpRequest
  9. // @grant GM_addStyle
  10. // @grant GM_setValue
  11. // @grant GM_getValue
  12. // @grant GM_registerMenuCommand
  13. // @grant GM_setClipboard
  14. // @license MIT
  15. // ==/UserScript==
  16.  
  17. (function() {
  18. 'use strict';
  19.  
  // Inject every style rule used by the script's UI: the config modal, floating
  // button, hover menu, overlay, toast, description tooltip and result modal.
  // `.ai-menu` is `position: fixed` because it is placed with viewport
  // coordinates (getBoundingClientRect of the fixed floating button); with
  // `absolute` it drifted away from the button as soon as the page scrolled.
  GM_addStyle(`
    .ai-config-modal {
      position: fixed;
      top: 50%;
      left: 50%;
      transform: translate(-50%, -50%);
      background: white;
      padding: 20px;
      border-radius: 8px;
      box-shadow: 0 2px 10px rgba(0,0,0,0.1);
      z-index: 10000;
      min-width: 500px;
      height: auto;
    }
    .ai-config-modal h3 {
      margin: 0 0 15px 0;
      font-size: 14px;
      font-weight: bold;
      color: #333;
    }
    .ai-config-modal label {
      display: inline-block;
      font-size: 12px;
      font-weight: bold;
      color: #333;
      margin: 0;
      line-height: normal;
      height: auto;
    }
    .ai-config-modal .input-wrapper {
      position: relative;
      display: flex;
      align-items: center;
    }
    .ai-config-modal input {
      display: block;
      width: 100%;
      padding: 2px 24px 2px 2px;
      margin: 2px;
      border: 1px solid #ddd;
      border-radius: 4px;
      font-size: 13px;
      line-height: normal;
      height: auto;
      box-sizing: border-box;
    }
    .ai-config-modal .input-icon {
      position: absolute;
      right: 4px;
      width: 16px;
      height: 16px;
      cursor: pointer;
      display: flex;
      align-items: center;
      justify-content: center;
      color: #666;
      font-size: 12px;
      user-select: none;
    }
    .ai-config-modal .clear-icon {
      right: 24px;
    }
    .ai-config-modal .toggle-password {
      right: 4px;
    }
    .ai-config-modal .input-icon:hover {
      color: #333;
    }
    .ai-config-modal .input-group {
      margin-bottom: 12px;
      height: auto;
      display: flex;
      flex-direction: column;
    }
    .ai-config-modal .button-row {
      display: flex;
      gap: 10px;
      align-items: center;
      margin-top: 5px;
    }
    .ai-config-modal .check-button {
      padding: 4px 8px;
      border: none;
      border-radius: 4px;
      background: #007bff;
      color: white;
      cursor: pointer;
      font-size: 12px;
    }
    .ai-config-modal .check-button:hover {
      background: #0056b3;
    }
    .ai-config-modal .check-button:disabled {
      background: #cccccc;
      cursor: not-allowed;
    }
    .ai-config-modal select {
      width: 100%;
      padding: 4px;
      border: 1px solid #ddd;
      border-radius: 4px;
      font-size: 13px;
      margin-top: 2px;
    }
    .ai-config-modal .status-text {
      font-size: 12px;
      margin-left: 10px;
    }
    .ai-config-modal .status-success {
      color: #28a745;
    }
    .ai-config-modal .status-error {
      color: #dc3545;
    }
    .ai-config-modal button {
      margin: 10px 5px;
      padding: 8px 15px;
      border: none;
      border-radius: 4px;
      cursor: pointer;
      font-size: 14px;
    }
    .ai-config-modal button#ai-save-config {
      background: #4CAF50;
      color: white;
    }
    .ai-config-modal button#ai-cancel-config {
      background: #dc3545;
      color: white;
    }
    .ai-config-modal button:hover {
      opacity: 0.9;
    }
    .ai-floating-btn {
      position: fixed;
      width: 32px;
      height: 32px;
      background: #4CAF50;
      color: white;
      border-radius: 50%;
      cursor: move;
      z-index: 9999;
      box-shadow: 0 2px 5px rgba(0,0,0,0.2);
      display: flex;
      align-items: center;
      justify-content: center;
      user-select: none;
      transition: background-color 0.3s;
    }
    .ai-floating-btn:hover {
      background: #45a049;
    }
    .ai-floating-btn svg {
      width: 20px;
      height: 20px;
      fill: white;
    }
    .ai-menu {
      position: fixed;
      background: white;
      border-radius: 5px;
      box-shadow: 0 2px 10px rgba(0,0,0,0.1);
      padding: 8px;
      z-index: 10000;
      display: flex;
      gap: 8px;
    }
    .ai-menu-item {
      width: 32px;
      height: 32px;
      padding: 6px;
      cursor: pointer;
      border-radius: 50%;
      display: flex;
      align-items: center;
      justify-content: center;
      transition: background-color 0.3s;
    }
    .ai-menu-item:hover {
      background: #f5f5f5;
    }
    .ai-menu-item svg {
      width: 20px;
      height: 20px;
      fill: #666;
    }
    .ai-menu-item:hover svg {
      fill: #4CAF50;
    }
    .ai-modal-overlay {
      position: fixed;
      top: 0;
      left: 0;
      right: 0;
      bottom: 0;
      background: rgba(0, 0, 0, 0.5);
      z-index: 9999;
    }
    .ai-image-options {
      display: flex;
      flex-direction: column;
      gap: 10px;
      margin: 15px 0;
    }
    .ai-image-options button {
      padding: 8px 15px;
      border: none;
      border-radius: 4px;
      background: #4CAF50;
      color: white;
      cursor: pointer;
      transition: background-color 0.3s;
      font-size: 14px;
    }
    .ai-image-options button:hover {
      background: #45a049;
    }
    #ai-cancel {
      background: #dc3545;
      color: white;
    }
    #ai-cancel:hover {
      opacity: 0.9;
    }
    .ai-toast {
      position: fixed;
      top: 20px;
      left: 50%;
      transform: translateX(-50%);
      padding: 10px 20px;
      background: rgba(0, 0, 0, 0.8);
      color: white;
      border-radius: 4px;
      font-size: 14px;
      z-index: 10000;
      animation: fadeInOut 3s ease;
      pointer-events: none;
      white-space: pre-line;
      text-align: center;
      max-width: 80%;
      box-shadow: 0 2px 8px rgba(0, 0, 0, 0.2);
    }
    @keyframes fadeInOut {
      0% { opacity: 0; transform: translate(-50%, 10px); }
      10% { opacity: 1; transform: translate(-50%, 0); }
      90% { opacity: 1; transform: translate(-50%, 0); }
      100% { opacity: 0; transform: translate(-50%, -10px); }
    }
    .ai-config-modal .button-group {
      display: flex;
      justify-content: flex-end;
      gap: 10px;
      margin-top: 20px;
    }
    .ai-config-modal .button-group button {
      padding: 6px 16px;
      border: none;
      border-radius: 4px;
      cursor: pointer;
      font-size: 14px;
      transition: background-color 0.2s;
    }
    .ai-config-modal .save-button {
      background: #007bff;
      color: white;
    }
    .ai-config-modal .save-button:hover {
      background: #0056b3;
    }
    .ai-config-modal .save-button:disabled {
      background: #cccccc;
      cursor: not-allowed;
    }
    .ai-config-modal .cancel-button {
      background: #f8f9fa;
      color: #333;
    }
    .ai-config-modal .cancel-button:hover {
      background: #e2e6ea;
    }
    .ai-selecting-image {
      cursor: crosshair !important;
    }
    .ai-selecting-image * {
      cursor: crosshair !important;
    }
    .ai-image-description {
      position: fixed;
      background: rgba(0, 0, 0, 0.8);
      color: white;
      padding: 10px 15px;
      border-radius: 4px;
      font-size: 14px;
      line-height: 1.5;
      max-width: 300px;
      word-wrap: break-word;
      z-index: 10000;
      pointer-events: none;
      animation: fadeIn 0.3s ease;
    }
    @keyframes fadeIn {
      from { opacity: 0; }
      to { opacity: 1; }
    }
    .ai-result-modal {
      position: fixed;
      top: 50%;
      left: 50%;
      transform: translate(-50%, -50%);
      background: white;
      padding: 20px;
      border-radius: 8px;
      box-shadow: 0 2px 10px rgba(0, 0, 0, 0.2);
      z-index: 10001;
      max-width: 80%;
      width: 500px;
    }
    .ai-result-modal h3 {
      margin: 0 0 15px 0;
      font-size: 16px;
      color: #333;
    }
    .ai-result-modal .code-block {
      background: #f5f5f5;
      padding: 15px;
      border-radius: 4px;
      margin: 10px 0;
      cursor: pointer;
      position: relative;
      white-space: pre-wrap;
      word-wrap: break-word;
      font-family: monospace;
      transition: background-color 0.2s;
    }
    .ai-result-modal .code-block:hover {
      background: #ebebeb;
    }
    .ai-result-modal .code-block::after {
      content: "点击复制";
      position: absolute;
      top: 5px;
      right: 5px;
      font-size: 12px;
      color: #666;
      background: rgba(255, 255, 255, 0.8);
      padding: 2px 6px;
      border-radius: 3px;
      opacity: 0;
      transition: opacity 0.2s;
    }
    .ai-result-modal .code-block:hover::after {
      opacity: 1;
    }
    .ai-result-modal .code-block.copied::after {
      content: "已复制!";
      color: #4CAF50;
    }
    .ai-result-modal .close-button {
      position: absolute;
      top: 10px;
      right: 10px;
      background: none;
      border: none;
      font-size: 20px;
      cursor: pointer;
      color: #666;
      padding: 5px;
      line-height: 1;
    }
    .ai-result-modal .close-button:hover {
      color: #333;
    }
  `);
  394.  
  /**
   * Toggle password visibility for the input that shares a parent with the
   * clicked icon, and swap the icon glyph to reflect the new state.
   *
   * @param {Element} element - The toggle icon; its parent element must contain the <input>.
   */
  function togglePassword(element) {
    const field = element.parentElement.querySelector('input');
    const wasMasked = field.type === 'password';
    field.type = wasMasked ? 'text' : 'password';
    element.textContent = wasMasked ? '👁️‍🗨️' : '👁️';
  }
  406.  
  /**
   * Validate the API configuration by listing models, and return the
   * multimodal ones (ids containing "vision" or "gpt-4-v").
   *
   * @param {string} apiEndpoint - Base URL of the API; trailing slashes are tolerated.
   * @param {string} apiKey - Bearer token sent in the Authorization header.
   * @returns {Promise<Array<{id: string, name: string}>>} Matching models (possibly empty).
   * @throws {Error} On network failure, a non-2xx status, or a response without a `data` array.
   */
  async function checkApiAndGetModels(apiEndpoint, apiKey) {
    // "https://host/" would otherwise become "https://host//v1/models".
    const baseUrl = String(apiEndpoint).replace(/\/+$/, '');

    try {
      const response = await fetch(`${baseUrl}/v1/models`, {
        method: 'GET',
        headers: {
          'Authorization': `Bearer ${apiKey}`,
          'Content-Type': 'application/json'
        }
      });

      if (!response.ok) {
        throw new Error(`HTTP error! status: ${response.status}`);
      }

      const result = await response.json();
      if (!Array.isArray(result?.data)) {
        throw new Error('Invalid response format');
      }

      // Keep multimodal models only; entries without a string id are ignored
      // instead of aborting the whole listing with a TypeError.
      return result.data
        .filter((model) => typeof model?.id === 'string')
        .filter((model) => model.id.includes('vision') || model.id.includes('gpt-4-v'))
        .map((model) => ({ id: model.id, name: model.id }));
    } catch (error) {
      console.error('Error fetching models:', error);
      throw error;
    }
  }
  440.  
  /**
   * Check that an endpoint, key and model are stored and that the API answers
   * a model-list request with at least one model.
   *
   * Alerts the user on every failure path; when a setting is missing it also
   * opens the configuration modal.
   *
   * @returns {Promise<boolean>} true when the stored configuration is usable.
   */
  async function checkApiConfig() {
    const [endpoint, key, model] = ['apiEndpoint', 'apiKey', 'selectedModel']
      .map((settingName) => GM_getValue(settingName, '').trim());

    const isComplete = Boolean(endpoint && key && model);
    if (!isComplete) {
      alert('请先配置API Endpoint、API Key和模型');
      showConfigModal();
      return false;
    }

    try {
      const { length: modelCount } = await checkApiAndGetModels(endpoint, key);
      if (modelCount !== 0) {
        // Model-selection logic could be added here.
        return true;
      }
      alert('无法获取可用模型列表,请检查API配置是否正确');
      return false;
    } catch (error) {
      console.error('Error checking API config:', error);
      alert('API配置验证失败,请检查配置是否正确');
      return false;
    }
  }
  467.  
  /**
   * Collect every <img> element in the document and hand the list to
   * processImages. The processing is started but not awaited.
   */
  function describeAllImages() {
    const allImages = Array.from(document.querySelectorAll('img'));
    console.log('开始处理所有图片,共找到:', allImages.length, '张图片');
    processImages(allImages);
  }
  474.  
  /**
   * Hand every <img> that is currently visible in the viewport to
   * processImages. The processing is started but not awaited.
   *
   * An image counts as visible when its bounding box has a non-zero area and
   * overlaps the viewport. The previous "fully inside the viewport" test
   * accepted `display: none` images (their rect is all zeros) and rejected
   * images that were only partly on screen or larger than the viewport.
   */
  function describeVisibleImages() {
    const viewportHeight = window.innerHeight || document.documentElement.clientHeight;
    const viewportWidth = window.innerWidth || document.documentElement.clientWidth;

    const images = Array.from(document.querySelectorAll('img')).filter((img) => {
      const rect = img.getBoundingClientRect();
      const hasArea = rect.width > 0 && rect.height > 0;
      const overlapsViewport =
        rect.bottom > 0 &&
        rect.right > 0 &&
        rect.top < viewportHeight &&
        rect.left < viewportWidth;
      return hasArea && overlapsViewport;
    });

    console.log('开始处理可见图片,共找到:', images.length, '张图片');
    processImages(images);
  }
  489.  
  /**
   * Generate a description for each image that lacks alt text and store it
   * in the image's `alt` and `title`.
   *
   * Images are handled one after another. A failure on one image is logged
   * and does not stop the remaining images. Images that already have alt text
   * or that have no `src` are skipped.
   *
   * @param {Iterable<HTMLImageElement>} images - Images to process.
   * @returns {Promise<void>}
   */
  async function processImages(images) {
    const apiKey = GM_getValue('apiKey', '');
    const endpoint = GM_getValue('apiEndpoint', '');
    const selectedModel = GM_getValue('selectedModel', '');

    for (const img of images) {
      try {
        // Existing alt text is kept as-is.
        if (img.alt) {
          console.log('图片已有描述,跳过:', img.alt);
          continue;
        }

        // Without a source there is nothing to download or describe.
        const imageUrl = img.src;
        if (!imageUrl) {
          console.log('[Debug] Image has no src, skipping');
          continue;
        }
        console.log('处理图片:', imageUrl);

        const description = await getImageDescription(imageUrl, endpoint, apiKey, selectedModel);
        if (description) {
          img.alt = description;
          img.title = description;
          console.log('已添加描述:', description);
        }
      } catch (error) {
        console.error('处理图片时出错:', error);
      }
    }
  }
  521.  
  /**
   * Download an image and return its content as a Base64 string, without the
   * `data:...;base64,` prefix.
   *
   * @param {string} imageUrl - URL of the image to download.
   * @returns {Promise<string>} Base64-encoded image bytes.
   * @throws {Error} When the download fails, the server answers with a
   *   non-2xx status, or the blob cannot be read.
   */
  async function getImageBase64(imageUrl) {
    console.log('[Debug] Starting image to Base64 conversion for:', imageUrl);
    try {
      const response = await fetch(imageUrl);
      console.log('[Debug] Image fetch response:', response.status, response.statusText);
      // An error page must not be encoded and sent on as if it were the image.
      if (!response.ok) {
        throw new Error(`Failed to fetch image: HTTP ${response.status}`);
      }

      const blob = await response.blob();
      console.log('[Debug] Image blob size:', blob.size, 'bytes');

      // `return await` keeps read failures inside this try/catch so they are logged.
      return await new Promise((resolve, reject) => {
        const reader = new FileReader();
        // `load` fires only on success; `loadend` would also fire after an
        // error, when `reader.result` is null.
        reader.onload = () => {
          const base64 = String(reader.result).split(',')[1] ?? '';
          console.log('[Debug] Base64 conversion completed, length:', base64.length);
          resolve(base64);
        };
        reader.onerror = () => {
          console.error('[Debug] FileReader error:', reader.error);
          reject(reader.error ?? new Error('FileReader failed to read the image'));
        };
        reader.readAsDataURL(blob);
      });
    } catch (error) {
      console.error('[Debug] Error converting image to Base64:', error);
      throw error;
    }
  }
  548.  
  /**
   * Ask the chat-completions API to describe an image and stream the answer.
   *
   * The image is downloaded and embedded as a Base64 data URL. While the
   * response streams in, the tooltip is updated with the text so far; when
   * the stream ends, the full text is passed to showDescriptionModal.
   *
   * @param {string} imageUrl - URL of the image to describe.
   * @param {string} apiEndpoint - API base URL, with or without a trailing
   *   slash or a trailing version segment such as "/v1".
   * @param {string} apiKey - Bearer token.
   * @param {string} selectedModel - Model id sent in the request.
   * @returns {Promise<string>} The complete description.
   * @throws {Error} When the image cannot be loaded or the API request fails.
   */
  async function getImageDescription(imageUrl, apiEndpoint, apiKey, selectedModel) {
    console.log('[Debug] Starting image description request:', {
      apiEndpoint,
      selectedModel,
      imageUrl
    });

    try {
      const base64Image = await getImageBase64(imageUrl);
      const requestBody = {
        model: selectedModel,
        messages: [{
          role: "user",
          content: [
            {
              type: "image_url",
              image_url: {
                // NOTE(review): the MIME type is always declared as JPEG,
                // whatever the real image format is.
                url: `data:image/jpeg;base64,${base64Image}`
              }
            },
            {
              type: "text",
              text: "Describe the image. Answer in Chinese."
            }
          ]
        }],
        stream: true
      };

      console.log('[Debug] API Request body:', JSON.stringify(requestBody, null, 2));

      // The other API calls in this script use `${endpoint}/v1/...`, so the
      // same versioned path is used here. An endpoint that already ends in a
      // version segment is left alone to avoid "/v1/v1".
      const baseUrl = String(apiEndpoint).replace(/\/+$/, '');
      const chatUrl = /\/v\d+$/.test(baseUrl)
        ? `${baseUrl}/chat/completions`
        : `${baseUrl}/v1/chat/completions`;

      const response = await fetch(chatUrl, {
        method: 'POST',
        headers: {
          'Authorization': `Bearer ${apiKey}`,
          'Content-Type': 'application/json'
        },
        body: JSON.stringify(requestBody)
      });

      console.log('[Debug] API Response status:', response.status, response.statusText);
      if (!response.ok) {
        const errorText = await response.text();
        console.error('[Debug] API Error response:', errorText);
        throw new Error(`HTTP error! status: ${response.status}`);
      }

      const reader = response.body.getReader();
      const decoder = new TextDecoder();
      let description = '';
      let chunkCounter = 0;
      // Holds the trailing partial line until the rest of it arrives.
      let pending = '';

      // Handle one complete server-sent-event line.
      const consumeLine = (rawLine) => {
        const line = rawLine.trim();
        if (!line.startsWith('data:')) {
          return;
        }
        const payload = line.slice('data:'.length).trim();
        if (payload === '[DONE]') {
          console.log('[Debug] Received [DONE] signal');
          return;
        }
        try {
          const jsonData = JSON.parse(payload);
          console.log('[Debug] Parsed JSON data:', jsonData);
          const content = jsonData.choices?.[0]?.delta?.content;
          if (content) {
            description += content;
            console.log('[Debug] Updated description:', description);
            updateDescriptionTooltip(description);
          }
        } catch (e) {
          console.error('[Debug] Error parsing JSON:', e, 'Raw string:', payload);
        }
      };

      while (true) {
        const { value, done } = await reader.read();
        if (done) {
          break;
        }

        // `stream: true` keeps a multi-byte character that is split across
        // two chunks intact; the answer is requested in Chinese.
        const text = decoder.decode(value, { stream: true });
        console.log(`[Debug] Received chunk #${++chunkCounter}:`, text);

        // Only complete lines are parsed; the last fragment waits for more data.
        const lines = (pending + text).split('\n');
        pending = lines.pop();
        lines.forEach(consumeLine);
      }

      // Flush whatever the decoder and the line buffer still hold.
      pending += decoder.decode();
      if (pending) {
        consumeLine(pending);
      }

      console.log('[Debug] Stream completed. Final description:', description);
      showDescriptionModal(description);
      return description;
    } catch (error) {
      console.error('[Debug] Error in getImageDescription:', error);
      throw error;
    }
  }
  644.  
  /**
   * Create a description tooltip at the given coordinates and append it to
   * the document body.
   *
   * @param {string} description - Text to show.
   * @param {number} x - Value for the tooltip's `left`, in pixels.
   * @param {number} y - Value for the tooltip's `top`, in pixels.
   * @returns {HTMLDivElement} The tooltip element that was appended.
   */
  function showDescriptionTooltip(description, x, y) {
    const bubble = document.createElement('div');
    bubble.className = 'ai-image-description';
    bubble.textContent = description;
    Object.assign(bubble.style, { left: `${x}px`, top: `${y}px` });
    document.body.appendChild(bubble);
    return bubble;
  }
  655.  
  /**
   * Replace the text of the first description tooltip in the document.
   * Does nothing when no tooltip exists.
   *
   * @param {string} description - New tooltip text.
   */
  function updateDescriptionTooltip(description) {
    const bubble = document.querySelector('.ai-image-description');
    if (!bubble) {
      return;
    }
    bubble.textContent = description;
  }
  663.  
  /**
   * Remove the first description tooltip from the document, if there is one.
   */
  function removeDescriptionTooltip() {
    const bubble = document.querySelector('.ai-image-description');
    if (!bubble) {
      return;
    }
    bubble.remove();
  }
  671.  
  /**
   * Enter image selection mode: the cursor becomes a crosshair and the next
   * click on an <img> requests a description for that image.
   *
   * While the mode is armed, every click on the page is intercepted in the
   * capture phase. The mode ends, and both listeners are removed, after one
   * image has been processed, when the API configuration is missing, or when
   * Escape is pressed. Calling this function again while armed replaces the
   * previous listeners instead of stacking new ones.
   */
  function enterImageSelectionMode() {
    console.log('[Debug] Entering image selection mode');

    // Remove listeners from an earlier activation that is still armed.
    enterImageSelectionMode.teardown?.();

    document.body.style.cursor = 'crosshair';
    isSelectionMode = true;

    // Guards against a second request when the user clicks again mid-request.
    let isProcessing = false;

    const teardown = () => {
      document.removeEventListener('click', clickHandler, true);
      document.removeEventListener('keydown', escHandler);
      if (enterImageSelectionMode.teardown === teardown) {
        enterImageSelectionMode.teardown = null;
      }
    };

    async function clickHandler(e) {
      // The mode was ended elsewhere without removing this listener:
      // unregister and let the click reach the page untouched.
      if (!isSelectionMode) {
        teardown();
        return;
      }

      e.preventDefault();
      e.stopPropagation();

      if (e.target.tagName !== 'IMG' || isProcessing) {
        return;
      }

      console.log('[Debug] Image clicked:', e.target.src);
      const endpoint = GM_getValue('apiEndpoint', '');
      const apiKey = GM_getValue('apiKey', '');
      const selectedModel = GM_getValue('selectedModel', '');

      console.log('[Debug] Current configuration:', {
        endpoint,
        selectedModel,
        hasApiKey: !!apiKey
      });

      if (!endpoint || !apiKey || !selectedModel) {
        showToast('请先完成API配置');
        // Without this the listener kept swallowing every later page click.
        teardown();
        exitImageSelectionMode();
        return;
      }

      isProcessing = true;
      // The tooltip is `position: fixed`, so it takes viewport coordinates
      // (clientX/Y); pageX/Y misplaces it once the page is scrolled.
      showDescriptionTooltip('正在生成描述...', e.clientX + 10, e.clientY + 10);

      try {
        await getImageDescription(e.target.src, endpoint, apiKey, selectedModel);
      } catch (error) {
        console.error('[Debug] Description generation failed:', error);
        removeDescriptionTooltip();
        showToast('生成描述失败: ' + error.message);
      } finally {
        // One image per activation: always disarm, on success or failure.
        teardown();
        exitImageSelectionMode();
      }
    }

    function escHandler(e) {
      if (e.key === 'Escape') {
        teardown();
        exitImageSelectionMode();
      }
    }

    document.addEventListener('click', clickHandler, true);
    document.addEventListener('keydown', escHandler);
    enterImageSelectionMode.teardown = teardown;
  }
  732.  
  /**
   * Leave image selection mode: drop the crosshair cursor and clear the
   * selection-mode flag.
   */
  function exitImageSelectionMode() {
    console.log('[Debug] Exiting image selection mode');
    // Clearing the inline value lets the page's own cursor rule apply again;
    // forcing 'default' would keep overriding it after the mode has ended.
    document.body.style.cursor = '';
    isSelectionMode = false;
  }
  739.  
  /**
   * Show a toast message and remove it again after a delay.
   *
   * @param {string} message - Text to display.
   * @param {number} [duration=3000] - Milliseconds before the toast is removed.
   */
  function showToast(message, duration = 3000) {
    const toast = Object.assign(document.createElement('div'), {
      className: 'ai-toast',
      textContent: message,
    });
    document.body.appendChild(toast);
    setTimeout(() => {
      toast.remove();
    }, duration);
  }
  750.  
  /**
   * Fetch the account information that belongs to an API key.
   *
   * @param {string} apiEndpoint - API base URL; trailing slashes are tolerated.
   * @param {string} apiKey - Bearer token.
   * @returns {Promise<{name: *, balance: number, chargeBalance: number, totalBalance: number}>}
   *   The user name and the three balance fields parsed as numbers.
   * @throws {Error} On network failure, a non-2xx status, or a response that
   *   does not carry `code === 20000`, a truthy `status` and a `data` object.
   */
  async function checkUserInfo(apiEndpoint, apiKey) {
    // "https://host/" would otherwise become "https://host//v1/user/info".
    const baseUrl = String(apiEndpoint).replace(/\/+$/, '');

    try {
      const response = await fetch(`${baseUrl}/v1/user/info`, {
        method: 'GET',
        headers: {
          'Authorization': `Bearer ${apiKey}`
        }
      });

      if (!response.ok) {
        throw new Error(`HTTP error! status: ${response.status}`);
      }

      const result = await response.json();
      console.log('[Debug] User Info API Response:', result);

      const isSuccess = result.code === 20000 && result.status && result.data;
      if (!isSuccess) {
        throw new Error(result.message || 'Invalid response format');
      }

      const { name, balance, chargeBalance, totalBalance } = result.data;
      return {
        name,
        balance: Number.parseFloat(balance),
        chargeBalance: Number.parseFloat(chargeBalance),
        totalBalance: Number.parseFloat(totalBalance)
      };
    } catch (error) {
      console.error('[Debug] User Info API Error:', error);
      throw error;
    }
  }
  783.  
  /**
   * List the vision-language models offered by the API that this script
   * supports, each with a shortened display name.
   *
   * @param {string} apiEndpoint - API base URL; trailing slashes are tolerated.
   * @param {string} apiKey - Bearer token.
   * @returns {Promise<Array<{id: string, name: string}>>} Supported models
   *   found in the response (possibly empty).
   * @throws {Error} On network failure, a non-2xx status, or a response that
   *   is not `{ object: 'list', data: [...] }`.
   */
  async function getAvailableModels(apiEndpoint, apiKey) {
    // Vision models this script knows how to talk to.
    const supportedVLModels = [
      'Qwen/Qwen2-VL-72B-Instruct',
      'Pro/Qwen/Qwen2-VL-7B-Instruct',
      'OpenGVLab/InternVL2-Llama3-76B',
      'OpenGVLab/InternVL2-26B',
      'Pro/OpenGVLab/InternVL2-8B'
    ];

    // Short label: last path segment with the "VL"/"Llama3"/"Instruct" parts removed.
    const toDisplayName = (id) => id.split('/').pop()
      .replace('Qwen2-VL-', 'Qwen2-')
      .replace('InternVL2-Llama3-', 'InternVL2-')
      .replace('-Instruct', '');

    // "https://host/" would otherwise become "https://host//v1/models".
    const baseUrl = String(apiEndpoint).replace(/\/+$/, '');

    try {
      const response = await fetch(`${baseUrl}/v1/models`, {
        method: 'GET',
        headers: {
          'Authorization': `Bearer ${apiKey}`
        }
      });

      if (!response.ok) {
        throw new Error(`HTTP error! status: ${response.status}`);
      }

      const result = await response.json();
      console.log('[Debug] Models API Response:', result);

      if (result?.object !== 'list' || !Array.isArray(result.data)) {
        throw new Error('Invalid models response format');
      }

      // Entries without an id simply do not match the supported list.
      const models = result.data
        .filter((model) => supportedVLModels.includes(model?.id))
        .map((model) => ({ id: model.id, name: toDisplayName(model.id) }));

      console.log('[Debug] Available VL Models:', models);
      if (models.length === 0) {
        console.warn('[Debug] No supported VL models found in the response');
      }
      return models;
    } catch (error) {
      console.error('[Debug] Models API Error:', error);
      throw error;
    }
  }
  835.  
  /**
   * Fill the model <select> with one option per model, or show a disabled
   * placeholder when the list is empty.
   *
   * @param {HTMLSelectElement} selectElement - The dropdown to populate.
   * @param {Array<{id: string, name: string}>} models - Models to list.
   */
  function updateModelSelect(selectElement, models) {
    if (models.length === 0) {
      selectElement.innerHTML = '<option value="">未找到可用的视觉模型</option>';
      selectElement.disabled = true;
      return;
    }

    // Ids and names come from a remote API response, so they are escaped
    // before being placed into markup.
    const htmlEntities = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' };
    const escapeHtml = (value) => String(value).replace(/[&<>"']/g, (ch) => htmlEntities[ch]);

    const options = models.map((model) => {
      const id = escapeHtml(model.id);
      return `<option value="${id}" title="${id}">${escapeHtml(model.name)}</option>`;
    });

    selectElement.innerHTML = '<option value="">请选择视觉模型</option>' + options.join('');
    selectElement.disabled = false;
  }
  850.  
  /**
   * Persist the model list in userscript storage under 'availableModels'.
   *
   * @param {Array} models - Model list to store.
   */
  function saveModelList(models) {
    const storageKey = 'availableModels';
    GM_setValue(storageKey, models);
  }
  855.  
  /**
   * Read the model list stored under 'availableModels' in userscript storage.
   *
   * @returns {*} The stored value, or an empty array when nothing is stored.
   */
  function getStoredModelList() {
    const storageKey = 'availableModels';
    const fallback = [];
    return GM_getValue(storageKey, fallback);
  }
  860.  
  /**
   * Build the draggable floating button, wire up its interactions, and
   * append it to the document body.
   *
   * Interactions:
   * - left click: enter image selection mode (not after a drag);
   * - right click: leave selection mode and open the configuration UI;
   * - hover: show a small menu next to the button;
   * - drag with the left button: move the button; the position is saved
   *   under 'btnPosition';
   * - Escape anywhere: leave selection mode and remove the tooltip.
   *
   * @returns {HTMLDivElement} The button element that was appended.
   */
  function createFloatingButton() {
    const DEFAULT_POSITION = { x: 20, y: 20 };
    // Pointer travel, in pixels, above which a press counts as a drag.
    const DRAG_THRESHOLD = 3;
    const MAIN_ICON_PATH = 'M12 2C6.48 2 2 6.48 2 12s4.48 10 10 10 10-4.48 10-10S17.52 2 12 2zm0 18c-4.42 0-8-3.58-8-8s3.58-8 8-8 8 3.58 8 8-3.58 8-8 8zm0-14c-3.31 0-6 2.69-6 6s2.69 6 6 6 6-2.69 6-6-2.69-6-6-6zm0 10c-2.21 0-4-1.79-4-4h2c0-1.1.9-2 2-2s2 .9 2 2c0 2-3 1.75-3 5h2c0-2.25 3-2.5 3-5 0-2.21-1.79-4-4-4z';
    const SETTINGS_ICON_PATH = 'M19.14 12.94c.04-.3.06-.61.06-.94 0-.32-.02-.64-.07-.94l2.03-1.58c.18-.14.23-.41.12-.61l-1.92-3.32c-.12-.22-.37-.29-.59-.22l-2.39.96c-.5-.38-1.03-.7-1.62-.94l-.36-2.54c-.04-.24-.24-.41-.48-.41h-3.84c-.24 0-.43.17-.47.41l-.36 2.54c-.59.24-1.13.57-1.62.94l-2.39-.96c-.22-.08-.47 0-.59.22L2.74 8.87c-.12.21-.08.47.12.61l2.03 1.58c-.05.3-.07.62-.07.94s.02.64.07.94l-2.03 1.58c-.18.14-.23.41-.12.61l1.92 3.32c.12.22.37.29.59.22l2.39-.96c.5.38 1.03.7 1.62.94l.36 2.54c.05.24.24.41.48.41h3.84c.24 0 .44-.17.47-.41l.36-2.54c.59-.24 1.13-.56 1.62-.94l2.39.96c.22.08.47 0 .59-.22l1.92-3.32c.12-.22.07-.47-.12-.61l-2.01-1.58zM12 15.6c-1.98 0-3.6-1.62-3.6-3.6s1.62-3.6 3.6-3.6 3.6 1.62 3.6 3.6-1.62 3.6-3.6 3.6z';

    const btn = document.createElement('div');
    btn.className = 'ai-floating-btn';
    btn.innerHTML = `
      <svg viewBox="0 0 24 24">
        <path d="${MAIN_ICON_PATH}"/>
      </svg>
    `;

    // Set when a drag has just ended, so the click the browser fires on
    // mouse-up does not also start image selection.
    let suppressNextClick = false;

    // Left click enters image selection mode.
    btn.addEventListener('click', (e) => {
      if (suppressNextClick) {
        suppressNextClick = false;
        e.stopPropagation();
        return;
      }
      if (e.button === 0) {
        enterImageSelectionMode();
        e.stopPropagation();
      }
    });

    // Right click opens the configuration UI.
    btn.addEventListener('contextmenu', (e) => {
      e.preventDefault();
      exitImageSelectionMode();
      createConfigUI();
    });

    // Escape leaves selection mode.
    document.addEventListener('keydown', (e) => {
      if (e.key === 'Escape') {
        exitImageSelectionMode();
        removeDescriptionTooltip();
      }
    });

    // Restore the saved position. A corrupt stored value falls back to the
    // default instead of throwing, which would prevent the button from
    // being created at all.
    let savedPos = DEFAULT_POSITION;
    try {
      const parsed = JSON.parse(GM_getValue('btnPosition', JSON.stringify(DEFAULT_POSITION)));
      if (parsed && typeof parsed === 'object') {
        savedPos = parsed;
      }
    } catch (error) {
      console.warn('[Debug] Ignoring invalid saved button position:', error);
    }
    // 0 is a valid coordinate (button parked at an edge), so only missing or
    // non-numeric values fall back to the default.
    const coordinate = (value, fallback) => (Number.isFinite(value) ? value : fallback);
    btn.style.left = `${coordinate(savedPos.x, DEFAULT_POSITION.x)}px`;
    btn.style.top = `${coordinate(savedPos.y, DEFAULT_POSITION.y)}px`;
    btn.style.right = 'auto';
    btn.style.bottom = 'auto';

    let isDragging = false;
    let hasMoved = false;
    let startX;
    let startY;
    let initialLeft;
    let initialTop;

    function dragStart(e) {
      // Each press starts clean, so a stale flag cannot swallow a later click.
      suppressNextClick = false;
      if (e.button !== 0) {
        return; // only the left button drags
      }
      if (e.target === btn || btn.contains(e.target)) {
        isDragging = true;
        hasMoved = false;
        const rect = btn.getBoundingClientRect();
        startX = e.clientX;
        startY = e.clientY;
        initialLeft = rect.left;
        initialTop = rect.top;
        e.preventDefault();
      }
    }

    function drag(e) {
      if (!isDragging) {
        return;
      }
      e.preventDefault();
      const deltaX = e.clientX - startX;
      const deltaY = e.clientY - startY;
      if (Math.abs(deltaX) > DRAG_THRESHOLD || Math.abs(deltaY) > DRAG_THRESHOLD) {
        hasMoved = true;
      }
      // Keep the button inside the viewport.
      const newLeft = Math.max(0, Math.min(window.innerWidth - btn.offsetWidth, initialLeft + deltaX));
      const newTop = Math.max(0, Math.min(window.innerHeight - btn.offsetHeight, initialTop + deltaY));
      btn.style.left = `${newLeft}px`;
      btn.style.top = `${newTop}px`;
    }

    function dragEnd() {
      if (!isDragging) {
        return;
      }
      isDragging = false;
      suppressNextClick = hasMoved;
      const rect = btn.getBoundingClientRect();
      GM_setValue('btnPosition', JSON.stringify({
        x: rect.left,
        y: rect.top
      }));
    }

    btn.addEventListener('mousedown', dragStart);
    document.addEventListener('mousemove', drag);
    document.addEventListener('mouseup', dragEnd);

    // Hover menu.
    let menu = null;
    let menuTimeout = null;

    function showMainMenu() {
      if (menu) return;

      menu = document.createElement('div');
      menu.className = 'ai-menu';
      menu.innerHTML = `
        <div class="ai-menu-item" id="ai-describe-images" title="选择要识别的图像">
          <svg viewBox="0 0 24 24">
            <path d="${MAIN_ICON_PATH}"/>
          </svg>
        </div>
        <div class="ai-menu-item" id="ai-settings" title="设置AI功能">
          <svg viewBox="0 0 24 24">
            <path d="${SETTINGS_ICON_PATH}"/>
          </svg>
        </div>
      `;

      // getBoundingClientRect yields viewport coordinates, so the menu must
      // be positioned against the viewport to stay beside the button when
      // the page is scrolled.
      const btnRect = btn.getBoundingClientRect();
      menu.style.position = 'fixed';
      menu.style.left = `${btnRect.right + 5}px`;
      menu.style.top = `${btnRect.top}px`;

      document.body.appendChild(menu);

      // Keep the menu open while the pointer is over it.
      menu.addEventListener('mouseenter', () => {
        if (menuTimeout) {
          clearTimeout(menuTimeout);
          menuTimeout = null;
        }
      });

      menu.addEventListener('mouseleave', () => {
        hideMainMenu();
      });

      menu.querySelector('#ai-describe-images').onclick = () => {
        menu.remove();
        menu = null;
        showImageSelectionModal();
      };
      menu.querySelector('#ai-settings').onclick = () => {
        menu.remove();
        menu = null;
        createConfigUI();
      };
    }

    function hideMainMenu() {
      if (menuTimeout) {
        clearTimeout(menuTimeout);
      }
      // 300 ms grace period so the menu does not flicker while the pointer
      // travels from the button to the menu.
      menuTimeout = setTimeout(() => {
        if (menu) {
          menu.remove();
          menu = null;
        }
        menuTimeout = null;
      }, 300);
    }

    btn.addEventListener('mouseenter', () => {
      if (menuTimeout) {
        clearTimeout(menuTimeout);
        menuTimeout = null;
      }
      showMainMenu();
    });

    btn.addEventListener('mouseleave', () => {
      hideMainMenu();
    });

    document.body.appendChild(btn);
    return btn;
  }
  1025.  
  1026. // 创建配置界面
  /**
   * Builds and shows the configuration dialog (API endpoint, API key, model).
   *
   * Reads the persisted `apiEndpoint`, `apiKey` and `selectedModel` values via
   * GM_getValue and writes them back via GM_setValue when the fields lose focus,
   * when the model selection changes, and when the save button is clicked.
   * The dialog is appended to `document.body` inside an overlay; it is closed by
   * the cancel button, a successful save, or a click on the overlay itself.
   *
   * Relies on helpers defined elsewhere in this script: getStoredModelList,
   * updateModelSelect, saveModelList, checkUserInfo, getAvailableModels,
   * togglePassword and showToast.
   */
  function createConfigUI() {
    const overlay = document.createElement('div');
    overlay.className = 'ai-modal-overlay';
    const modal = document.createElement('div');
    modal.className = 'ai-config-modal';

    // Static markup only. Stored values are assigned through the `value`
    // property below instead of being interpolated into the HTML, so a stored
    // string containing quotes or angle brackets cannot break the markup.
    modal.innerHTML = `
      <h3>AI图像描述配置</h3>
      <div class="input-group">
        <label>API Endpoint:</label>
        <div class="input-wrapper">
          <input type="text" id="ai-endpoint" placeholder="https://api.openai.com">
          <span class="input-icon clear-icon" title="清空">✕</span>
        </div>
      </div>
      <div class="input-group">
        <label>API Key:</label>
        <div class="input-wrapper">
          <input type="password" id="ai-apikey">
          <span class="input-icon clear-icon" title="清空">✕</span>
          <span class="input-icon toggle-password" title="显示/隐藏密码">👁️</span>
        </div>
        <div class="button-row">
          <button class="check-button" id="check-api">检测可用性</button>
        </div>
      </div>
      <div class="input-group">
        <label>可用模型:</label>
        <select id="ai-model">
          <option value="">加载中...</option>
        </select>
      </div>
      <div class="button-group">
        <button class="cancel-button" id="ai-cancel-config">取消</button>
        <button class="save-button" id="ai-save-config">保存</button>
      </div>
    `;

    overlay.appendChild(modal);
    document.body.appendChild(overlay);

    const endpointInput = modal.querySelector('#ai-endpoint');
    const apiKeyInput = modal.querySelector('#ai-apikey');
    const modelSelect = modal.querySelector('#ai-model');
    const checkButton = modal.querySelector('#check-api');
    const saveButton = modal.querySelector('#ai-save-config');

    endpointInput.value = GM_getValue('apiEndpoint', '');
    apiKeyInput.value = GM_getValue('apiKey', '');

    // Current (trimmed) contents of the two text fields.
    const readCredentials = () => ({
      endpoint: endpointInput.value.trim(),
      apiKey: apiKeyInput.value.trim(),
    });

    // The save button is only usable when endpoint, key and model are all set.
    const updateSaveButtonState = () => {
      const { endpoint, apiKey } = readCredentials();
      saveButton.disabled = !endpoint || !apiKey || !modelSelect.value;
    };

    // Populate the model dropdown from the stored list, if any.
    const storedModels = getStoredModelList();
    const selectedModel = GM_getValue('selectedModel', '');
    if (storedModels.length > 0) {
      updateModelSelect(modelSelect, storedModels);
      if (selectedModel) {
        modelSelect.value = selectedModel;
      }
    } else {
      modelSelect.innerHTML = '<option value="">请先检测API可用性</option>';
      modelSelect.disabled = true;
    }

    // Clear icons: wired with addEventListener rather than inline `onclick`
    // attributes, which a page's Content-Security-Policy can block. Clearing
    // also refreshes the save button, since no `input` event fires for a
    // programmatic value change.
    modal.querySelectorAll('.clear-icon').forEach((icon) => {
      icon.addEventListener('click', () => {
        icon.previousElementSibling.value = '';
        updateSaveButtonState();
      });
    });

    // Toggle visibility of the API key.
    const toggleBtn = modal.querySelector('.toggle-password');
    toggleBtn.addEventListener('click', function () {
      togglePassword(this);
    });

    // Auto-save endpoint and key whenever a text field loses focus and both
    // fields are filled in.
    modal.querySelectorAll('input').forEach((input) => {
      input.addEventListener('blur', () => {
        const { endpoint, apiKey } = readCredentials();
        if (endpoint && apiKey) {
          GM_setValue('apiEndpoint', endpoint);
          GM_setValue('apiKey', apiKey);
          showToast('配置已保存');
        }
      });
    });

    // Check API availability and refresh the model list.
    checkButton.addEventListener('click', async () => {
      const { endpoint, apiKey } = readCredentials();

      if (!endpoint || !apiKey) {
        showToast('请先填写API Endpoint和API Key');
        return;
      }

      checkButton.disabled = true;
      modelSelect.disabled = true;
      modelSelect.innerHTML = '<option value="">检测中...</option>';

      try {
        // Request user info and the model list in parallel.
        const [userInfo, models] = await Promise.all([
          checkUserInfo(endpoint, apiKey),
          getAvailableModels(endpoint, apiKey),
        ]);

        saveModelList(models);
        updateModelSelect(modelSelect, models);

        showToast(`检测通过,欢迎 ${userInfo.name}!\n账户余额:${userInfo.balance.toFixed(2)}\n充值余额:${userInfo.chargeBalance.toFixed(2)}\n总余额:${userInfo.totalBalance.toFixed(2)}`);

        // Restore the previously saved model if it is still offered.
        const savedModel = GM_getValue('selectedModel', '');
        if (savedModel && models.some((m) => m.id === savedModel)) {
          modelSelect.value = savedModel;
        }
      } catch (error) {
        showToast('API检测失败:' + error.message);
        modelSelect.innerHTML = '<option value="">获取模型列表失败</option>';
        modelSelect.disabled = true;
      } finally {
        checkButton.disabled = false;
        // The dropdown contents were replaced programmatically, which fires no
        // `change` event, so recompute the save button state explicitly.
        updateSaveButtonState();
      }
    });

    // Persist the model as soon as the user picks one.
    modelSelect.addEventListener('change', function () {
      if (this.value) {
        GM_setValue('selectedModel', this.value);
        showToast('已保存模型选择');
      }
    });

    // Explicit save: validate, persist everything, close the dialog.
    saveButton.addEventListener('click', () => {
      const { endpoint, apiKey } = readCredentials();
      const chosenModel = modelSelect.value;

      if (!endpoint || !apiKey) {
        showToast('请填写API Endpoint和API Key');
        return;
      }

      if (!chosenModel) {
        showToast('请选择一个视觉模型');
        return;
      }

      GM_setValue('apiEndpoint', endpoint);
      GM_setValue('apiKey', apiKey);
      GM_setValue('selectedModel', chosenModel);
      showToast('配置已保存');
      overlay.remove();
    });

    // Keep the save button in sync with user edits.
    endpointInput.addEventListener('input', updateSaveButtonState);
    apiKeyInput.addEventListener('input', updateSaveButtonState);
    modelSelect.addEventListener('change', updateSaveButtonState);
    updateSaveButtonState();

    // Cancel button closes the dialog.
    modal.querySelector('#ai-cancel-config').onclick = () => {
      overlay.remove();
    };

    // Clicking the overlay (outside the dialog itself) closes it.
    overlay.addEventListener('click', (e) => {
      if (e.target === overlay) {
        overlay.remove();
      }
    });
  }
  1206.  
  1207. // 显示图像选择界面
  /**
   * Shows a dialog asking which images to describe: all images or only the
   * visible ones. Each choice first runs checkApiConfig(); only when that
   * returns a truthy value is the matching describe function called and the
   * dialog closed. The cancel button and a click on the overlay close the
   * dialog without doing anything else.
   */
  function showImageSelectionModal() {
    const backdrop = document.createElement('div');
    backdrop.className = 'ai-modal-overlay';

    const dialog = document.createElement('div');
    dialog.className = 'ai-config-modal';
    dialog.innerHTML = `
      <h3>选择要识别的图像</h3>
      <div class="ai-image-options">
        <button id="ai-all-images">识别所有图片</button>
        <button id="ai-visible-images">仅识别可见图片</button>
      </div>
      <button id="ai-cancel">取消</button>
    `;

    backdrop.appendChild(dialog);
    document.body.appendChild(backdrop);

    const dismiss = () => {
      backdrop.remove();
    };

    // Wraps a describe action so it only runs (and closes the dialog) when the
    // API configuration check passes.
    const guarded = (action) => () => {
      if (!checkApiConfig()) {
        return;
      }
      action();
      dismiss();
    };

    dialog.querySelector('#ai-all-images').onclick = guarded(() => describeAllImages());
    dialog.querySelector('#ai-visible-images').onclick = guarded(() => describeVisibleImages());
    dialog.querySelector('#ai-cancel').onclick = () => dismiss();

    // A click that lands on the backdrop itself (not on the dialog) closes it.
    backdrop.addEventListener('click', (event) => {
      if (event.target !== backdrop) {
        return;
      }
      dismiss();
    });
  }
  1251.  
  1252. // 显示描述结果对话框
  /**
   * Shows a result dialog containing the generated image description.
   *
   * Clicking the description copies it to the clipboard; the close button or
   * the Escape key removes the dialog.
   *
   * @param {string} description - Description text to display and copy. It is
   *   rendered as plain text (never parsed as HTML), because it originates
   *   from an external API response.
   */
  function showDescriptionModal(description) {
    const modal = document.createElement('div');
    modal.className = 'ai-result-modal';
    // Static markup only; the description is inserted below via textContent so
    // any markup it contains is displayed literally instead of being executed.
    modal.innerHTML = `
      <h3>图片描述结果</h3>
      <div class="code-block"></div>
      <button class="close-button">&times;</button>
    `;

    const codeBlock = modal.querySelector('.code-block');
    codeBlock.textContent = description;

    // Single teardown path so the document-level key listener is always
    // released, whichever way the dialog is closed.
    const closeModal = () => {
      modal.remove();
      document.removeEventListener('keydown', handleKeydown);
    };

    function handleKeydown(event) {
      if (event.key === 'Escape') {
        closeModal();
      }
    }

    // Click-to-copy, with brief visual feedback through the `copied` class.
    codeBlock.addEventListener('click', async () => {
      try {
        await navigator.clipboard.writeText(description);
        codeBlock.classList.add('copied');
        setTimeout(() => {
          codeBlock.classList.remove('copied');
        }, 2000);
      } catch (err) {
        console.error('[Debug] Copy failed:', err);
        showToast('复制失败,请手动复制');
      }
    });

    modal.querySelector('.close-button').addEventListener('click', closeModal);
    document.addEventListener('keydown', handleKeydown);

    document.body.appendChild(modal);
  }
  1294.  
  1295. // 初始化
  /**
   * Script entry point: creates the floating button, deferring until
   * DOMContentLoaded when the document is still in the `loading` state.
   */
  function initialize() {
    const domStillLoading = document.readyState === 'loading';

    if (!domStillLoading) {
      createFloatingButton();
      return;
    }

    // The document is still being parsed; wait for it before creating the button.
    document.addEventListener('DOMContentLoaded', () => {
      createFloatingButton();
    });
  }
  1306.  
  1307. // 启动脚本
  1308. initialize();
  1309. })();