// ==UserScript==
// @name         小红书长尾词采集器 v1.0
// @namespace    http://tampermonkey.net/
// @version      1.0
// @description  优化版:逐字真实输入、去重、进度条、停止功能、双Sheet导出、强化防检测
// @match        *://www.xiaohongshu.com/*
// @grant        none
// @require      https://cdn.jsdelivr.net/npm/xlsx@0.18.5/dist/xlsx.full.min.js
// @run-at       document-idle
// ==/UserScript==

/*
 * Long-tail keyword collector for the Xiaohongshu search box.
 *
 * Flow: type a seed keyword into the site's search input character by
 * character, read the suggestion dropdown (layer 1), repeat for every layer-1
 * suggestion (layer 2), then scrape the hot-search list. Results can be
 * exported as .xlsx files through the XLSX global loaded via @require.
 *
 * The metadata block above must keep one key per line.
 */
(function () {
  'use strict';

  // The script only makes sense inside a page; bail out when no DOM exists.
  if (typeof document === 'undefined') {
    return;
  }

  /** Message carried by the Error used to unwind the collection loop on stop. */
  const STOP_MESSAGE = 'STOPPED';

  /**
   * Tunables (milliseconds unless noted).
   * SUG_RETRY_MAX, SUG_RETRY_INTERVAL and TYPO_CHANCE are not referenced by
   * the code in this file; they are kept so the object shape stays unchanged.
   */
  const CONFIG = {
    MIN_DELAY: 4000,
    MAX_DELAY: 6000,
    PAUSE_CHECK_INTERVAL: 1500,
    SUG_RETRY_MAX: 30,
    SUG_RETRY_INTERVAL: 300,
    LEAVE_CHANCE: 0.15, // probability of a simulated break before each layer-2 query
    LEAVE_MIN: 8000,
    LEAVE_MAX: 15000,
    TYPO_CHANCE: 0.05,
  };

  /** Mutable run state shared by the UI handlers and the collection loop. */
  const state = {
    allData: [], // rows of { keyword, first, second }
    hotData: [], // rows of { hot }
    minDelay: CONFIG.MIN_DELAY,
    maxDelay: CONFIG.MAX_DELAY,
    isPaused: false,
    isStopped: false,
    collapsed: false,
    running: false,
    collectedKeywords: new Set(), // every suggestion already recorded in this run
  };

  const $ = (id) => document.getElementById(id);

  /**
   * Append one line to the panel log and scroll it into view.
   * The message is written as text, never parsed as HTML, because log lines
   * embed suggestion strings scraped from the page.
   * @param {string} msg
   */
  const log = (msg) => {
    const box = $('logBox');
    if (!box) {
      return;
    }
    const entry = document.createElement('div');
    entry.textContent = msg;
    box.appendChild(entry);
    box.scrollTop = box.scrollHeight;
  };

  const logInfo = (text) => log(`${text}`);
  const logSuccess = (text) => log(`✅ ${text}`);
  const logWarn = (text) => log(`⚠️ ${text}`);
  const logError = (text) => log(`❌ ${text}`);

  const styles = `
    #xhs-panel {
      position: fixed; top: 80px; right: 30px; width: 420px;
      background: #ffffff; border-radius: 12px;
      box-shadow: 0 8px 24px rgba(0,0,0,0.12);
      font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif;
      color: #333; transition: all .3s ease; overflow: hidden;
      z-index: 1000000; user-select: none;
    }
    #xhs-panel.collapsed { width: 44px; height: 44px; border-radius: 22px; }
    #xhs-panel.collapsed .body,
    #xhs-panel.collapsed header h4 { display: none; }
    #xhs-panel.collapsed header { justify-content: center; padding: 10px; }
    #xhs-panel header {
      display: flex; align-items: center; justify-content: space-between;
      padding: 10px 14px;
      background: linear-gradient(135deg, #ff2442, #ff6a6a);
      color: #fff; cursor: move;
    }
    #xhs-panel header h4 {
      margin: 0; font-size: 13px; font-weight: 600; letter-spacing: 0.5px;
    }
    #xhs-panel header .header-btns { display: flex; gap: 4px; }
    #xhs-panel header .header-btns button {
      width: 24px; height: 24px; background: rgba(255,255,255,0.2);
      border: none; border-radius: 6px; color: #fff; font-size: 16px;
      cursor: pointer; line-height: 1; transition: background .2s;
    }
    #xhs-panel header .header-btns button:hover { background: rgba(255,255,255,0.35); }
    #xhs-panel .body { padding: 14px; display: block; }
    #xhs-panel .row { display: flex; gap: 8px; margin-bottom: 10px; align-items: center; }
    #xhs-panel .row label { font-size: 13px; color: #666; white-space: nowrap; }
    #xhs-panel input[type="text"] {
      flex: 1; padding: 8px 10px; border: 1px solid #ddd; border-radius: 8px;
      font-size: 13px; outline: none; transition: border .2s;
    }
    #xhs-panel input[type="text"]:focus { border-color: #ff2442; }
    #xhs-panel input[type="number"] {
      width: 52px; padding: 8px 6px; border: 1px solid #ddd; border-radius: 8px;
      font-size: 13px; text-align: center; outline: none;
    }
    #xhs-panel input[type="number"]:focus { border-color: #ff2442; }
    #xhs-panel .btn-group { display: flex; gap: 8px; margin-bottom: 10px; }
    #xhs-panel .btn-group button {
      flex: 1; padding: 8px 0; border: none; border-radius: 8px;
      font-size: 13px; font-weight: 500; cursor: pointer; transition: opacity .2s;
    }
    #xhs-panel .btn-group button:disabled { opacity: 0.4; cursor: not-allowed; }
    #xhs-panel .btn-group button:not(:disabled):hover { opacity: 0.85; }
    #xhs-panel #startBtn { background: #ff2442; color: #fff; }
    #xhs-panel #toggleBtn { background: #f0f0f0; color: #333; }
    #xhs-panel #toggleBtn.paused { background: #28a745; color: #fff; }
    #xhs-panel #stopBtn { background: #6c757d; color: #fff; flex: 0.5; }
    #xhs-panel #logBox {
      height: 160px; overflow-y: auto; border: 1px solid #eee; border-radius: 8px;
      padding: 8px 10px; font-size: 12px; line-height: 1.6; background: #fafafa;
      margin-bottom: 8px;
    }
    #xhs-panel #logBox div:first-child { margin-top: 0; }
    #xhs-panel #progressWrap {
      height: 4px; background: #f0f0f0; border-radius: 2px; margin-bottom: 10px;
      overflow: hidden; display: none;
    }
    #xhs-panel #progressBar {
      height: 100%; width: 0%;
      background: linear-gradient(90deg, #ff2442, #ff6a6a);
      border-radius: 2px; transition: width .3s ease;
    }
    #xhs-panel #statusText {
      font-size: 12px; color: #888; text-align: center; margin-bottom: 8px;
      display: none;
    }
    #xhs-panel .export-zone { display: flex; gap: 8px; }
    #xhs-panel .export-zone button {
      flex: 1; padding: 8px 0; border: none; border-radius: 8px;
      font-size: 13px; cursor: pointer; transition: opacity .2s;
    }
    #xhs-panel .export-zone button:hover { opacity: 0.85; }
    #xhs-panel #exportKwBtn { background: #17a2b8; color: #fff; }
    #xhs-panel #exportHotBtn { background: #6f42c1; color: #fff; }
  `;

  /** Add the panel stylesheet to the document head. */
  function injectStyles() {
    const style = document.createElement('style');
    style.textContent = styles;
    document.head.appendChild(style);
  }

  /**
   * Create the floating control panel and attach it to the page.
   *
   * NOTE(review): the markup below was rebuilt from the element ids and class
   * names that the stylesheet and the event handlers in this file reference
   * (the earlier template held only bare text, so every getElementById lookup
   * failed). Labels and button captions are a best guess — confirm wording.
   * @returns {HTMLDivElement} the panel root element
   */
  function buildPanel() {
    const panel = document.createElement('div');
    panel.id = 'xhs-panel';
    // Static, script-owned markup: no page or user data is interpolated here.
    panel.innerHTML = `
      <header>
        <h4>📋 长尾词采集器</h4>
        <div class="header-btns">
          <button id="collapseBtn" type="button">−</button>
        </div>
      </header>
      <div class="body">
        <div class="row">
          <label for="kw">种子词</label>
          <input type="text" id="kw" placeholder="输入种子关键词">
        </div>
        <div class="row">
          <label for="minD">延迟(秒)</label>
          <input type="number" id="minD" value="4" min="3">
          <span>~</span>
          <input type="number" id="maxD" value="6" min="3">
        </div>
        <div class="btn-group">
          <button id="startBtn" type="button">▶ 开始</button>
          <button id="toggleBtn" type="button" disabled>⏸ 暂停</button>
          <button id="stopBtn" type="button" disabled>⏹ 停止</button>
        </div>
        <div id="progressWrap"><div id="progressBar"></div></div>
        <div id="statusText">就绪</div>
        <div id="logBox"><div>等待开始...</div></div>
        <div class="export-zone">
          <button id="exportKwBtn" type="button" style="display: none">导出长尾词</button>
          <button id="exportHotBtn" type="button" style="display: none">导出热搜</button>
        </div>
      </div>
    `;
    document.body.appendChild(panel);
    return panel;
  }

  /**
   * Make the panel draggable by its header. Presses that start on a button
   * are ignored so header buttons keep working.
   * @param {HTMLElement} panel
   */
  function applyDrag(panel) {
    const header = panel.querySelector('header');
    let isDragging = false;
    let startX = 0;
    let startY = 0;
    let origX = 0;
    let origY = 0;

    header.addEventListener('mousedown', (e) => {
      if (e.target.tagName === 'BUTTON') {
        return;
      }
      const rect = panel.getBoundingClientRect();
      isDragging = true;
      origX = rect.left;
      origY = rect.top;
      startX = e.clientX;
      startY = e.clientY;
    });

    document.addEventListener('mousemove', (e) => {
      if (!isDragging) {
        return;
      }
      panel.style.left = `${origX + e.clientX - startX}px`;
      panel.style.top = `${origY + e.clientY - startY}px`;
      // The stylesheet anchors the panel with `right`; release it once moved.
      panel.style.right = 'auto';
    });

    document.addEventListener('mouseup', () => {
      isDragging = false;
    });
  }

  /** Resolve after `ms` milliseconds. */
  function sleep(ms) {
    return new Promise((resolve) => setTimeout(resolve, ms));
  }

  /** Random integer in [min, max], both ends inclusive. */
  function randDelay(min, max) {
    return Math.floor(Math.random() * (max - min + 1)) + min;
  }

  /** Delay between layer-2 queries: the configured range plus up to 1 s of jitter. */
  function naturalDelay() {
    const base = randDelay(state.minDelay, state.maxDelay);
    const jitter = Math.random() * 1000;
    return base + jitter;
  }

  /** Random float in [min, max). */
  function randRange(min, max) {
    return min + Math.random() * (max - min);
  }

  /** @throws {Error} with message STOP_MESSAGE when a stop was requested. */
  function throwIfStopped() {
    if (state.isStopped) {
      throw new Error(STOP_MESSAGE);
    }
  }

  /**
   * Block while the run is paused, polling every PAUSE_CHECK_INTERVAL ms.
   * @throws {Error} with message STOP_MESSAGE when a stop was requested.
   */
  async function pauseCheck() {
    while (state.isPaused && !state.isStopped) {
      await sleep(CONFIG.PAUSE_CHECK_INTERVAL);
    }
    throwIfStopped();
  }

  /**
   * Enable/disable the control buttons for the given running state and reset
   * the pause button when the run has ended.
   * @param {boolean} running
   */
  function updateUI(running) {
    state.running = running;
    $('startBtn').disabled = running;
    $('toggleBtn').disabled = !running;
    $('stopBtn').disabled = !running;
    if (!running) {
      $('toggleBtn').textContent = '⏸ 暂停';
      $('toggleBtn').classList.remove('paused');
    }
  }

  /**
   * Show the progress bar at `pct` percent (clamped to 0-100) with an optional
   * status line; call with no arguments to hide both.
   * @param {number} [pct]
   * @param {string} [text]
   */
  function setProgress(pct, text) {
    const wrap = $('progressWrap');
    const bar = $('progressBar');
    const status = $('statusText');
    if (pct === undefined) {
      wrap.style.display = 'none';
      status.style.display = 'none';
      return;
    }
    wrap.style.display = 'block';
    status.style.display = 'block';
    bar.style.width = `${Math.min(100, Math.max(0, pct))}%`;
    if (text !== undefined) {
      status.textContent = text;
    }
  }

  /**
   * Locate the site's search input.
   * Preference order: an element with id `search-input`, then the first
   * connected candidate without a `data-hp-kind` attribute, then any
   * connected candidate.
   * @returns {Element|null}
   */
  function findSearchBox() {
    const candidates = document.querySelectorAll(
      '#search-input, input[placeholder*="搜索"], input[class*="search"], [class*="search-input"]'
    );
    for (const el of candidates) {
      if (!el.isConnected) {
        continue;
      }
      if (el.id === 'search-input') {
        return el;
      }
      if (!el.hasAttribute('data-hp-kind')) {
        return el;
      }
    }
    for (const el of candidates) {
      if (el.isConnected) {
        return el;
      }
    }
    return null;
  }

  /**
   * Set an input's value through the HTMLInputElement prototype setter so
   * that a setter overridden on the instance is bypassed; falls back to a
   * plain assignment when the descriptor is unavailable.
   */
  function setInputValueNative(input, value) {
    const descriptor = Object.getOwnPropertyDescriptor(
      window.HTMLInputElement.prototype,
      'value'
    );
    if (descriptor && descriptor.set) {
      descriptor.set.call(input, value);
    } else {
      input.value = value;
    }
  }

  /** Dispatch a bubbling `input` event so the page reacts to the new value. */
  function triggerVueReactivity(input) {
    input.dispatchEvent(new Event('input', { bubbles: true, cancelable: true }));
  }

  /** Click, focus and select the search input, then wait briefly. */
  async function activateSearchBox(input) {
    input.click();
    input.focus();
    input.select();
    await sleep(randDelay(180, 350));
  }

  /** Best-effort: click the page body so the suggestion dropdown closes. */
  async function dismissSearch() {
    try {
      document.body.click();
    } catch (err) {
      console.debug('dismissSearch: body click failed', err);
    }
    await sleep(randDelay(150, 300));
  }

  /**
   * Clear the input, then type `text` one character at a time with short
   * random delays. Returns early when a stop is requested or the input is
   * detached from the document.
   * @throws {Error} with message STOP_MESSAGE if stopped while paused.
   */
  async function typeKeyword(input, text) {
    setInputValueNative(input, '');
    triggerVueReactivity(input);
    await sleep(randDelay(80, 180));

    for (let i = 0; i < text.length; i++) {
      if (state.isStopped || !input.isConnected) {
        return;
      }
      await pauseCheck();
      setInputValueNative(input, text.slice(0, i + 1));
      triggerVueReactivity(input);
      // Linger a little longer after the final character.
      const isLast = i === text.length - 1;
      await sleep(isLast ? randDelay(100, 250) : randDelay(35, 80));
    }
  }

  /**
   * Poll the suggestion dropdown until it shows at least two usable entries.
   *
   * Entries are trimmed, de-duplicated, and dropped when empty, 50+ characters
   * long, or multi-line. First poll happens after 600 ms, then every 200 ms.
   * On timeout the most recent non-empty snapshot is returned (possibly a
   * single entry) rather than discarding it.
   *
   * @param {string} keyword - accepted for call-site symmetry; not used for matching
   * @param {number} [timeout=8000] - give up after this many milliseconds
   * @returns {Promise<string[]>} suggestions; empty when stopped or none appeared
   */
  function waitForSugItems(keyword, timeout = 8000) {
    return new Promise((resolve) => {
      const startTime = Date.now();
      let lastSeen = [];

      function readSuggestions() {
        const container = document.querySelector(
          '.sug-container-wrapper, .sug-wrapper, .search-suggest'
        );
        if (!container || !container.isConnected) {
          return [];
        }
        const items = container.querySelectorAll(
          '.sug-item, [class*="sug-item"]:not(a):not(li)'
        );
        const texts = Array.from(items, (el) => el.innerText.trim()).filter(
          (t) => t && t.length < 50 && !t.includes('\n')
        );
        // Nested nodes can match the selector more than once with the same text.
        return [...new Set(texts)];
      }

      function check() {
        if (state.isStopped) {
          resolve([]);
          return;
        }
        const current = readSuggestions();
        if (current.length > 0) {
          lastSeen = current;
        }
        if (current.length >= 2) {
          resolve(current);
          return;
        }
        if (Date.now() - startTime < timeout) {
          setTimeout(check, 200);
        } else {
          resolve(lastSeen);
        }
      }

      setTimeout(check, 600);
    });
  }

  /** Small random smooth scroll (up to ±60 px) followed by a short pause. */
  async function humanSim() {
    window.scrollBy({
      top: (Math.random() - 0.5) * 120,
      behavior: 'smooth',
    });
    await sleep(randDelay(80, 200));
  }

  /**
   * With probability LEAVE_CHANCE, idle for LEAVE_MIN..LEAVE_MAX ms. The wait
   * is split into five slices so a stop request is noticed between slices.
   */
  async function maybeTakeBreak() {
    if (Math.random() >= CONFIG.LEAVE_CHANCE) {
      return;
    }
    const rest = Math.round(randRange(CONFIG.LEAVE_MIN, CONFIG.LEAVE_MAX));
    logInfo(`😴 模拟暂离 ${Math.round(rest / 1000)}s`);
    const steps = 5;
    for (let i = 1; i <= steps; i++) {
      if (state.isStopped) {
        return;
      }
      await sleep(rest / steps);
    }
    logInfo('👋 回来了,继续采集');
  }

  /**
   * Type `keyword` into the search box and return the suggestions shown.
   * @param {string} keyword
   * @returns {Promise<string[]>} empty when the input is missing or the run stopped
   */
  async function grabWords(keyword) {
    const input = findSearchBox();
    if (!input) {
      logError('未找到搜索输入框');
      return [];
    }

    try {
      await activateSearchBox(input);
      await typeKeyword(input, keyword);
    } catch (err) {
      // A stop request surfaces here as an Error; that is not a typing failure.
      if (state.isStopped) {
        return [];
      }
      logWarn('输入过程异常,尝试继续');
      console.warn(err);
    }
    if (state.isStopped) {
      return [];
    }

    const results = await waitForSugItems(keyword, 8000);
    try {
      await dismissSearch();
    } catch (err) {
      console.debug('grabWords: dismissSearch failed', err);
    }
    return results;
  }

  /**
   * Read hot-search entries using the first selector that matches anything.
   * @returns {string[]}
   */
  function grabHot() {
    const selectors = [
      '.query-note-list .query-note-item .item-text',
      '[class*="hot-list"] [class*="item"]',
      '[class*="trend"] [class*="item"]',
    ];
    for (const sel of selectors) {
      const els = document.querySelectorAll(sel);
      if (els.length > 0) {
        return Array.from(els, (el) => el.innerText.trim()).filter(Boolean);
      }
    }
    return [];
  }

  /** Show each export button only when its data set is non-empty. */
  function showExportButtons() {
    $('exportKwBtn').style.display = state.allData.length > 0 ? 'block' : 'none';
    $('exportHotBtn').style.display = state.hotData.length > 0 ? 'block' : 'none';
  }

  /**
   * Run one full collection: layer-1 suggestions for the seed, layer-2
   * suggestions for each layer-1 word, then the hot-search list.
   * A stop request unwinds the loop through an Error(STOP_MESSAGE); data
   * gathered up to that point stays exportable.
   */
  async function startCollection() {
    if (state.running) {
      return;
    }

    const seed = $('kw').value.trim();
    if (!seed) {
      logError('请先输入种子关键词');
      return;
    }

    state.isStopped = false;
    state.isPaused = false;
    state.collectedKeywords = new Set();
    state.allData = [];
    state.hotData = [];

    // Inputs are in seconds; fall back to 4/6 on blank or invalid input and
    // never go below 3 s, whichever order the two numbers were entered in.
    const rawMin = Number.parseInt($('minD').value, 10) || 4;
    const rawMax = Number.parseInt($('maxD').value, 10) || 6;
    state.minDelay = Math.max(3, Math.min(rawMin, rawMax)) * 1000;
    state.maxDelay = Math.max(3, Math.max(rawMin, rawMax)) * 1000;

    $('exportKwBtn').style.display = 'none';
    $('exportHotBtn').style.display = 'none';
    $('logBox').textContent = '';
    updateUI(true);
    setProgress(0, '正在采集第一层联想词...');

    try {
      logSuccess(`开始采集:${seed}`);
      logInfo(`延迟范围:${state.minDelay / 1000}-${state.maxDelay / 1000}秒`);
      await humanSim();

      logInfo('正在输入关键词...');
      const firstLayer = await grabWords(seed);
      throwIfStopped();

      if (firstLayer.length === 0) {
        logWarn('未获取到联想词,可能是页面结构有变化');
      } else {
        logSuccess(`第一层获取到 ${firstLayer.length} 个联想词`);
      }
      for (const word of firstLayer) {
        state.collectedKeywords.add(word);
        state.allData.push({ keyword: seed, first: word, second: '' });
      }

      const total = firstLayer.length;
      let completed = 0;
      for (const first of firstLayer) {
        throwIfStopped();
        await pauseCheck();

        const pct = total > 0 ? (completed / total) * 100 : 0;
        setProgress(pct, `第二层采集:${completed}/${total} — "${first}"`);

        const wait = naturalDelay();
        logInfo(`等待 ${(wait / 1000).toFixed(1)}s 后采集 "${first}" 的下层词`);
        await sleep(wait);
        throwIfStopped();
        await pauseCheck();

        await humanSim();
        await maybeTakeBreak();
        // The break returns early on stop; do not touch the search box then.
        throwIfStopped();

        logInfo(`🔍 采集 "${first}" 的联想词...`);
        const secondLayer = await grabWords(first);
        throwIfStopped();

        const newWords = secondLayer.filter((w) => !state.collectedKeywords.has(w));
        for (const word of newWords) {
          state.collectedKeywords.add(word);
          state.allData.push({ keyword: seed, first, second: word });
        }
        if (newWords.length > 0) {
          logSuccess(`"${first}" → ${newWords.length} 个新词(共 ${secondLayer.length} 个)`);
        } else {
          logInfo(`"${first}" → 无新词`);
        }
        completed++;
      }

      setProgress(100, '第一轮采集完成');
      await humanSim();
      await sleep(randDelay(1000, 2000));

      logInfo('正在采集热搜...');
      const hot = grabHot();
      for (const entry of hot) {
        state.hotData.push({ hot: entry });
      }
      if (hot.length > 0) {
        logSuccess(`热搜采集到 ${hot.length} 条`);
      } else {
        logWarn('未获取到热搜数据');
      }

      const kwCount = state.allData.length;
      const hotCount = state.hotData.length;
      logSuccess(`采集完成!关键词 ${kwCount} 条,热搜 ${hotCount} 条`);
      showExportButtons();
      setProgress(100, '✅ 采集完成');
      await sleep(2000);
      setProgress(undefined);
    } catch (err) {
      if (err.message === STOP_MESSAGE) {
        logWarn('⏹ 已停止采集');
        const kwCount = state.allData.length;
        const hotCount = state.hotData.length;
        if (kwCount > 0 || hotCount > 0) {
          showExportButtons();
          logInfo(`已采集的数据:关键词 ${kwCount} 条,热搜 ${hotCount} 条,可导出`);
        }
      } else {
        logError(`采集出错:${err.message}`);
        console.error(err);
      }
      setProgress(undefined);
    } finally {
      updateUI(false);
    }
  }

  /**
   * Build a one-sheet workbook from an array-of-arrays and trigger a download.
   * @param {Array<Array<string>>} rows - header row first
   * @param {Array<{wch: number}>} cols - column widths
   * @param {string} sheetName
   * @param {string} fileName
   */
  function writeSheet(rows, cols, sheetName, fileName) {
    const wb = XLSX.utils.book_new();
    const ws = XLSX.utils.aoa_to_sheet(rows);
    ws['!cols'] = cols;
    XLSX.utils.book_append_sheet(wb, ws, sheetName);
    XLSX.writeFile(wb, fileName);
  }

  /** Export the collected keyword rows to an .xlsx file named after the seed. */
  function exportKeywords() {
    if (typeof XLSX === 'undefined') {
      logError('XLSX 库未加载,请检查网络后刷新页面重试');
      return;
    }
    if (state.allData.length === 0) {
      logWarn('没有关键词数据可导出');
      return;
    }
    const rows = [['种子词', '第一层', '第二层']];
    for (const item of state.allData) {
      rows.push([item.keyword, item.first, item.second]);
    }
    // The seed is free text; replace characters that are invalid in file names.
    const safeSeed = state.allData[0].keyword.replace(/[\\/:*?"<>|]/g, '_');
    writeSheet(
      rows,
      [{ wch: 20 }, { wch: 25 }, { wch: 25 }],
      '长尾词',
      `小红书_长尾词_${safeSeed}.xlsx`
    );
    logSuccess(`导出关键词成功:共 ${state.allData.length} 条`);
  }

  /** Export the collected hot-search rows to an .xlsx file named with today's UTC date. */
  function exportHot() {
    if (typeof XLSX === 'undefined') {
      logError('XLSX 库未加载,请检查网络后刷新页面重试');
      return;
    }
    if (state.hotData.length === 0) {
      logWarn('没有热搜数据可导出');
      return;
    }
    const rows = [['热搜词']];
    for (const item of state.hotData) {
      rows.push([item.hot]);
    }
    writeSheet(
      rows,
      [{ wch: 30 }],
      '热搜',
      `小红书_热搜_${new Date().toISOString().slice(0, 10)}.xlsx`
    );
    logSuccess(`导出热搜成功:共 ${state.hotData.length} 条`);
  }

  /** Build the panel and wire up its controls. Does nothing if the panel already exists. */
  function init() {
    if ($('xhs-panel')) {
      return;
    }
    injectStyles();
    const panel = buildPanel();

    $('collapseBtn').onclick = () => {
      state.collapsed = !state.collapsed;
      panel.classList.toggle('collapsed', state.collapsed);
      $('collapseBtn').textContent = state.collapsed ? '+' : '−';
    };

    $('toggleBtn').onclick = () => {
      state.isPaused = !state.isPaused;
      const btn = $('toggleBtn');
      if (state.isPaused) {
        btn.textContent = '▶ 继续';
        btn.classList.add('paused');
        logInfo('⏸ 已暂停');
      } else {
        btn.textContent = '⏸ 暂停';
        btn.classList.remove('paused');
        logInfo('▶ 继续采集');
      }
    };

    $('stopBtn').onclick = () => {
      state.isStopped = true;
      // Clear the pause flag so a paused loop wakes up and sees the stop.
      state.isPaused = false;
      logWarn('正在停止...');
    };

    $('startBtn').onclick = startCollection;
    $('exportKwBtn').onclick = exportKeywords;
    $('exportHotBtn').onclick = exportHot;
    applyDrag(panel);
  }

  if (document.readyState === 'loading') {
    document.addEventListener('DOMContentLoaded', init);
  } else {
    init();
  }
})();