// ==UserScript==
// @name 小红书长尾词采集器 v1.0
// @namespace http://tampermonkey.net/
// @version 1.0
// @description 优化版:逐字真实输入、去重、进度条、停止功能、双Sheet导出、强化防检测
// @match *://www.xiaohongshu.com/*
// @grant none
// @require https://cdn.jsdelivr.net/npm/xlsx@0.18.5/dist/xlsx.full.min.js
// @run-at document-idle
// ==/UserScript==
(function(){
'use strict';
/**
 * Tunable constants for the collector.
 * Delay/interval values that reach `sleep()` are in milliseconds; the
 * `*_CHANCE` values are compared against `Math.random()`.
 */
const CONFIG = {
  // Default bounds for the wait between two keyword lookups.
  MIN_DELAY: 4000,
  MAX_DELAY: 6000,

  // How long the pause loop sleeps between checks of the paused flag.
  PAUSE_CHECK_INTERVAL: 1500,

  // NOTE(review): not referenced anywhere in the visible code.
  SUG_RETRY_MAX: 30,
  SUG_RETRY_INTERVAL: 300,

  // Probability of a simulated "away" break and the range of its length.
  LEAVE_CHANCE: 0.15,
  LEAVE_MIN: 8000,
  LEAVE_MAX: 15000,

  // NOTE(review): not referenced anywhere in the visible code.
  TYPO_CHANCE: 0.05,
};
/**
 * Mutable run-time state shared by the functions of this script.
 */
const state = {
  // Collected rows: keyword rows ({ keyword, first, second }) and hot rows ({ hot }).
  allData: [],
  hotData: [],
  // Suggestions already recorded during the current run, used to skip repeats.
  collectedKeywords: new Set(),

  // Active delay bounds in milliseconds; overwritten from the panel inputs on start.
  minDelay: CONFIG.MIN_DELAY,
  maxDelay: CONFIG.MAX_DELAY,

  // Run-control flags toggled by the start / pause / stop buttons.
  running: false,
  isPaused: false,
  isStopped: false,

  // Whether the floating panel is currently collapsed.
  collapsed: false,
};
/**
 * Shorthand for `document.getElementById`.
 * @param {string} elementId - id of the element to look up.
 * @returns {HTMLElement|null} the element with that id, or null when absent.
 */
const $ = (elementId) => document.getElementById(elementId);
/**
 * Append one line to the on-page log box and keep it scrolled to the bottom.
 *
 * The message is inserted as plain text. Log lines embed the user's seed
 * keyword and suggestion text scraped from the page, so they must never be
 * parsed as HTML.
 * Does nothing when the log box is not present in the DOM.
 *
 * @param {string} msg - text of the log line.
 */
const log = (msg) => {
  const logBox = $('logBox');
  if (!logBox) return;
  const line = document.createElement('div');
  line.textContent = msg;
  logBox.appendChild(line);
  logBox.scrollTop = logBox.scrollHeight;
};
/**
 * Build a logger that prepends a fixed marker to each message before
 * handing it to `log`.
 * @param {string} prefix - marker placed in front of the message.
 * @returns {function(*): void} logger taking the message text.
 */
const makePrefixedLogger = (prefix) => (text) => log(`${prefix}${text}`);

// Severity-specific loggers; only the leading marker differs.
const logInfo = makePrefixedLogger('');
const logSuccess = makePrefixedLogger('✅ ');
const logWarn = makePrefixedLogger('⚠️ ');
const logError = makePrefixedLogger('❌ ');
// CSS for the floating collector panel (#xhs-panel) and its controls.
// The string is placed verbatim into a <style> element by injectStyles().
const styles = `
#xhs-panel {
position: fixed; top: 80px; right: 30px;
width: 420px; background: #ffffff; border-radius: 12px;
box-shadow: 0 8px 24px rgba(0,0,0,0.12);
font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif;
color: #333; transition: all .3s ease;
overflow: hidden;
z-index: 1000000;
user-select: none;
}
#xhs-panel.collapsed { width: 44px; height: 44px; border-radius: 22px; }
#xhs-panel.collapsed .body,
#xhs-panel.collapsed header h4 { display: none; }
#xhs-panel.collapsed header { justify-content: center; padding: 10px; }
#xhs-panel header {
display: flex; align-items: center; justify-content: space-between;
padding: 10px 14px; background: linear-gradient(135deg, #ff2442, #ff6a6a);
color: #fff; cursor: move;
}
#xhs-panel header h4 { margin: 0; font-size: 13px; font-weight: 600; letter-spacing: 0.5px; }
#xhs-panel header .header-btns { display: flex; gap: 4px; }
#xhs-panel header .header-btns button {
width: 24px; height: 24px; background: rgba(255,255,255,0.2);
border: none; border-radius: 6px;
color: #fff; font-size: 16px; cursor: pointer; line-height: 1;
transition: background .2s;
}
#xhs-panel header .header-btns button:hover { background: rgba(255,255,255,0.35); }
#xhs-panel .body { padding: 14px; display: block; }
#xhs-panel .row { display: flex; gap: 8px; margin-bottom: 10px; align-items: center; }
#xhs-panel .row label { font-size: 13px; color: #666; white-space: nowrap; }
#xhs-panel input[type="text"] {
flex: 1; padding: 8px 10px; border: 1px solid #ddd; border-radius: 8px;
font-size: 13px; outline: none; transition: border .2s;
}
#xhs-panel input[type="text"]:focus { border-color: #ff2442; }
#xhs-panel input[type="number"] {
width: 52px; padding: 8px 6px; border: 1px solid #ddd; border-radius: 8px;
font-size: 13px; text-align: center; outline: none;
}
#xhs-panel input[type="number"]:focus { border-color: #ff2442; }
#xhs-panel .btn-group { display: flex; gap: 8px; margin-bottom: 10px; }
#xhs-panel .btn-group button {
flex: 1; padding: 8px 0; border: none; border-radius: 8px;
font-size: 13px; font-weight: 500; cursor: pointer; transition: opacity .2s;
}
#xhs-panel .btn-group button:disabled { opacity: 0.4; cursor: not-allowed; }
#xhs-panel .btn-group button:not(:disabled):hover { opacity: 0.85; }
#xhs-panel #startBtn { background: #ff2442; color: #fff; }
#xhs-panel #toggleBtn {
background: #f0f0f0; color: #333;
}
#xhs-panel #toggleBtn.paused { background: #28a745; color: #fff; }
#xhs-panel #stopBtn {
background: #6c757d; color: #fff; flex: 0.5;
}
#xhs-panel #logBox {
height: 160px; overflow-y: auto;
border: 1px solid #eee; border-radius: 8px; padding: 8px 10px;
font-size: 12px; line-height: 1.6;
background: #fafafa;
margin-bottom: 8px;
}
#xhs-panel #logBox div:first-child { margin-top: 0; }
#xhs-panel #progressWrap {
height: 4px; background: #f0f0f0; border-radius: 2px;
margin-bottom: 10px; overflow: hidden; display: none;
}
#xhs-panel #progressBar {
height: 100%; width: 0%; background: linear-gradient(90deg, #ff2442, #ff6a6a);
border-radius: 2px; transition: width .3s ease;
}
#xhs-panel #statusText {
font-size: 12px; color: #888; text-align: center;
margin-bottom: 8px; display: none;
}
#xhs-panel .export-zone { display: flex; gap: 8px; }
#xhs-panel .export-zone button {
flex: 1; padding: 8px 0; border: none; border-radius: 8px;
font-size: 13px; cursor: pointer; transition: opacity .2s;
}
#xhs-panel .export-zone button:hover { opacity: 0.85; }
#xhs-panel #exportKwBtn {
background: #17a2b8; color: #fff;
}
#xhs-panel #exportHotBtn {
background: #6f42c1; color: #fff;
}
`;
/**
 * Create a <style> element holding the panel CSS and attach it to <head>.
 */
function injectStyles() {
  const styleEl = Object.assign(document.createElement('style'), {
    textContent: styles,
  });
  document.head.appendChild(styleEl);
}
/**
 * Create the floating collector panel, append it to <body> and return it.
 *
 * The markup provides every element the rest of the script looks up by id
 * (collapseBtn, kw, minD, maxD, startBtn, toggleBtn, stopBtn, progressWrap,
 * progressBar, statusText, logBox, exportKwBtn, exportHotBtn) and the
 * structure the stylesheet targets (header > h4 / .header-btns, .body, .row,
 * .btn-group, .export-zone). With an empty template those lookups return
 * null and initialisation throws.
 *
 * NOTE(review): the structure is reconstructed from the ids/classes used
 * elsewhere in the script; the visible labels are a best guess — adjust the
 * wording as needed.
 *
 * @returns {HTMLDivElement} the panel root element (#xhs-panel).
 */
function buildPanel() {
  const panel = document.createElement('div');
  panel.id = 'xhs-panel';
  // Initial control states mirror the idle state: pause/stop disabled and
  // export buttons hidden until a run has produced data.
  panel.innerHTML = `
<header>
  <h4>小红书长尾词采集器</h4>
  <div class="header-btns">
    <button id="collapseBtn" type="button">−</button>
  </div>
</header>
<div class="body">
  <div class="row">
    <label for="kw">种子词</label>
    <input type="text" id="kw" placeholder="输入种子关键词">
  </div>
  <div class="row">
    <label for="minD">延迟(秒)</label>
    <input type="number" id="minD" min="3" value="4">
    <label for="maxD">~</label>
    <input type="number" id="maxD" min="3" value="6">
  </div>
  <div class="btn-group">
    <button id="startBtn" type="button">▶ 开始</button>
    <button id="toggleBtn" type="button" disabled>⏸ 暂停</button>
    <button id="stopBtn" type="button" disabled>⏹ 停止</button>
  </div>
  <div id="progressWrap"><div id="progressBar"></div></div>
  <div id="statusText"></div>
  <div id="logBox"></div>
  <div class="export-zone">
    <button id="exportKwBtn" type="button" style="display:none">导出关键词</button>
    <button id="exportHotBtn" type="button" style="display:none">导出热搜</button>
  </div>
</div>
`;
  document.body.appendChild(panel);
  return panel;
}
/**
 * Make the panel draggable by its <header>.
 *
 * A mousedown on the header (but not on one of its buttons) starts a drag;
 * mouse moves anywhere in the document reposition the panel via `left`/`top`
 * and release the stylesheet's `right` anchoring; mouseup ends the drag.
 *
 * @param {HTMLElement} panel - panel root containing a <header> element.
 */
function applyDrag(panel) {
  const handle = panel.querySelector('header');
  // null while idle; while dragging, holds the panel and pointer origins.
  let drag = null;

  handle.addEventListener('mousedown', (e) => {
    if (e.target.tagName === 'BUTTON') return;
    const rect = panel.getBoundingClientRect();
    drag = {
      panelLeft: rect.left,
      panelTop: rect.top,
      pointerX: e.clientX,
      pointerY: e.clientY,
    };
  });

  document.addEventListener('mousemove', (e) => {
    if (drag === null) return;
    panel.style.left = `${drag.panelLeft + e.clientX - drag.pointerX}px`;
    panel.style.top = `${drag.panelTop + e.clientY - drag.pointerY}px`;
    panel.style.right = 'auto';
  });

  document.addEventListener('mouseup', () => {
    drag = null;
  });
}
/**
 * Resolve after roughly `ms` milliseconds.
 * @param {number} ms - delay passed to `setTimeout`.
 * @returns {Promise<void>}
 */
function sleep(ms) {
  return new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
}
/**
 * Pick a random integer in the inclusive range [min, max].
 * @param {number} min - lower bound.
 * @param {number} max - upper bound.
 * @returns {number}
 */
function randDelay(min, max) {
  const span = max - min + 1;
  const offset = Math.floor(Math.random() * span);
  return offset + min;
}
/**
 * Compute the wait before the next lookup: a random value between the
 * configured `state.minDelay` and `state.maxDelay`, plus up to one extra
 * second of fractional jitter.
 * @returns {number} delay in milliseconds (not necessarily an integer).
 */
function naturalDelay() {
  return randDelay(state.minDelay, state.maxDelay) + Math.random() * 1000;
}
/**
 * Pick a random (fractional) number in the half-open range [min, max).
 * @param {number} min - lower bound.
 * @param {number} max - upper bound.
 * @returns {number}
 */
function randRange(min, max) {
  const width = max - min;
  return min + Math.random() * width;
}
/**
 * Block while the run is paused, polling every
 * `CONFIG.PAUSE_CHECK_INTERVAL` milliseconds.
 * @returns {Promise<void>} resolves once the run is no longer paused.
 * @throws {Error} with message 'STOPPED' when a stop has been requested.
 */
async function pauseCheck() {
  for (;;) {
    const mustWait = state.isPaused && !state.isStopped;
    if (!mustWait) break;
    await sleep(CONFIG.PAUSE_CHECK_INTERVAL);
  }
  if (state.isStopped) {
    throw new Error('STOPPED');
  }
}
/**
 * Record whether a run is active and enable/disable the control buttons
 * accordingly. When the run ends, the pause button is reset to its default
 * label and appearance.
 * @param {boolean} running - true while a collection run is in progress.
 */
function updateUI(running) {
  state.running = running;

  const startBtn = $('startBtn');
  const toggleBtn = $('toggleBtn');
  const stopBtn = $('stopBtn');

  startBtn.disabled = running;
  toggleBtn.disabled = !running;
  stopBtn.disabled = !running;

  if (running) return;
  toggleBtn.textContent = '⏸ 暂停';
  toggleBtn.classList.remove('paused');
}
/**
 * Show, update or hide the progress bar and its status line.
 * @param {number} [pct] - completion percentage, clamped to 0..100;
 *   `undefined` hides both the bar and the status line.
 * @param {string} [text] - status text; left unchanged when omitted.
 */
function setProgress(pct, text) {
  const wrap = $('progressWrap');
  const bar = $('progressBar');
  const status = $('statusText');

  const visible = pct !== undefined;
  const display = visible ? 'block' : 'none';
  wrap.style.display = display;
  status.style.display = display;
  if (!visible) return;

  const clamped = Math.min(100, Math.max(0, pct));
  bar.style.width = `${clamped}%`;
  if (text !== undefined) {
    status.textContent = text;
  }
}
/**
 * Locate the page's search input.
 *
 * Only connected `<input>` elements are considered: the attribute selectors
 * below can also match wrapper elements (e.g. a `div` whose class contains
 * "search-input"), and those cannot receive a value through the native
 * `HTMLInputElement` setter used for typing.
 *
 * Preference order:
 *   1. the first input that has id `search-input` or carries no
 *      `data-hp-kind` attribute (presumably a marker for decoy inputs —
 *      TODO confirm);
 *   2. otherwise the first connected input of any kind.
 *
 * @returns {HTMLInputElement|null} the search input, or null when none exists.
 */
function findSearchBox() {
  const inputs = Array.from(
    document.querySelectorAll(
      '#search-input, input[placeholder*="搜索"], input[class*="search"], [class*="search-input"]'
    )
  ).filter((el) => el.isConnected && el.tagName === 'INPUT');

  const preferred = inputs.find(
    (el) => el.id === 'search-input' || !el.hasAttribute('data-hp-kind')
  );
  return preferred || inputs[0] || null;
}
/**
 * Set an input's value through the `value` setter defined on
 * `HTMLInputElement.prototype`, bypassing any setter installed on the
 * element instance itself. Falls back to plain assignment when the
 * prototype exposes no setter.
 * @param {HTMLInputElement} input - element to update.
 * @param {string} value - new value.
 */
function setInputValueNative(input, value) {
  const descriptor = Object.getOwnPropertyDescriptor(
    window.HTMLInputElement.prototype,
    'value'
  );
  const protoSetter = descriptor ? descriptor.set : undefined;
  if (!protoSetter) {
    input.value = value;
    return;
  }
  protoSetter.call(input, value);
}
/**
 * Dispatch a bubbling, cancelable `input` event on the element so that
 * listeners watching the field are notified of a programmatic value change.
 * @param {EventTarget} input - element whose value was just changed.
 */
function triggerVueReactivity(input) {
  const inputEvent = new Event('input', { bubbles: true, cancelable: true });
  input.dispatchEvent(inputEvent);
}
/**
 * Click, focus and select the search input, then pause for a short random
 * interval (180-350 ms) before typing begins.
 * @param {HTMLInputElement} input - the search input.
 * @returns {Promise<void>}
 */
async function activateSearchBox(input) {
  for (const action of ['click', 'focus', 'select']) {
    input[action]();
  }
  const settleMs = randDelay(180, 350);
  await sleep(settleMs);
}
/**
 * Click the document body (intended to close the suggestion dropdown), then
 * pause for a short random interval (150-300 ms).
 * @returns {Promise<void>}
 */
async function dismissSearch() {
  try {
    document.body.click();
  } catch (ignored) {
    // Best effort only: a failed click must not abort the collection run.
  }
  const settleMs = randDelay(150, 300);
  await sleep(settleMs);
}
/**
 * Type `text` into the search input one character at a time, firing an
 * `input` event after every keystroke.
 *
 * The text is split by Unicode code point rather than by UTF-16 index, so
 * characters outside the Basic Multilingual Plane (emoji, rare CJK
 * ideographs) are entered whole instead of as a dangling surrogate half.
 *
 * Typing ends early (without error) when a stop is requested or the input
 * is removed from the document.
 *
 * @param {HTMLInputElement} input - the search input.
 * @param {string} text - keyword to type.
 * @returns {Promise<void>}
 * @throws {Error} 'STOPPED' (from `pauseCheck`) when a stop is requested
 *   while typing is paused.
 */
async function typeKeyword(input, text) {
  // Start from an empty field.
  setInputValueNative(input, '');
  triggerVueReactivity(input);
  await sleep(randDelay(80, 180));

  const chars = Array.from(text);
  let typed = '';
  for (let i = 0; i < chars.length; i++) {
    if (state.isStopped || !input.isConnected) return;
    // pauseCheck() itself loops until resumed, so a single call suffices.
    if (state.isPaused) await pauseCheck();

    typed += chars[i];
    setInputValueNative(input, typed);
    triggerVueReactivity(input);

    // Linger slightly longer after the final character.
    const isLastChar = i === chars.length - 1;
    const delay = isLastChar ? randDelay(100, 250) : randDelay(35, 80);
    await sleep(delay);
  }
}
/**
 * Poll the page for the search-suggestion list and resolve with its entries.
 *
 * After an initial 600 ms wait the suggestion container is inspected every
 * 200 ms. The promise resolves with the suggestion texts as soon as at least
 * two usable entries (non-empty, shorter than 50 characters, single line)
 * are present. It resolves with an empty array when a stop is requested or
 * when `timeout` milliseconds pass without such a list appearing.
 *
 * @param {string} keyword - the keyword just typed (not used by the polling logic).
 * @param {number} [timeout=8000] - maximum time to keep polling, in ms.
 * @returns {Promise<string[]>} suggestion texts, or [] on stop/timeout.
 */
function waitForSugItems(keyword, timeout = 8000) {
  const POLL_INTERVAL_MS = 200;
  const INITIAL_WAIT_MS = 600;
  const MIN_SUGGESTIONS = 2;

  // Read the currently displayed, usable suggestion texts (possibly none).
  const readSuggestions = () => {
    const container = document.querySelector('.sug-container-wrapper, .sug-wrapper, .search-suggest');
    if (!container || !container.isConnected) return [];
    const nodes = container.querySelectorAll(
      '.sug-item, [class*="sug-item"]:not(a):not(li)'
    );
    return Array.from(nodes, (el) => el.innerText.trim())
      .filter(Boolean)
      .filter((t) => t.length < 50 && !t.includes('\n'));
  };

  return new Promise((resolve) => {
    const startedAt = Date.now();

    const poll = () => {
      if (state.isStopped) {
        resolve([]);
        return;
      }
      const suggestions = readSuggestions();
      if (suggestions.length >= MIN_SUGGESTIONS) {
        resolve(suggestions);
        return;
      }
      const timeLeft = Date.now() - startedAt < timeout;
      if (timeLeft) {
        setTimeout(poll, POLL_INTERVAL_MS);
      } else {
        resolve([]);
      }
    };

    setTimeout(poll, INITIAL_WAIT_MS);
  });
}
/**
 * Scroll the window smoothly by a small random amount (up to 60px in either
 * direction), then pause for 80-200 ms.
 * @returns {Promise<void>}
 */
async function humanSim() {
  const offset = (Math.random() - 0.5) * 120;
  window.scrollBy({ top: offset, behavior: 'smooth' });
  const pauseMs = randDelay(80, 200);
  await sleep(pauseMs);
}
/**
 * With probability `CONFIG.LEAVE_CHANCE`, idle for a random duration between
 * `CONFIG.LEAVE_MIN` and `CONFIG.LEAVE_MAX` milliseconds. The break is slept
 * in five equal slices so a stop request is noticed between slices; on stop
 * the function returns without logging the "back" message.
 * @returns {Promise<void>}
 */
async function maybeTakeBreak() {
  const takeBreak = Math.random() < CONFIG.LEAVE_CHANCE;
  if (!takeBreak) return;

  const rest = Math.round(randRange(CONFIG.LEAVE_MIN, CONFIG.LEAVE_MAX));
  logInfo(`😴 模拟暂离 ${Math.round(rest / 1000)}s`);

  const steps = 5;
  let remaining = steps;
  while (remaining > 0) {
    if (state.isStopped) return;
    await sleep(rest / steps);
    remaining -= 1;
  }
  logInfo('👋 回来了,继续采集');
}
/**
 * Type `keyword` into the search box and collect the suggestions shown.
 *
 * @param {string} keyword - text to type into the search input.
 * @returns {Promise<string[]>} suggestion texts; an empty array when the
 *   search box cannot be found, a stop was requested, or no suggestions
 *   appeared within 8 seconds.
 */
async function grabWords(keyword) {
  const input = findSearchBox();
  if (!input) {
    logError('未找到搜索输入框');
    return [];
  }

  try {
    await activateSearchBox(input);
    await typeKeyword(input, keyword);
  } catch (err) {
    // A stop requested while typing is paused surfaces as the 'STOPPED'
    // sentinel; that is a normal shutdown, not an input failure.
    if (err && err.message === 'STOPPED') return [];
    logWarn('输入过程异常,尝试继续');
    // Keep the underlying cause visible for debugging.
    console.error(err);
  }
  if (state.isStopped) return [];

  const results = await waitForSugItems(keyword, 8000);
  try {
    await dismissSearch();
  } catch (err) {
    // Best effort: failing to close the dropdown must not discard results.
  }
  return results;
}
/**
 * Read the hot-search entries currently rendered on the page.
 * Selectors are tried in order and the first one that matches any element
 * supplies the result.
 * @returns {string[]} trimmed, non-empty entry texts; [] when nothing matches.
 */
function grabHot() {
  const selectors = [
    '.query-note-list .query-note-item .item-text',
    '[class*="hot-list"] [class*="item"]',
    '[class*="trend"] [class*="item"]',
  ];

  let matched = null;
  selectors.some((selector) => {
    const nodes = document.querySelectorAll(selector);
    if (nodes.length > 0) {
      matched = nodes;
      return true;
    }
    return false;
  });

  if (matched === null) return [];
  return Array.from(matched, (node) => node.innerText.trim()).filter(Boolean);
}
/**
 * Run one full collection pass for the seed keyword entered in the panel.
 *
 * Steps:
 *   1. collect the seed's suggestions (first layer);
 *   2. for each first-layer word, wait a randomised delay and collect its
 *      suggestions (second layer), keeping only words not seen before;
 *   3. read the hot-search list from the page.
 *
 * Results are stored in `state.allData` / `state.hotData`. The export
 * buttons are revealed for whatever data exists when the run ends — on
 * normal completion, on a user stop, and also after an unexpected error, so
 * partially collected data is never stranded.
 *
 * Does nothing when a run is already in progress or the seed is empty.
 *
 * @returns {Promise<void>}
 */
async function startCollection() {
  if (state.running) return;
  state.isStopped = false;
  state.isPaused = false;
  state.collectedKeywords = new Set();

  const seed = $('kw').value.trim();
  if (!seed) {
    logError('请先输入种子关键词');
    return;
  }

  // Delay inputs are in seconds. Blank/non-numeric input falls back to 4/6,
  // swapped bounds are tolerated, and a 3 s floor is enforced.
  const rawMin = parseInt($('minD').value, 10) || 4;
  const rawMax = parseInt($('maxD').value, 10) || 6;
  state.minDelay = Math.max(3, Math.min(rawMin, rawMax)) * 1000;
  state.maxDelay = Math.max(3, Math.max(rawMin, rawMax)) * 1000;

  state.allData = [];
  state.hotData = [];
  $('exportKwBtn').style.display = 'none';
  $('exportHotBtn').style.display = 'none';
  $('logBox').innerHTML = '';

  // Show each export button only when its data set is non-empty.
  const revealExportButtons = () => {
    $('exportKwBtn').style.display = state.allData.length > 0 ? 'block' : 'none';
    $('exportHotBtn').style.display = state.hotData.length > 0 ? 'block' : 'none';
  };

  updateUI(true);
  setProgress(0, '正在采集第一层联想词...');
  try {
    logSuccess(`开始采集:${seed}`);
    logInfo(`延迟范围:${state.minDelay/1000}-${state.maxDelay/1000}秒`);
    await humanSim();
    logInfo('正在输入关键词...');

    // ---- First layer: suggestions for the seed itself.
    const firstLayer = await grabWords(seed);
    if (state.isStopped) throw new Error('STOPPED');
    if (firstLayer.length === 0) {
      logWarn('未获取到联想词,可能是页面结构有变化');
    } else {
      logSuccess(`第一层获取到 ${firstLayer.length} 个联想词`);
    }
    for (const w of firstLayer) {
      state.collectedKeywords.add(w);
      state.allData.push({ keyword: seed, first: w, second: '' });
    }

    // ---- Second layer: suggestions for every first-layer word.
    let completed = 0;
    const total = firstLayer.length;
    for (const f of firstLayer) {
      if (state.isStopped) throw new Error('STOPPED');
      await pauseCheck();
      const pct = total > 0 ? (completed / total) * 100 : 0;
      setProgress(pct, `第二层采集:${completed}/${total} — "${f}"`);

      const wait = naturalDelay();
      logInfo(`等待 ${(wait / 1000).toFixed(1)}s 后采集 "${f}" 的下层词`);
      await sleep(wait);
      // Stop/pause may have been requested during the wait.
      if (state.isStopped) throw new Error('STOPPED');
      await pauseCheck();
      await humanSim();
      await maybeTakeBreak();

      logInfo(`🔍 采集 "${f}" 的联想词...`);
      const secondLayer = await grabWords(f);
      if (state.isStopped) throw new Error('STOPPED');

      // Record only words not collected earlier in this run.
      const newWords = secondLayer.filter(w => !state.collectedKeywords.has(w));
      for (const w of newWords) {
        state.collectedKeywords.add(w);
        state.allData.push({ keyword: seed, first: f, second: w });
      }
      if (newWords.length > 0) {
        logSuccess(`"${f}" → ${newWords.length} 个新词(共 ${secondLayer.length} 个)`);
      } else {
        logInfo(`"${f}" → 无新词`);
      }
      completed++;
    }
    setProgress(100, '第一轮采集完成');

    // ---- Hot-search list.
    await humanSim();
    await sleep(randDelay(1000, 2000));
    logInfo('正在采集热搜...');
    const hot = grabHot();
    hot.forEach(h => state.hotData.push({ hot: h }));
    if (hot.length > 0) {
      logSuccess(`热搜采集到 ${hot.length} 条`);
    } else {
      logWarn('未获取到热搜数据');
    }

    const kwCount = state.allData.length;
    const hotCount = state.hotData.length;
    logSuccess(`采集完成!关键词 ${kwCount} 条,热搜 ${hotCount} 条`);
    revealExportButtons();
    setProgress(100, '✅ 采集完成');
    await sleep(2000);
    setProgress(undefined);
  } catch (err) {
    if (err.message === 'STOPPED') {
      logWarn('⏹ 已停止采集');
    } else {
      logError(`采集出错:${err.message}`);
      console.error(err);
    }
    // Whatever ended the run early, keep already collected rows exportable.
    const kwCount = state.allData.length;
    const hotCount = state.hotData.length;
    if (kwCount > 0 || hotCount > 0) {
      revealExportButtons();
      logInfo(`已采集的数据:关键词 ${kwCount} 条,热搜 ${hotCount} 条,可导出`);
    }
    setProgress(undefined);
  } finally {
    updateUI(false);
  }
}
/**
 * Export the collected keyword rows to an .xlsx file with one sheet of
 * three columns (seed, first layer, second layer). The file name embeds the
 * seed keyword of the first row.
 * Logs an error when the XLSX library is unavailable and a warning when
 * there is nothing to export.
 */
function exportKeywords() {
  if (typeof XLSX === 'undefined') {
    logError('XLSX 库未加载,请检查网络后刷新页面重试');
    return;
  }
  const records = state.allData;
  if (records.length === 0) {
    logWarn('没有关键词数据可导出');
    return;
  }

  const header = ['种子词', '第一层', '第二层'];
  const rows = [header].concat(
    records.map((record) => [record.keyword, record.first, record.second])
  );

  const workbook = XLSX.utils.book_new();
  const sheet = XLSX.utils.aoa_to_sheet(rows);
  // Column widths, in characters.
  sheet['!cols'] = [20, 25, 25].map((wch) => ({ wch }));
  XLSX.utils.book_append_sheet(workbook, sheet, '长尾词');
  XLSX.writeFile(workbook, `小红书_长尾词_${records[0].keyword}.xlsx`);
  logSuccess(`导出关键词成功:共 ${records.length} 条`);
}
/**
 * Export the collected hot-search entries to a single-column .xlsx file.
 * The file name embeds the current date as given by `toISOString()`
 * (YYYY-MM-DD).
 * Logs an error when the XLSX library is unavailable and a warning when
 * there is nothing to export.
 */
function exportHot() {
  if (typeof XLSX === 'undefined') {
    logError('XLSX 库未加载,请检查网络后刷新页面重试');
    return;
  }
  const entries = state.hotData;
  if (entries.length === 0) {
    logWarn('没有热搜数据可导出');
    return;
  }

  const rows = [['热搜词']].concat(entries.map((entry) => [entry.hot]));

  const workbook = XLSX.utils.book_new();
  const sheet = XLSX.utils.aoa_to_sheet(rows);
  sheet['!cols'] = [{ wch: 30 }];
  XLSX.utils.book_append_sheet(workbook, sheet, '热搜');

  const dateStamp = new Date().toISOString().slice(0, 10);
  XLSX.writeFile(workbook, `小红书_热搜_${dateStamp}.xlsx`);
  logSuccess(`导出热搜成功:共 ${entries.length} 条`);
}
/**
 * Build the panel and wire up its controls: collapse toggle, pause/resume,
 * stop, start, the two export buttons, and header dragging.
 */
function init() {
  injectStyles();
  const panel = buildPanel();

  const collapseBtn = $('collapseBtn');
  const toggleBtn = $('toggleBtn');

  // Collapse / expand the panel.
  collapseBtn.onclick = () => {
    const collapsed = !state.collapsed;
    state.collapsed = collapsed;
    panel.classList.toggle('collapsed', collapsed);
    collapseBtn.textContent = collapsed ? '+' : '−';
  };

  // Pause / resume the running collection.
  toggleBtn.onclick = () => {
    state.isPaused = !state.isPaused;
    if (!state.isPaused) {
      toggleBtn.textContent = '⏸ 暂停';
      toggleBtn.classList.remove('paused');
      logInfo('▶ 继续采集');
      return;
    }
    toggleBtn.textContent = '▶ 继续';
    toggleBtn.classList.add('paused');
    logInfo('⏸ 已暂停');
  };

  // Request a stop; clearing the pause flag lets a paused run notice it.
  $('stopBtn').onclick = () => {
    state.isStopped = true;
    state.isPaused = false;
    logWarn('正在停止...');
  };

  $('startBtn').onclick = startCollection;
  $('exportKwBtn').onclick = exportKeywords;
  $('exportHotBtn').onclick = exportHot;

  applyDrag(panel);
}
// Start immediately when the DOM is already parsed; otherwise wait for it.
if (document.readyState !== 'loading') {
  init();
} else {
  document.addEventListener('DOMContentLoaded', init);
}
})();