// 自定义格式化文献
// ==UserScript==
// @name 自定义格式化文献
// @namespace https://bbs.tampermonkey.net.cn/
// @version 0.2.7
// @description 目前支持sciencedirect,rsc和springer, onlinelibrary.wiley.com
// @author tfsn20
// @match https://www.sciencedirect.com/*
// @match https://pubs.rsc.org/*
// @match https://link.springer.com/*
// @match https://onlinelibrary.wiley.com/*
// @match https://webvpn.zzuli.edu.cn/https/*
// @grant GM_getValue
// @grant GM_setValue
// ==/UserScript==
/* ==UserConfig==
自定义格式化参数:
figAnnotation:
title: 是否保留图片注释
description: 是否保留图片注释, 勾选时会保留
type: checkbox
default: false
tableRelated:
title: 是否保留表格相关
description: 是否保留表格相关, 勾选时会保留
type: checkbox
default: false
periodToOneDotLeader:
title: 涉及图指, 公式指, 表格指, 标题句点转换为一点前导符, 在标题和关键词末尾加句点; 删除表注和表格
description: 此选项用于深度学习时分割句子的区别, 勾选时请将1, 2, 4, 5 取消勾选
type: checkbox
default: false
title:
title: 是否保留序列标题
description: 不是论文题目,而是每一部分的小标题
type: checkbox
default: true
==/UserConfig== */
(function () {
'use strict';
// User settings stored by the script manager under the "自定义格式化参数" group.
// The second argument is the fallback used when nothing has been stored yet; the
// fallbacks mirror the `default:` values declared in the ==UserConfig== block, so a
// fresh install keeps section headings (title: true) and drops figures/tables.
const figAnnotation = GM_getValue('自定义格式化参数.figAnnotation', false);
const periodToOneDotLeader = GM_getValue('自定义格式化参数.periodToOneDotLeader', false);
const tableRelated = GM_getValue('自定义格式化参数.tableRelated', false);
const title = GM_getValue('自定义格式化参数.title', true);

// Second path segment of the current URL (e.g. "/https/<id>/..." yields "<id>"),
// or null when the path has fewer than two segments. The site dispatch compares it
// against fixed identifiers on the WebVPN host.
const pathMatch = window.location.pathname.match(/(?:\/[^\/]*)\/([^\/]*)/);
const webvpnPara = pathMatch ? pathMatch[1] : null;

// Load the docx library (used for the .docx export) by appending a <script> tag
// to the document head.
const script = document.createElement('script');
script.src = 'https://cdn.jsdelivr.net/npm/docx@8.2.4/build/index.umd.min.js';
document.head.appendChild(script);
/**
 * Return the rendered text of the first element matching a selector, cleaned up
 * for export.
 *
 * Clean-up steps, in order:
 *  1. A run of citation markers that innerText renders on their own lines
 *     (" \n[1]\n, \n[2]\n. ") collapses to the punctuation that ends the run.
 *  2. Inline bracketed citations such as " [1,2]" or "[3, [4]]" are removed.
 *  3. U+223C (tilde operator) becomes "~"; U+2212 (minus), U+2013 (en dash) and
 *     U+2010 (hyphen) become "-". U+2014 (em dash) is deliberately left alone.
 *  4. U+2009 (thin space), U+200A (hair space) and U+00A0 (no-break space) become
 *     a plain space; U+2005 (four-per-em space) is dropped.
 *  5. The space between "wt." / "at." and a following "%" is removed.
 *
 * All special characters are written as \u escapes so they cannot be confused
 * with (or silently converted to) an ASCII space by an editor.
 *
 * @param {string} element - CSS selector of the element to read.
 * @returns {string} The normalized text, or '' when no element matches.
 */
function formatInnerText(element) {
    const node = document.querySelector(element);
    if (!node) {
        // A missing optional section (e.g. no abstract) should not abort the export.
        return '';
    }
    return node.innerText
        // Keep only the closing punctuation (plus its optional space) of the run;
        // a fixed two-character slice would keep a stray "\n" when no space follows.
        .replace(/ (\n\[\d+\]\n[.,] ?)+/g, (run) => run.match(/[.,] ?$/)[0])
        .replace(/ ?\[[\d\[\], ]*\]/g, '')
        .replace(/\u223c/g, '~')
        .replace(/[\u2212\u2013\u2010]/g, '-')
        .replace(/[\u2009\u200a\u00a0]/g, ' ')
        .replace(/\u2005/g, '')
        // Only touch "wt. %" / "at. %"; a bare /at\. / would also glue "that. Next".
        .replace(/\b(wt|at)\. %/g, '$1.%');
}
/**
 * Re-join text whose short inline fragments were split onto their own lines.
 *
 * The text is split on "\n". The break between two neighbouring lines is dropped
 * (the lines are glued with nothing in between) when either neighbour is a short
 * fragment, i.e. non-empty and shorter than 8 characters; otherwise the "\n" is
 * kept. Empty lines never count as short fragments.
 *
 * @param {string} text - Text with "\n" line breaks.
 * @returns {string} The text with breaks around short fragments removed.
 */
function replaceNewlinesWithCondition(text) {
    const isShortFragment = (segment) => segment.length != 0 && segment.length < 8;
    const segments = text.split('\n');
    const lastIndex = segments.length - 1;

    return segments
        .map((segment, idx) => {
            // The final line has no following break to decide on.
            if (idx === lastIndex) {
                return segment;
            }
            const dropBreak = isShortFragment(segment) || isShortFragment(segments[idx + 1]);
            return dropBreak ? segment : segment + '\n';
        })
        .join('');
}
/**
 * Create the "Format this document" button, pin it to the top centre of the
 * viewport and append it to document.body.
 *
 * Both colours are the parenthesised argument list of a CSS rgba() value,
 * e.g. '(235, 101, 0, 1)'; the "rgba" prefix is added here.
 *
 * @param {string} fontColor - Text colour as an rgba argument list.
 * @param {string} backColor - Background colour as an rgba argument list.
 * @returns {HTMLButtonElement} The button that was appended to the page.
 */
function createButton(fontColor = '(235, 101, 0, 1)', backColor = '(228, 229, 229, 0.5)') {
    const trigger = Object.assign(document.createElement('button'), {
        // The id is the hook the blink animation stylesheet targets.
        id: 'blinkingButton',
        innerHTML: 'Format this document',
    });
    trigger.style.cssText = `position: fixed; top: 5px; left: 50%; transform: translateX(-50%); padding: 5px 5px; background-color: rgba${backColor}; color: rgba${fontColor}; border: none; border-radius: 10px; cursor: pointer; z-index: 9999; font-family: "Times New Roman", Times, serif; font-size: 16px; `;
    document.body.appendChild(trigger);
    return trigger;
}
/**
 * Download the formatted article text as four files: .txt, .html, .doc and .docx.
 *
 * The text is treated as plain text with "\n" line breaks. In the HTML-based
 * formats it is HTML-escaped and written statically into the file, so characters
 * such as "<", "&", backticks or backslashes in the article cannot corrupt the
 * output. Each line becomes exactly one paragraph in the .doc and .docx files.
 *
 * The .docx file is built with the global `docx` library. If that library is not
 * available the other three files are still downloaded and a warning is logged.
 *
 * @param {string} text - The formatted article text.
 * @param {string} [fileName='doc'] - Base name (no extension) for all files.
 * @param {string} [imgLink=''] - Raw HTML (e.g. a <link rel="icon">) inserted
 *     verbatim into the <head> of the .html file.
 * @returns {void}
 */
function downDoc(text, fileName = 'doc', imgLink = '') {
    // Escape the characters that are significant in HTML text and attribute values.
    const escapeHtml = (raw) => String(raw)
        .replace(/&/g, '&amp;')
        .replace(/</g, '&lt;')
        .replace(/>/g, '&gt;')
        .replace(/"/g, '&quot;');

    // Offer `blob` as a download called `name` by clicking a temporary <a> element.
    const saveBlob = (blob, name) => {
        const anchor = document.createElement('a');
        anchor.href = window.URL.createObjectURL(blob);
        anchor.download = name;
        document.body.appendChild(anchor);
        anchor.click();
        document.body.removeChild(anchor);
    };

    const lines = text.split('\n');

    /* txt */
    saveBlob(new Blob([text], { type: 'text/plain' }), `${fileName}.txt`);

    /* html */
    const htmlPage = [
        '<!DOCTYPE html>',
        '<html lang="en">',
        '<head>',
        '<meta charset="UTF-8">',
        '<meta name="viewport" content="width=device-width, initial-scale=1.0">',
        `<title>${escapeHtml(fileName)}</title>`,
        imgLink,
        '<style type="text/css">',
        '#text-container {',
        "font-family:'Times New Roman';",
        'font-size: 20px',
        '}',
        '</style>',
        '</head>',
        '<body>',
        // Line breaks become <br>; no script is needed to render the text.
        `<div id="text-container">${lines.map((line) => escapeHtml(line)).join('<br>')}</div>`,
        '</body>',
        '</html>',
    ].join('\n');
    saveBlob(new Blob([htmlPage], { type: 'text/html;charset=utf-8' }), `${fileName}.html`);

    /* doc */
    // One <p> per line. A global /(.*)/ replace would also match the empty string
    // after every line and add a spurious empty paragraph.
    const wordParagraphs = lines.map((line) => `<p>${escapeHtml(line)}</p>`).join('\n');
    const wordPage = '<!DOCTYPE html><html><head><meta charset="UTF-8"></head>'
        // Single quotes inside the attribute keep the font list in one style value.
        + '<body style="font-family: \'Times New Roman\', Times, serif; ">'
        + wordParagraphs
        + '</body></html>';
    saveBlob(new Blob([wordPage], { type: 'application/msword;charset=utf-8' }), `${fileName}.doc`);

    /* docx */
    if (typeof docx === 'undefined') {
        console.warn('downDoc: the docx library is not loaded; skipping the .docx download');
        return;
    }
    const paragraphs = lines.map((line) => new docx.Paragraph({
        children: [new docx.TextRun(line)],
    }));
    const doc = new docx.Document({
        sections: [{
            properties: {},
            children: paragraphs,
        }],
    });
    docx.Packer.toBlob(doc)
        .then((blob) => saveBlob(blob, `${fileName}.docx`))
        .catch((err) => console.error('downDoc: failed to build the .docx file', err));
}
// U+2024 ONE DOT LEADER. When the periodToOneDotLeader setting is on, it replaces
// "." wherever the dot does not end a sentence (headings, "Fig.", "Eq.", "et al.",
// "e.g."), so the exported text can be split into sentences on ".".
const ONE_DOT_LEADER = '\u2024';
const HEADING_SELECTOR = 'h1, h2, h3, h4, h5';

/** Detach from the page every element under `root` that matches `selector`. */
function removeAll(root, selector) {
    root.querySelectorAll(selector).forEach((node) => { node.parentNode.removeChild(node); });
}

/** Swap every "." in the headings under `root` for a one dot leader, then end each heading with ".". */
function dotLeadHeadings(root) {
    root.querySelectorAll(HEADING_SELECTOR).forEach((heading) => {
        heading.innerText = heading.innerText.replace(/\./g, ONE_DOT_LEADER) + '.';
    });
}

/**
 * Replace the period of common abbreviations with a one dot leader.
 * Covers "Fig."/"Figs.", "Eq."/"Eqs."/"eq."/"eqs.", "et al." and "e.g.".
 * The equation pattern is [Ee]qs?: the alternation (E|eqs?) would match a bare
 * "E." and miss the capitalised "Eq.".
 */
function protectAbbreviations(text) {
    return text
        .replace(/(Figs?)\./g, `$1${ONE_DOT_LEADER}`)
        .replace(/([Ee]qs?)\./g, `$1${ONE_DOT_LEADER}`)
        .replace(/(et al)\./g, `$1${ONE_DOT_LEADER}`)
        .replace(/e\.g\./g, `e${ONE_DOT_LEADER}g${ONE_DOT_LEADER}`);
}

/**
 * Build the keywords paragraph of a ScienceDirect article: a blank line, the
 * section heading, a blank line, then the keywords joined with "; ". A closing
 * "." is added when periodToOneDotLeader is on. Returns '' if there is no
 * keywords section.
 */
function collectScienceDirectKeywords() {
    const container = document.querySelector('.Keywords');
    if (!container) return '';
    const heading = container.querySelector('h2').innerText;
    const keywords = Array.from(
        container.querySelectorAll('.keyword'),
        (node) => node.querySelector('span').innerText
    );
    const closing = periodToOneDotLeader ? '.' : '';
    return `\n${heading}\n\n${keywords.join('; ')}${closing}\n`;
}

// Pick the site-specific handler. Each branch adds the button and, on click,
// prunes the live page (figures, tables, citations, headings according to the
// settings), extracts the text and hands it to downDoc.
if (window.location.host === 'www.sciencedirect.com' || webvpnPara === '77726476706e69737468656265737421e7e056d234336155700b8ca891472636a6d29e640e') {
    const button = createButton('(235, 101, 0, 1)', '(228, 229, 229, 0.5)');
    button.addEventListener('click', function () {
        // The file name is read before headings are rewritten below.
        const fileName = document.querySelector('article').querySelector('h1').innerText;
        removeAll(document, '.download-link');
        if (!figAnnotation) removeAll(document, 'Figure');
        // Give headings a final period and turn their inner periods into one dot
        // leaders. Doing the same for figure/equation/table links by href was
        // tried but is not general; protectAbbreviations handles those in text.
        if (periodToOneDotLeader) dotLeadHeadings(document.querySelector('article'));
        // Drop tables and table notes: ids that start with "t" and end in digits
        // (lists whose id merely starts with "t" also exist and are kept).
        if (!tableRelated) {
            document.querySelectorAll('[id^="t"]').forEach((node) => {
                if (node.id.match(/^t.*\d+$/)) node.parentNode.removeChild(node);
            });
        }
        // Images with alt text 'radical dot' become a U+2022 bullet character.
        document.querySelectorAll('img').forEach((image) => {
            if (image.alt === 'radical dot') {
                image.parentNode.replaceChild(document.createTextNode('\u2022'), image);
            }
        });
        if (!title) removeAll(document.querySelector('#body>div'), HEADING_SELECTOR);

        const titleText = formatInnerText('article h1');
        const abstractText = formatInnerText('#abstracts');
        const keywordsText = collectScienceDirectKeywords();
        const bodyText = replaceNewlinesWithCondition(formatInnerText('#body>div'));
        let text = `${titleText}\n\n${abstractText}\n${keywordsText}\n${bodyText}`;
        if (periodToOneDotLeader) text = protectAbbreviations(text);
        downDoc(text, fileName, '<link rel="icon" href="https://sciencedirect.elseviercdn.cn/shared-assets/103/images/favSD.ico" type="image/x-icon">');
    });
} else if (window.location.host === 'pubs.rsc.org') {
    // Alternative colours: '(238,220,0,1)' yellow, '(72,169,197)' light blue,
    // '(0,73,118)' dark blue, '(151,215,0)' grass green.
    const button = createButton('(255,255,255,1)', '(239,51,64,0.9)');
    button.addEventListener('click', function () {
        // Remove the author list.
        removeAll(document, '.article__authors');
        // Remove every link (mainly citations and the dagger link in the title).
        removeAll(document, 'a');
        const fileName = (document.querySelector('div.article__title > h2') || document.querySelector('div>h1')).innerText;
        const rootSelector = document.querySelector('article') ? 'article' : '#wrapper';
        // Cut everything from the "Conflicts of interest" / "Data availability" sections on.
        let text = formatInnerText(rootSelector)
            .replace(/\n\nConflicts of interest[\s\S]*/g, '')
            .replace(/Data availability[\s\S]*/g, '');
        if (periodToOneDotLeader) text = protectAbbreviations(text);
        downDoc(text, fileName, '<link rel="icon" type="image/png" sizes="32x32" href="https://pubs.rsc.org/favicon-32x32.png">');
    });
} else if (window.location.host === 'link.springer.com' || webvpnPara === '77726476706e69737468656265737421fcfe4f976923784277068ea98a1b203a54') {
    const button = createButton('(27,48,81,0.9)', '(206,219,224,0.8)');
    button.addEventListener('click', function () {
        const fileName = document.querySelector('h1').innerText;
        // Empty every child of .main-content that follows the section whose
        // data-title is exactly 'Conclusion'.
        // NOTE(review): a section titled 'Conclusions' does not match, in which
        // case nothing is emptied - confirm whether that is intended.
        let afterConclusion = false;
        for (const section of Array.from(document.querySelector('.main-content').children)) {
            if (afterConclusion) {
                section.innerHTML = '';
            } else if (section.getAttribute('data-title') === 'Conclusion') {
                afterConclusion = true;
            }
        }
        // Figures and their captions.
        if (!figAnnotation) removeAll(document, '[data-test="figure"]');
        // Tables.
        if (!tableRelated) removeAll(document, '[id*=table-]');
        // Section headings: add the final period first, then drop them if unwanted.
        if (periodToOneDotLeader) dotLeadHeadings(document.querySelector('#main'));
        if (!title) removeAll(document.querySelector('#main'), HEADING_SELECTOR);

        const titleText = formatInnerText('h1[data-test="article-title"]');
        const abstractText = formatInnerText('section[data-title="Abstract"]');
        const bodyText = formatInnerText('.main-content');
        let text = `${titleText}\n\n${abstractText}\n\n${bodyText}`;
        if (periodToOneDotLeader) text = protectAbbreviations(text);
        downDoc(text, fileName, '<link rel="icon" type="image/png" sizes="32x32" href="https://link.springer.com/oscar-static/img/favicons/darwin/favicon-32x32-1435da3e82.png">');
    });
} else if (window.location.host === 'onlinelibrary.wiley.com') {
    const button = createButton('(27,48,81,0.9)', '(206,219,224,0.8)');
    button.addEventListener('click', function () {
        const fileName = document.querySelector('.citation__title').innerText;
        const fullText = document.querySelector('section[class*="article-section__full"]');
        // Keep the leading run of <section> children; empty the first child that
        // is not a <section> and every child after it.
        let pastSections = false;
        for (const child of Array.from(fullText.children)) {
            if (!pastSections && child.tagName !== 'SECTION') pastSections = true;
            if (pastSections) child.innerHTML = '';
        }
        if (periodToOneDotLeader) dotLeadHeadings(document);
        if (!figAnnotation) removeAll(document, '.article-section__inline-figure');
        if (!tableRelated) removeAll(document, '.article-table-content');
        // Remove <span> elements, which strips the citations.
        removeAll(fullText, 'span');
        if (!title) removeAll(fullText, HEADING_SELECTOR);

        const titleText = formatInnerText('.citation__title');
        const abstractText = formatInnerText('div[class*="abstract-group"]');
        const bodyText = formatInnerText('section[class*="article-section__full"]');
        let text = `${titleText}\n\n${abstractText}\n\n${bodyText}`;
        if (periodToOneDotLeader) text = protectAbbreviations(text);
        downDoc(text, fileName, `<link rel="icon" href="https://onlinelibrary.wiley.com/favicon.ico">`);
    });
}
// Add a stylesheet to the document head that makes #blinkingButton (the id given
// to the button by createButton) fade out and back in once per second.
document.head.appendChild(Object.assign(document.createElement('style'), {
    textContent: `
@keyframes blink {
0% {
opacity: 1;
}
50% {
opacity: 0;
}
100% {
opacity: 1;
}
}
#blinkingButton {
animation: blink 1s infinite;
}
`,
}));
})();