// ==UserScript== // @name 拼音标注 // @namespace https://bbs.tampermonkey.net.cn/ // @version 1.0 // @description 音频拼音标注工具,集成托德拼音合法性、变调、轻声与儿化音实时检测,支持一键回填,全格子可编辑,智能前后移位 // @author You // @match https://tlabel.tencent.com/* // @grant GM_xmlhttpRequest // @license MIT // ==/UserScript== (function() { // 1. 拼音数据与检测引擎 const VALID_PINYIN_ARRAY = ["a1", "a2", "a3", "a4", "a5", "ai1", "ai2", "ai3", "ai4", "ai5", "an1", "an2", "an3", "an4", "an5", "ang1", "ang2", "ang3", "ang4", "ang5", "ao1", "ao2", "ao3", "ao4", "ao5", "aor1", "aor2", "aor3", "aor4", "aor5", "ar1", "ar2", "ar3", "ar4", "ar5", "ba1", "ba2", "ba3", "ba4", "ba5", "bai1", "bai2", "bai3", "bai4", "bai5", "ban1", "ban2", "ban3", "ban4", "ban5", "bang1", "bang2", "bang3", "bang4", "bang5", "bangr1", "bangr2", "bangr3", "bangr4", "bangr5", "bao1", "bao2", "bao3", "bao4", "bao5", "baor1", "baor2", "baor3", "baor4", "baor5", "bar1", "bar2", "bar3", "bar4", "bar5", "bei1", "bei2", "bei3", "bei4", "bei5", "ben1", "ben2", "ben3", "ben4", "ben5", "beng1", "beng2", "beng3", "beng4", "beng5", "bengr1", "bengr2", "bengr3", "bengr4", "bengr5", "benr1", "benr2", "benr3", "benr4", "benr5", "bi1", "bi2", "bi3", "bi4", "bi5", "bian1", "bian2", "bian3", "bian4", "bian5", "bianr1", "bianr2", "bianr3", "bianr4", "bianr5", "biao1", "biao2", "biao3", "biao4", "biao5", "biaor1", "biaor2", "biaor3", "biaor4", "biaor5", "bie1", "bie2", "bie3", "bie4", "bie5", "bin1", "bin2", "bin3", "bin4", "bin5", "bing1", "bing2", "bing3", "bing4", "bing5", "bingr1", "bingr2", "bingr3", "bingr4", "bingr5", "bir1", "bir2", "bir3", "bir4", "bir5", "bo1", "bo2", "bo3", "bo4", "bo5", "bor1", "bor2", "bor3", "bor4", "bor5", "bu1", "bu2", "bu3", "bu4", "bu5", "bur1", "bur2", "bur3", "bur4", "bur5", "ca1", "ca2", "ca3", "ca4", "ca5", "cai1", "cai2", "cai3", "cai4", "cai5", "can1", "can2", "can3", "can4", "can5", "cang1", "cang2", "cang3", "cang4", "cang5", "cao1", "cao2", "cao3", "cao4", "cao5", "caor1", "caor2", "caor3", "caor4", "caor5", "car1", "car2", "car3", "car4", "car5", "ce1", "ce2", "ce3", "ce4", "ce5", "cei1", "cei2", "cei3", "cei4", "cei5", "cen1", "cen2", "cen3", "cen4", "cen5", "ceng1", "ceng2", "ceng3", "ceng4", "ceng5", "cengr1", "cengr2", "cengr3", "cengr4", "cengr5", "cha1", "cha2", "cha3", "cha4", "cha5", "chai1", "chai2", "chai3", "chai4", "chai5", "chan1", "chan2", "chan3", "chan4", "chan5", "chang1", "chang2", "chang3", "chang4", "chang5", "changr1", "changr2", "changr3", "changr4", "changr5", "chao1", "chao2", "chao3", "chao4", "chao5", "chaor1", "chaor2", "chaor3", "chaor4", "chaor5", "char1", "char2", "char3", "char4", "char5", "che1", "che2", "che3", "che4", "che5", "chen1", "chen2", "chen3", "chen4", "chen5", "cheng1", "cheng2", "cheng3", "cheng4", "cheng5", "chengr1", "chengr2", "chengr3", "chengr4", "chengr5", "chi1", "chi2", "chi3", "chi4", "chi5", "chir1", "chir2", "chir3", "chir4", "chir5", "chong1", "chong2", "chong3", "chong4", "chong5", "chongr1", "chongr2", "chongr3", "chongr4", "chongr5", "chou1", "chou2", "chou3", "chou4", "chou5", "chour1", "chour2", "chour3", "chour4", "chour5", "chu1", "chu2", "chu3", "chu4", "chu5", "chua1", "chua2", "chua3", "chua4", "chua5", "chuai1", "chuai2", "chuai3", "chuai4", "chuai5", "chuan1", "chuan2", "chuan3", "chuan4", "chuan5", "chuang1", "chuang2", "chuang3", "chuang4", "chuang5", "chuangr1", "chuangr2", "chuangr3", "chuangr4", "chuangr5", "chuar1", "chuar2", "chuar3", "chuar4", "chuar5", "chui1", "chui2", "chui3", "chui4", "chui5", "chun1", "chun2", "chun3", "chun4", "chun5", "chuo1", "chuo2", "chuo3", "chuo4", "chuo5", "chuor1", "chuor2", "chuor3", "chuor4", "chuor5", "chur1", "chur2", "chur3", "chur4", "chur5", "ci1", "ci2", "ci3", "ci4", "ci5", "cir1", "cir2", "cir3", "cir4", "cir5", "cong1", "cong2", "cong3", "cong4", "cong5", "congr1", "congr2", "congr3", "congr4", "congr5", "cou1", "cou2", "cou3", "cou4", "cou5", "cour1", "cour2", "cour3", "cour4", "cour5", "cu1", "cu2", "cu3", "cu4", "cu5", "cuan1", "cuan2", "cuan3", "cuan4", "cuan5", "cui1", "cui2", "cui3", "cui4", "cui5", "cun1", "cun2", "cun3", "cun4", "cun5", "cuo1", "cuo2", "cuo3", "cuo4", "cuo5", "cuor1", "cuor2", "cuor3", "cuor4", "cuor5", "cur1", "cur2", "cur3", "cur4", "cur5", "da1", "da2", "da3", "da4", "da5", "dai1", "dai2", "dai3", "dai4", "dai5", "dan1", "dan2", "dan3", "dan4", "dan5", "dang1", "dang2", "dang3", "dang4", "dang5", "dangr1", "dangr2", "dangr3", "dangr4", "dangr5", "dao1", "dao2", "dao3", "dao4", "dao5", "daor1", "daor2", "daor3", "daor4", "daor5", "dar1", "dar2", "dar3", "dar4", "dar5", "de1", "de2", "de3", "de4", "de5", "dei1", "dei2", "dei3", "dei4", "dei5", "den1", "den2", "den3", "den4", "den5", "deng1", "deng2", "deng3", "deng4", "deng5", "dengr1", "dengr2", "dengr3", "dengr4", "dengr5", "der1", "der2", "der3", "der4", "der5", "di1", "di2", "di3", "di4", "di5", "dia1", "dia2", "dia3", "dia4", "dia5", "dian1", "dian2", "dian3", "dian4", "dian5", "diao1", "diao2", "diao3", "diao4", "diao5", "diaor1", "diaor2", "diaor3", "diaor4", "diaor5", "diar1", "diar2", "diar3", "diar4", "diar5", "die1", "die2", "die3", "die4", "die5", "ding1", "ding2", "ding3", "ding4", "ding5", "dingr1", "dingr2", "dingr3", "dingr4", "dingr5", "dir1", "dir2", "dir3", "dir4", "dir5", "diu1", "diu2", "diu3", "diu4", "diu5", "diur1", "diur2", "diur3", "diur4", "diur5", "dong1", "dong2", "dong3", "dong4", "dong5", "dongr1", "dongr2", "dongr3", "dongr4", "dongr5", "dou1", "dou2", "dou3", "dou4", "dou5", "dour1", "dour2", "dour3", "dour4", "dour5", "du1", "du2", "du3", "du4", "du5", "duan1", "duan2", "duan3", "duan4", "duan5", "duar1", "duar2", "duar3", "duar4", "duar5", "dui1", "dui2", "dui3", "dui4", "dui5", "dun1", "dun2", "dun3", "dun4", "dun5", "duo1", "duo2", "duo3", "duo4", "duo5", "duor1", "duor2", "duor3", "duor4", "duor5", "dur1", "dur2", "dur3", "dur4", "dur5", "e1", "e2", "e3", "e4", "e5", "ei1", "ei2", "ei3", "ei4", "ei5", "en1", "en2", "en3", "en4", "en5", "eng1", "eng2", "eng3", "eng4", "eng5", "er1", "er2", "er3", "er4", "er5", "fa1", "fa2", "fa3", "fa4", "fa5", "fan1", "fan2", "fan3", "fan4", "fan5", "fang1", "fang2", "fang3", "fang4", "fang5", "fangr1", "fangr2", "fangr3", "fangr4", "fangr5", "far1", "far2", "far3", "far4", "far5", "fei1", "fei2", "fei3", "fei4", "fei5", "fen1", "fen2", "fen3", "fen4", "fen5", "feng1", "feng2", "feng3", "feng4", "feng5", "fengr1", "fengr2", "fengr3", "fengr4", "fengr5", "fer1", "fer2", "fer3", "fer4", "fer5", "fiao1", "fiao2", "fiao3", "fiao4", "fiao5", "fo1", "fo2", "fo3", "fo4", "fo5", "fou1", "fou2", "fou3", "fou4", "fou5", "fu1", "fu2", "fu3", "fu4", "fu5", "fur1", "fur2", "fur3", "fur4", "fur5", "ga1", "ga2", "ga3", "ga4", "ga5", "gai1", "gai2", "gai3", "gai4", "gai5", "gan1", "gan2", "gan3", "gan4", "gan5", "gang1", "gang2", "gang3", "gang4", "gang5", "gangr1", "gangr2", "gangr3", "gangr4", "gangr5", "gao1", "gao2", "gao3", "gao4", "gao5", "gaor1", "gaor2", "gaor3", "gaor4", "gaor5", "gar1", "gar2", "gar3", "gar4", "gar5", "ge1", "ge2", "ge3", "ge4", "ge5", "gei1", "gei2", "gei3", "gei4", "gei5", "gen1", "gen2", "gen3", "gen4", "gen5", "geng1", "geng2", "geng3", "geng4", "geng5", "gengr1", "gengr2", "gengr3", "gengr4", "gengr5", "ger1", "ger2", "ger3", "ger4", "ger5", "gi1", "gi2", "gi3", "gi4", "gi5", "gong1", "gong2", "gong3", "gong4", "gong5", "gongr1", "gongr2", "gongr3", "gongr4", "gongr5", "gou1", "gou2", "gou3", "gou4", "gou5", "gour1", "gour2", "gour3", "gour4", "gour5", "gu1", "gu2", "gu3", "gu4", "gu5", "gua1", "gua2", "gua3", "gua4", "gua5", "guai1", "guai2", "guai3", "guai4", "guai5", "guan1", "guan2", "guan3", "guan4", "guan5", "guang1", "guang2", "guang3", "guang4", "guang5", "guangr1", "guangr2", "guangr3", "guangr4", "guangr5", "guar1", "guar2", "guar3", "guar4", "guar5", "gui1", "gui2", "gui3", "gui4", "gui5", "gun1", "gun2", "gun3", "gun4", "gun5", "guo1", "guo2", "guo3", "guo4", "guo5", "guor1", "guor2", "guor3", "guor4", "guor5", "gur1", "gur2", "gur3", "gur4", "gur5", "ha1", "ha2", "ha3", "ha4", "ha5", "hai1", "hai2", "hai3", "hai4", "hai5", "han1", "han2", "han3", "han4", "han5", "hang1", "hang2", "hang3", "hang4", "hang5", "hangr1", "hangr2", "hangr3", "hangr4", "hangr5", "hao1", "hao2", "hao3", "hao4", "hao5", "haor1", "haor2", "haor3", "haor4", "haor5", "har1", "har2", "har3", "har4", "har5", "he1", "he2", "he3", "he4", "he5", "hei1", "hei2", "hei3", "hei4", "hei5", "hen1", "hen2", "hen3", "hen4", "hen5", "heng1", "heng2", "heng3", "heng4", "heng5", "hengr1", "hengr2", "hengr3", "hengr4", "hengr5", "her1", "her2", "her3", "her4", "her5", "hm1", "hm2", "hm3", "hm4", "hm5", "hng1", "hng2", "hng3", "hng4", "hng5", "hong1", "hong2", "hong3", "hong4", "hong5", "hou1", "hou2", "hou3", "hou4", "hou5", "hour1", "hour2", "hour3", "hour4", "hour5", "hu1", "hu2", "hu3", "hu4", "hu5", "hua1", "hua2", "hua3", "hua4", "hua5", "huai1", "huai2", "huai3", "huai4", "huai5", "huan1", "huan2", "huan3", "huan4", "huan5", "huang1", "huang2", "huang3", "huang4", "huang5", "huangr1", "huangr2", "huangr3", "huangr4", "huangr5", "huar1", "huar2", "huar3", "huar4", "huar5", "hui1", "hui2", "hui3", "hui4", "hui5", "hun1", "hun2", "hun3", "hun4", "hun5", "huo1", "huo2", "huo3", "huo4", "huo5", "huor1", "huor2", "huor3", "huor4", "huor5", "hur1", "hur2", "hur3", "hur4", "hur5", "ji1", "ji2", "ji3", "ji4", "ji5", "jia1", "jia2", "jia3", "jia4", "jia5", "jian1", "jian2", "jian3", "jian4", "jian5", "jiang1", "jiang2", "jiang3", "jiang4", "jiang5", "jiangr1", "jiangr2", "jiangr3", "jiangr4", "jiangr5", "jiao1", "jiao2", "jiao3", "jiao4", "jiao5", "jiaor1", "jiaor2", "jiaor3", "jiaor4", "jiaor5", "jiar1", "jiar2", "jiar3", "jiar4", "jiar5", "jie1", "jie2", "jie3", "jie4", "jie5", "jin1", "jin2", "jin3", "jin4", "jin5", "jing1", "jing2", "jing3", "jing4", "jing5", "jingr1", "jingr2", "jingr3", "jingr4", "jingr5", "jiong1", "jiong2", "jiong3", "jiong4", "jiong5", "jir1", "jir2", "jir3", "jir4", "jir5", "jiu1", "jiu2", "jiu3", "jiu4", "jiu5", "jiur1", "jiur2", "jiur3", "jiur4", "jiur5", "ju1", "ju2", "ju3", "ju4", "ju5", "juan1", "juan2", "juan3", "juan4", "juan5", "juar1", "juar2", "juar3", "juar4", "juar5", "jue1", "jue2", "jue3", "jue4", "jue5", "jun1", "jun2", "jun3", "jun4", "jun5", "jur1", "jur2", "jur3", "jur4", "jur5", "ka1", "ka2", "ka3", "ka4", "ka5", "kai1", "kai2", "kai3", "kai4", "kai5", "kan1", "kan2", "kan3", "kan4", "kan5", "kang1", "kang2", "kang3", "kang4", "kang5", "kangr1", "kangr2", "kangr3", "kangr4", "kangr5", "kao1", "kao2", "kao3", "kao4", "kao5", "kaor1", "kaor2", "kaor3", "kaor4", "kaor5", "kar1", "kar2", "kar3", "kar4", "kar5", "ke1", "ke2", "ke3", "ke4", "ke5", "kei1", "kei2", "kei3", "kei4", "kei5", "ken1", "ken2", "ken3", "ken4", "ken5", "keng1", "keng2", "keng3", "keng4", "keng5", "kengr1", "kengr2", "kengr3", "kengr4", "kengr5", "ker1", "ker2", "ker3", "ker4", "ker5", "kong1", "kong2", "kong3", "kong4", "kong5", "kongr1", "kongr2", "kongr3", "kongr4", "kongr5", "kou1", "kou2", "kou3", "kou4", "kou5", "kour1", "kour2", "kour3", "kour4", "kour5", "ku1", "ku2", "ku3", "ku4", "ku5", "kua1", "kua2", "kua3", "kua4", "kua5", "kuai1", "kuai2", "kuai3", "kuai4", "kuai5", "kuan1", "kuan2", "kuan3", "kuan4", "kuan5", "kuang1", "kuang2", "kuang3", "kuang4", "kuang5", "kuangr1", "kuangr2", "kuangr3", "kuangr4", "kuangr5", "kuar1", "kuar2", "kuar3", "kuar4", "kuar5", "kui1", "kui2", "kui3", "kui4", "kui5", "kuir1", "kuir2", "kuir3", "kuir4", "kuir5", "kun1", "kun2", "kun3", "kun4", "kun5", "kuo1", "kuo2", "kuo3", "kuo4", "kuo5", "kuor1", "kuor2", "kuor3", "kuor4", "kuor5", "la1", "la2", "la3", "la4", "la5", "lai1", "lai2", "lai3", "lai4", "lai5", "lan1", "lan2", "lan3", "lan4", "lan5", "lang1", "lang2", "lang3", "lang4", "lang5", "langr1", "langr2", "langr3", "langr4", "langr5", "lao1", "lao2", "lao3", "lao4", "lao5", "laor1", "laor2", "laor3", "laor4", "laor5", "lar1", "lar2", "lar3", "lar4", "lar5", "le1", "le2", "le3", "le4", "le5", "lei1", "lei2", "lei3", "lei4", "lei5", "leng1", "leng2", "leng3", "leng4", "leng5", "lengr1", "lengr2", "lengr3", "lengr4", "lengr5", "ler1", "ler2", "ler3", "ler4", "ler5", "li1", "li2", "li3", "li4", "li5", "lia1", "lia2", "lia3", "lia4", "lia5", "lian1", "lian2", "lian3", "lian4", "lian5", "liang1", "liang2", "liang3", "liang4", "liang5", "liangr1", "liangr2", "liangr3", "liangr4", "liangr5", "liao1", "liao2", "liao3", "liao4", "liao5", "liaor1", "liaor2", "liaor3", "liaor4", "liaor5", "liar1", "liar2", "liar3", "liar4", "liar5", "lie1", "lie2", "lie3", "lie4", "lie5", "lin1", "lin2", "lin3", "lin4", "lin5", "ling1", "ling2", "ling3", "ling4", "ling5", "lingr1", "lingr2", "lingr3", "lingr4", "lingr5", "lir1", "lir2", "lir3", "lir4", "lir5", "liu1", "liu2", "liu3", "liu4", "liu5", "liur1", "liur2", "liur3", "liur4", "liur5", "lo1", "lo2", "lo3", "lo4", "lo5", "long1", "long2", "long3", "long4", "long5", "longr1", "longr2", "longr3", "longr4", "longr5", "lou1", "lou2", "lou3", "lou4", "lou5", "lour1", "lour2", "lour3", "lour4", "lour5", "lu1", "lu2", "lu3", "lu4", "lu5", "luan1", "luan2", "luan3", "luan4", "luan5", "luar1", "luar2", "luar3", "luar4", "luar5", "lun1", "lun2", "lun3", "lun4", "lun5", "luo1", "luo2", "luo3", "luo4", "luo5", "luor1", "luor2", "luor3", "luor4", "luor5", "lur1", "lur2", "lur3", "lur4", "lur5", "lv1", "lv2", "lv3", "lv4", "lv5", "lve1", "lve2", "lve3", "lve4", "lve5", "lvr1", "lvr2", "lvr3", "lvr4", "lvr5", "m1", "m2", "m3", "m4", "m5", "ma1", "ma2", "ma3", "ma4", "ma5", "mai1", "mai2", "mai3", "mai4", "mai5", "man1", "man2", "man3", "man4", "man5", "mang1", "mang2", "mang3", "mang4", "mang5", "mangr1", "mangr2", "mangr3", "mangr4", "mangr5", "mao1", "mao2", "mao3", "mao4", "mao5", "maor1", "maor2", "maor3", "maor4", "maor5", "mar1", "mar2", "mar3", "mar4", "mar5", "me1", "me2", "me3", "me4", "me5", "mei1", "mei2", "mei3", "mei4", "mei5", "men1", "men2", "men3", "men4", "men5", "meng1", "meng2", "meng3", "meng4", "meng5", "mer1", "mer2", "mer3", "mer4", "mer5", "mi1", "mi2", "mi3", "mi4", "mi5", "mian1", "mian2", "mian3", "mian4", "mian5", "miao1", "miao2", "miao3", "miao4", "miao5", "miaor1", "miaor2", "miaor3", "miaor4", "miaor5", "miar1", "miar2", "miar3", "miar4", "miar5", "mie1", "mie2", "mie3", "mie4", "mie5", "min1", "min2", "min3", "min4", "min5", "ming1", "ming2", "ming3", "ming4", "ming5", "mingr1", "mingr2", "mingr3", "mingr4", "mingr5", "mir1", "mir2", "mir3", "mir4", "mir5", "miu1", "miu2", "miu3", "miu4", "miu5", "mo1", "mo2", "mo3", "mo4", "mo5", "mor1", "mor2", "mor3", "mor4", "mor5", "mou1", "mou2", "mou3", "mou4", "mou5", "mu1", "mu2", "mu3", "mu4", "mu5", "mur1", "mur2", "mur3", "mur4", "mur5", "n1", "n2", "n3", "n4", "n5", "na1", "na2", "na3", "na4", "na5", "nai1", "nai2", "nai3", "nai4", "nai5", "nan1", "nan2", "nan3", "nan4", "nan5", "nang1", "nang2", "nang3", "nang4", "nang5", "nao1", "nao2", "nao3", "nao4", "nao5", "naor1", "naor2", "naor3", "naor4", "naor5", "nar1", "nar2", "nar3", "nar4", "nar5", "ne1", "ne2", "ne3", "ne4", "ne5", "nei1", "nei2", "nei3", "nei4", "nei5", "nen1", "nen2", "nen3", "nen4", "nen5", "neng1", "neng2", "neng3", "neng4", "neng5", "ng1", "ng2", "ng3", "ng4", "ng5", "ni1", "ni2", "ni3", "ni4", "ni5", "nia1", "nia2", "nia3", "nia4", "nia5", "nian1", "nian2", "nian3", "nian4", "nian5", "niang1", "niang2", "niang3", "niang4", "niang5", "niangr1", "niangr2", "niangr3", "niangr4", "niangr5", "niao1", "niao2", "niao3", "niao4", "niao5", "niaor1", "niaor2", "niaor3", "niaor4", "niaor5", "niar1", "niar2", "niar3", "niar4", "niar5", "nie1", "nie2", "nie3", "nie4", "nie5", "nin1", "nin2", "nin3", "nin4", "nin5", "ning1", "ning2", "ning3", "ning4", "ning5", "ningr1", "ningr2", "ningr3", "ningr4", "ningr5", "nir1", "nir2", "nir3", "nir4", "nir5", "niu1", "niu2", "niu3", "niu4", "niu5", "niur1", "niur2", "niur3", "niur4", "niur5", "nong1", "nong2", "nong3", "nong4", "nong5", "nongr1", "nongr2", "nongr3", "nongr4", "nongr5", "nou1", "nou2", "nou3", "nou4", "nou5", "nu1", "nu2", "nu3", "nu4", "nu5", "nuan1", "nuan2", "nuan3", "nuan4", "nuan5", "nun1", "nun2", "nun3", "nun4", "nun5", "nuo1", "nuo2", "nuo3", "nuo4", "nuo5", "nv1", "nv2", "nv3", "nv4", "nv5", "nve1", "nve2", "nve3", "nve4", "nve5", "nvr1", "nvr2", "nvr3", "nvr4", "nvr5", "o1", "o2", "o3", "o4", "o5", "ou1", "ou2", "ou3", "ou4", "ou5", "our1", "our2", "our3", "our4", "our5", "pa1", "pa2", "pa3", "pa4", "pa5", "pai1", "pai2", "pai3", "pai4", "pai5", "pan1", "pan2", "pan3", "pan4", "pan5", "pang1", "pang2", "pang3", "pang4", "pang5", "pangr1", "pangr2", "pangr3", "pangr4", "pangr5", "panr1", "panr2", "panr3", "panr4", "panr5", "pao1", "pao2", "pao3", "pao4", "pao5", "paor1", "paor2", "paor3", "paor4", "paor5", "pei1", "pei2", "pei3", "pei4", "pei5", "pen1", "pen2", "pen3", "pen4", "pen5", "peng1", "peng2", "peng3", "peng4", "peng5", "pengr1", "pengr2", "pengr3", "pengr4", "pengr5", "penr1", "penr2", "penr3", "penr4", "penr5", "pi1", "pi2", "pi3", "pi4", "pi5", "pian1", "pian2", "pian3", "pian4", "pian5", "piao1", "piao2", "piao3", "piao4", "piao5", "piaor1", "piaor2", "piaor3", "piaor4", "piaor5", "piar1", "piar2", "piar3", "piar4", "piar5", "pie1", "pie2", "pie3", "pie4", "pie5", "pin1", "pin2", "pin3", "pin4", "pin5", "ping1", "ping2", "ping3", "ping4", "ping5", "pingr1", "pingr2", "pingr3", "pingr4", "pingr5", "pir1", "pir2", "pir3", "pir4", "pir5", "po1", "po2", "po3", "po4", "po5", "por1", "por2", "por3", "por4", "por5", "pou1", "pou2", "pou3", "pou4", "pou5", "pu1", "pu2", "pu3", "pu4", "pu5", "pur1", "pur2", "pur3", "pur4", "pur5", "qi1", "qi2", "qi3", "qi4", "qi5", "qia1", "qia2", "qia3", "qia4", "qia5", "qian1", "qian2", "qian3", "qian4", "qian5", "qiang1", "qiang2", "qiang3", "qiang4", "qiang5", "qiangr1", "qiangr2", "qiangr3", "qiangr4", "qiangr5", "qiao1", "qiao2", "qiao3", "qiao4", "qiao5", "qiaor1", "qiaor2", "qiaor3", "qiaor4", "qiaor5", "qiar1", "qiar2", "qiar3", "qiar4", "qiar5", "qie1", "qie2", "qie3", "qie4", "qie5", "qin1", "qin2", "qin3", "qin4", "qin5", "qing1", "qing2", "qing3", "qing4", "qing5", "qingr1", "qingr2", "qingr3", "qingr4", "qingr5", "qiong1", "qiong2", "qiong3", "qiong4", "qiong5", "qir1", "qir2", "qir3", "qir4", "qir5", "qiu1", "qiu2", "qiu3", "qiu4", "qiu5", "qiur1", "qiur2", "qiur3", "qiur4", "qiur5", "qu1", "qu2", "qu3", "qu4", "qu5", "quan1", "quan2", "quan3", "quan4", "quan5", "quanr1", "quanr2", "quanr3", "quanr4", "quanr5", "que1", "que2", "que3", "que4", "que5", "qun1", "qun2", "qun3", "qun4", "qun5", "qur1", "qur2", "qur3", "qur4", "qur5", "ran1", "ran2", "ran3", "ran4", "ran5", "rang1", "rang2", "rang3", "rang4", "rang5", "rangr1", "rangr2", "rangr3", "rangr4", "rangr5", "rao1", "rao2", "rao3", "rao4", "rao5", "raor1", "raor2", "raor3", "raor4", "raor5", "re1", "re2", "re3", "re4", "re5", "ren1", "ren2", "ren3", "ren4", "ren5", "reng1", "reng2", "reng3", "reng4", "reng5", "ri1", "ri2", "ri3", "ri4", "ri5", "rir1", "rir2", "rir3", "rir4", "rir5", "rong1", "rong2", "rong3", "rong4", "rong5", "rongr1", "rongr2", "rongr3", "rongr4", "rongr5", "rou1", "rou2", "rou3", "rou4", "rou5", "rour1", "rour2", "rour3", "rour4", "rour5", "ru1", "ru2", "ru3", "ru4", "ru5", "rua1", "rua2", "rua3", "rua4", "rua5", "ruan1", "ruan2", "ruan3", "ruan4", "ruan5", "ruanr1", "ruanr2", "ruanr3", "ruanr4", "ruanr5", "rui1", "rui2", "rui3", "rui4", "rui5", "run1", "run2", "run3", "run4", "run5", "ruo1", "ruo2", "ruo3", "ruo4", "ruo5", "sa1", "sa2", "sa3", "sa4", "sa5", "sai1", "sai2", "sai3", "sai4", "sai5", "san1", "san2", "san3", "san4", "san5", "sang1", "sang2", "sang3", "sang4", "sang5", "sangr1", "sangr2", "sangr3", "sangr4", "sangr5", "sao1", "sao2", "sao3", "sao4", "sao5", "saor1", "saor2", "saor3", "saor4", "saor5", "sar1", "sar2", "sar3", "sar4", "sar5", "se1", "se2", "se3", "se4", "se5", "sei1", "sei2", "sei3", "sei4", "sei5", "sen1", "sen2", "sen3", "sen4", "sen5", "seng1", "seng2", "seng3", "seng4", "seng5", "sha1", "sha2", "sha3", "sha4", "sha5", "shai1", "shai2", "shai3", "shai4", "shai5", "shan1", "shan2", "shan3", "shan4", "shan5", "shang1", "shang2", "shang3", "shang4", "shang5", "shangr1", "shangr2", "shangr3", "shangr4", "shangr5", "shao1", "shao2", "shao3", "shao4", "shao5", "shaor1", "shaor2", "shaor3", "shaor4", "shaor5", "shar1", "shar2", "shar3", "shar4", "shar5", "she1", "she2", "she3", "she4", "she5", "shei1", "shei2", "shei3", "shei4", "shei5", "shen1", "shen2", "shen3", "shen4", "shen5", "sheng1", "sheng2", "sheng3", "sheng4", "sheng5", "shengr1", "shengr2", "shengr3", "shengr4", "shengr5", "shi1", "shi2", "shi3", "shi4", "shi5", "shir1", "shir2", "shir3", "shir4", "shir5", "shou1", "shou2", "shou3", "shou4", "shou5", "shour1", "shour2", "shour3", "shour4", "shour5", "shu1", "shu2", "shu3", "shu4", "shu5", "shua1", "shua2", "shua3", "shua4", "shua5", "shuai1", "shuai2", "shuai3", "shuai4", "shuai5", "shuan1", "shuan2", "shuan3", "shuan4", "shuan5", "shuang1", "shuang2", "shuang3", "shuang4", "shuang5", "shuangr1", "shuangr2", "shuangr3", "shuangr4", "shuangr5", "shuar1", "shuar2", "shuar3", "shuar4", "shuar5", "shui1", "shui2", "shui3", "shui4", "shui5", "shun1", "shun2", "shun3", "shun4", "shun5", "shuo1", "shuo2", "shuo3", "shuo4", "shuo5", "shuor1", "shuor2", "shuor3", "shuor4", "shuor5", "shur1", "shur2", "shur3", "shur4", "shur5", "si1", "si2", "si3", "si4", "si5", "sir1", "sir2", "sir3", "sir4", "sir5", "song1", "song2", "song3", "song4", "song5", "sou1", "sou2", "sou3", "sou4", "sou5", "sour1", "sour2", "sour3", "sour4", "sour5", "su1", "su2", "su3", "su4", "su5", "suan1", "suan2", "suan3", "suan4", "suan5", "suar1", "suar2", "suar3", "suar4", "suar5", "sui1", "sui2", "sui3", "sui4", "sui5", "sun1", "sun2", "sun3", "sun4", "sun5", "sunr1", "sunr2", "sunr3", "sunr4", "sunr5", "suo1", "suo2", "suo3", "suo4", "suo5", "suor1", "suor2", "suor3", "suor4", "suor5", "ta1", "ta2", "ta3", "ta4", "ta5", "tai1", "tai2", "tai3", "tai4", "tai5", "tan1", "tan2", "tan3", "tan4", "tan5", "tang1", "tang2", "tang3", "tang4", "tang5", "tangr1", "tangr2", "tangr3", "tangr4", "tangr5", "tao1", "tao2", "tao3", "tao4", "tao5", "taor1", "taor2", "taor3", "taor4", "taor5", "tar1", "tar2", "tar3", "tar4", "tar5", "te1", "te2", "te3", "te4", "te5", "tei1", "tei2", "tei3", "tei4", "tei5", "teng1", "teng2", "teng3", "teng4", "teng5", "tengr1", "tengr2", "tengr3", "tengr4", "tengr5", "ter1", "ter2", "ter3", "ter4", "ter5", "ti1", "ti2", "ti3", "ti4", "ti5", "tian1", "tian2", "tian3", "tian4", "tian5", "tiao1", "tiao2", "tiao3", "tiao4", "tiao5", "tiaor1", "tiaor2", "tiaor3", "tiaor4", "tiaor5", "tiar1", "tiar2", "tiar3", "tiar4", "tiar5", "tie1", "tie2", "tie3", "tie4", "tie5", "ting1", "ting2", "ting3", "ting4", "ting5", "tingr1", "tingr2", "tingr3", "tingr4", "tingr5", "tir1", "tir2", "tir3", "tir4", "tir5", "tong1", "tong2", "tong3", "tong4", "tong5", "tongr1", "tongr2", "tongr3", "tongr4", "tongr5", "tou1", "tou2", "tou3", "tou4", "tou5", "tour1", "tour2", "tour3", "tour4", "tour5", "tu1", "tu2", "tu3", "tu4", "tu5", "tuan1", "tuan2", "tuan3", "tuan4", "tuan5", "tuar1", "tuar2", "tuar3", "tuar4", "tuar5", "tui1", "tui2", "tui3", "tui4", "tui5", "tun1", "tun2", "tun3", "tun4", "tun5", "tuo1", "tuo2", "tuo3", "tuo4", "tuo5", "tuor1", "tuor2", "tuor3", "tuor4", "tuor5", "tur1", "tur2", "tur3", "tur4", "tur5", "wa1", "wa2", "wa3", "wa4", "wa5", "wai1", "wai2", "wai3", "wai4", "wai5", "wan1", "wan2", "wan3", "wan4", "wan5", "wang1", "wang2", "wang3", "wang4", "wang5", "wangr1", "wangr2", "wangr3", "wangr4", "wangr5", "war1", "war2", "war3", "war4", "war5", "wei1", "wei2", "wei3", "wei4", "wei5", "wen1", "wen2", "wen3", "wen4", "wen5", "weng1", "weng2", "weng3", "weng4", "weng5", "wengr1", "wengr2", "wengr3", "wengr4", "wengr5", "wo1", "wo2", "wo3", "wo4", "wo5", "wor1", "wor2", "wor3", "wor4", "wor5", "wu1", "wu2", "wu3", "wu4", "wu5", "wur1", "wur2", "wur3", "wur4", "wur5", "xi1", "xi2", "xi3", "xi4", "xi5", "xia1", "xia2", "xia3", "xia4", "xia5", "xian1", "xian2", "xian3", "xian4", "xian5", "xiang1", "xiang2", "xiang3", "xiang4", "xiang5", "xiangr1", "xiangr2", "xiangr3", "xiangr4", "xiangr5", "xiao1", "xiao2", "xiao3", "xiao4", "xiao5", "xiaor1", "xiaor2", "xiaor3", "xiaor4", "xiaor5", "xiar1", "xiar2", "xiar3", "xiar4", "xiar5", "xie1", "xie2", "xie3", "xie4", "xie5", "xin1", "xin2", "xin3", "xin4", "xin5", "xing1", "xing2", "xing3", "xing4", "xing5", "xingr1", "xingr2", "xingr3", "xingr4", "xingr5", "xiong1", "xiong2", "xiong3", "xiong4", "xiong5", "xiongr1", "xiongr2", "xiongr3", "xiongr4", "xiongr5", "xir1", "xir2", "xir3", "xir4", "xir5", "xiu1", "xiu2", "xiu3", "xiu4", "xiu5", "xiur1", "xiur2", "xiur3", "xiur4", "xiur5", "xu1", "xu2", "xu3", "xu4", "xu5", "xuan1", "xuan2", "xuan3", "xuan4", "xuan5", "xuar1", "xuar2", "xuar3", "xuar4", "xuar5", "xue1", "xue2", "xue3", "xue4", "xue5", "xun1", "xun2", "xun3", "xun4", "xun5", "xur1", "xur2", "xur3", "xur4", "xur5", "ya1", "ya2", "ya3", "ya4", "ya5", "yan1", "yan2", "yan3", "yan4", "yan5", "yang1", "yang2", "yang3", "yang4", "yang5", "yangr1", "yangr2", "yangr3", "yangr4", "yangr5", "yao1", "yao2", "yao3", "yao4", "yao5", "yaor1", "yaor2", "yaor3", "yaor4", "yaor5", "yar1", "yar2", "yar3", "yar4", "yar5", "ye1", "ye2", "ye3", "ye4", "ye5", "yi1", "yi2", "yi3", "yi4", "yi5", "yin1", "yin2", "yin3", "yin4", "yin5", "ying1", "ying2", "ying3", "ying4", "ying5", "yingr1", "yingr2", "yingr3", "yingr4", "yingr5", "yir1", "yir2", "yir3", "yir4", "yir5", "yo1", "yo2", "yo3", "yo4", "yo5", "yong1", "yong2", "yong3", "yong4", "yong5", "you1", "you2", "you3", "you4", "you5", "your1", "your2", "your3", "your4", "your5", "yu1", "yu2", "yu3", "yu4", "yu5", "yuan1", "yuan2", "yuan3", "yuan4", "yuan5", "yuar1", "yuar2", "yuar3", "yuar4", "yuar5", "yue1", "yue2", "yue3", "yue4", "yue5", "yun1", "yun2", "yun3", "yun4", "yun5", "yur1", "yur2", "yur3", "yur4", "yur5", "za1", "za2", "za3", "za4", "za5", "zai1", "zai2", "zai3", "zai4", "zai5", "zan1", "zan2", "zan3", "zan4", "zan5", "zang1", "zang2", "zang3", "zang4", "zang5", "zao1", "zao2", "zao3", "zao4", "zao5", "zaor1", "zaor2", "zaor3", "zaor4", "zaor5", "zar1", "zar2", "zar3", "zar4", "zar5", "ze1", "ze2", "ze3", "ze4", "ze5", "zei1", "zei2", "zei3", "zei4", "zei5", "zen1", "zen2", "zen3", "zen4", "zen5", "zeng1", "zeng2", "zeng3", "zeng4", "zeng5", "zha1", "zha2", "zha3", "zha4", "zha5", "zhai1", "zhai2", "zhai3", "zhai4", "zhai5", "zhan1", "zhan2", "zhan3", "zhan4", "zhan5", "zhang1", "zhang2", "zhang3", "zhang4", "zhang5", "zhangr1", "zhangr2", "zhangr3", "zhangr4", "zhangr5", "zhao1", "zhao2", "zhao3", "zhao4", "zhao5", "zhaor1", "zhaor2", "zhaor3", "zhaor4", "zhaor5", "zhar1", "zhar2", "zhar3", "zhar4", "zhar5", "zhe1", "zhe2", "zhe3", "zhe4", "zhe5", "zhei1", "zhei2", "zhei3", "zhei4", "zhei5", "zheir1", "zheir2", "zheir3", "zheir4", "zheir5", "zhen1", "zhen2", "zhen3", "zhen4", "zhen5", "zheng1", "zheng2", "zheng3", "zheng4", "zheng5", "zhengr1", "zhengr2", "zhengr3", "zhengr4", "zhengr5", "zhi1", "zhi2", "zhi3", "zhi4", "zhi5", "zhir1", "zhir2", "zhir3", "zhir4", "zhir5", "zhong1", "zhong2", "zhong3", "zhong4", "zhong5", "zhongr1", "zhongr2", "zhongr3", "zhongr4", "zhongr5", "zhou1", "zhou2", "zhou3", "zhou4", "zhou5", "zhour1", "zhour2", "zhour3", "zhour4", "zhour5", "zhu1", "zhu2", "zhu3", "zhu4", "zhu5", "zhua1", "zhua2", "zhua3", "zhua4", "zhua5", "zhuai1", "zhuai2", "zhuai3", "zhuai4", "zhuai5", "zhuan1", "zhuan2", "zhuan3", "zhuan4", "zhuan5", "zhuang1", "zhuang2", "zhuang3", "zhuang4", "zhuang5", "zhuangr1", "zhuangr2", "zhuangr3", "zhuangr4", "zhuangr5", "zhuar1", "zhuar2", "zhuar3", "zhuar4", "zhuar5", "zhui1", "zhui2", "zhui3", "zhui4", "zhui5", "zhun1", "zhun2", "zhun3", "zhun4", "zhun5", "zhuo1", "zhuo2", "zhuo3", "zhuo4", "zhuo5", "zhuor1", "zhuor2", "zhuor3", "zhuor4", "zhuor5", "zhur1", "zhur2", "zhur3", "zhur4", "zhur5", "zi1", "zi2", "zi3", "zi4", "zi5", "zir1", "zir2", "zir3", "zir4", "zir5", "zong1", "zong2", "zong3", "zong4", "zong5", "zongr1", "zongr2", "zongr3", "zongr4", "zongr5", "zou1", "zou2", "zou3", "zou4", "zou5", "zour1", "zour2", "zour3", "zour4", "zour5", "zu1", "zu2", "zu3", "zu4", "zu5", "zuan1", "zuan2", "zuan3", "zuan4", "zuan5", "zuanr1", "zuanr2", "zuanr3", "zuanr4", "zuanr5", "zui1", "zui2", "zui3", "zui4", "zui5", "zun1", "zun2", "zun3", "zun4", "zun5", "zuo1", "zuo2", "zuo3", "zuo4", "zuo5", "zuor1", "zuor2", "zuor3", "zuor4", "zuor5", "zur1", "zur2", "zur3", "zur4", "zur5"]; const VALID_PINYIN = new Set(VALID_PINYIN_ARRAY); const CN_CHAR = /[\u4e00-\u9fff\u3400-\u4dbf]/; const ERHUA_CHAR = '儿'; const ER_INDEPENDENT_WORDS = new Set(['儿子','男儿','儿童','幼儿','少儿','胎儿','婴儿','健儿','女儿','人儿']); const ER_NAME_SUFFIXES = ['灵儿','云儿','允儿','红儿','雪儿','翠儿','宝儿','婉儿','晨儿','月儿','星儿','龙儿','凤儿','蛮儿','花儿人']; function isErIndependentErx(text, erIndex) { const prev = erIndex > 0 ? text[erIndex - 1] : ''; const next = erIndex < text.length - 1 ? text[erIndex + 1] : ''; const bigram = prev + ERHUA_CHAR; if (ER_INDEPENDENT_WORDS.has(bigram)) return true; if (ER_INDEPENDENT_WORDS.has(ERHUA_CHAR + next)) return true; if (ER_NAME_SUFFIXES.includes(bigram)) return true; return false; } function parseTextTokens(text) { const tokens = []; let i = 0; while (i < text.length) { const ch = text[i]; if (CN_CHAR.test(ch)) { tokens.push({ type: 'chinese', char: ch, index: i }); } else if (/[a-zA-Z]/.test(ch)) { let word = ''; const start = i; while (i < text.length && /[a-zA-Z]/.test(text[i])) { word += text[i]; i++; } tokens.push({ type: 'english', char: word, index: start }); continue; } else if (/[α-ωΑ-Ω]/.test(ch)) { tokens.push({ type: 'greek', char: ch, index: i }); } else if (/[\d]/.test(ch)) { let num = ''; const start = i; while (i < text.length && /[\d]/.test(text[i])) { num += text[i]; i++; } tokens.push({ type: 'number', char: num, index: start }); continue; } else if (ch === ' ') { // skip } else { tokens.push({ type: 'punct', char: ch, index: i }); } i++; } return tokens; } function parsePinyinTokens(s) { return s.trim() ? s.trim().split(/\s+/).map(x => x.trim()).filter(x => x.length > 0) : []; } function isErhua(pinyin) { return /r[1-5]$/.test(pinyin); } function extractTone(pinyin) { const m = pinyin.match(/([1-5])$/); return m ? parseInt(m[1]) : null; } function extractBase(pinyin) { return pinyin.replace(/[1-5]$/, ''); } function getErhuaBase(pinyin) { return pinyin.replace(/r([1-5])$/, '$1'); } const STRUCT_PARTICLES = new Set('的地得'.split('')); const SENT_FINAL = new Set('吗呢吧啊呀哇嘛'.split('')); const ASPECT_PARTICLES = new Set('了着过'.split('')); const NOUN_SUFFIX = new Set('子们头'.split('')); const DIR_SUFFIX = new Set('上下里外面前后边'.split('')); function validatePinyinAnnotation(text, pinyinStr, VALID_PINYIN, rawPinyin) { const errors = []; const warnings = []; const infos = []; if (rawPinyin === undefined) rawPinyin = pinyinStr; const textTokens = parseTextTokens(text); const pinyinTokens = parsePinyinTokens(pinyinStr); const chineseChars = []; for (let i = 0; i < textTokens.length; i++) { if (textTokens[i].type === 'chinese') chineseChars.push({ ...textTokens[i], tokenIdx: i }); } let pIdx = 0, cIdx = 0; const alignment = []; while (cIdx < chineseChars.length && pIdx < pinyinTokens.length) { const ch = chineseChars[cIdx]; const py = pinyinTokens[pIdx]; const nextCh = cIdx + 1 < chineseChars.length ? chineseChars[cIdx + 1] : null; const isNextEr = nextCh && nextCh.char === ERHUA_CHAR; const pyIsErhua = isErhua(py); const erIsIndependent = isNextEr && isErIndependentErx(text, nextCh.index); if (pyIsErhua && isNextEr && !erIsIndependent) { alignment.push({ char: ch.char + ERHUA_CHAR, pinyin: py, charIdx: cIdx, pinyinIdx: pIdx, isErhua: true, textIndex: ch.index, textIndexEnd: nextCh.index }); cIdx += 2; pIdx += 1; } else if (!pyIsErhua && isNextEr && !erIsIndependent) { alignment.push({ char: ch.char, pinyin: py, charIdx: cIdx, pinyinIdx: pIdx, isErhua: false, textIndex: ch.index, textIndexEnd: ch.index }); cIdx += 1; pIdx += 1; } else { alignment.push({ char: ch.char, pinyin: py, charIdx: cIdx, pinyinIdx: pIdx, isErhua: false, textIndex: ch.index, textIndexEnd: ch.index }); cIdx += 1; pIdx += 1; } } while (cIdx < chineseChars.length) { alignment.push({ char: chineseChars[cIdx].char, pinyin: null, charIdx: cIdx, pinyinIdx: -1, isErhua: false, textIndex: chineseChars[cIdx].index, textIndexEnd: chineseChars[cIdx].index }); cIdx++; } while (pIdx < pinyinTokens.length) { alignment.push({ char: null, pinyin: pinyinTokens[pIdx], charIdx: -1, pinyinIdx: pIdx, isErhua: false, textIndex: -1, textIndexEnd: -1 }); pIdx++; } const erhuaCount = alignment.filter(a => a.isErhua).length; const expectedPinyin = chineseChars.length - erhuaCount; if (pinyinTokens.length !== expectedPinyin) { errors.push({ type: 'error', code: 'COUNT_MISMATCH', message: `拼音数量不匹配:有效汉字预期 ${expectedPinyin} 个拼音,实际 ${pinyinTokens.length} 个(差值=${pinyinTokens.length - expectedPinyin})` }); } for (const a of alignment) { if (!a.pinyin) continue; const py = a.pinyin.trim(); if (/^[α-ωΑ-Ω]$/.test(py)) continue; const digitMatch = py.match(/(\d+)$/); const hasMultiTone = digitMatch && digitMatch[1].length > 1; const hasInvalidTone = digitMatch && digitMatch[1].length === 1 && (parseInt(digitMatch[1]) < 1 || parseInt(digitMatch[1]) > 5); const hasNoTone = !digitMatch; const couldBePinyinNoTone = hasNoTone && /^[a-z]+$/.test(py) && [1,2,3,4,5].some(t => VALID_PINYIN.has(py + t)); if (hasMultiTone || hasInvalidTone || couldBePinyinNoTone) continue; if (hasNoTone && /^[A-Z]/.test(py)) continue; if (hasNoTone && /^[a-zA-Z]{2,}$/.test(py) && !couldBePinyinNoTone) continue; if (!VALID_PINYIN.has(py)) { const base = getErhuaBase(py); let suggestion = ''; if (VALID_PINYIN.has(base)) suggestion = `(可能是变调,合法形: "${base}")`; else { const baseNoTone = extractBase(py); const similar = [...VALID_PINYIN].filter(v => v.startsWith(baseNoTone)).slice(0, 3); if (similar.length > 0) suggestion = `(相似: ${similar.join(', ')})`; } errors.push({ type: 'error', code: 'NOT_IN_LIST', message: `非法拼音: "${py}"${suggestion}`, char: a.char, pinyin: py, pinyinIdx: a.pinyinIdx }); } } for (let i = 0; i < pinyinTokens.length; i++) { const py = pinyinTokens[i]; if (/^[α-ωΑ-Ω]$/.test(py)) continue; const digitMatch = py.match(/(\d+)$/); if (!digitMatch) { if (/^[a-z]+$/.test(py)) { const couldBePinyin = [1,2,3,4,5].some(t => VALID_PINYIN.has(py + t)); if (couldBePinyin) errors.push({ type: 'error', code: 'NO_TONE', message: `缺声调(1-5): "${py}"`, pinyin: py, pinyinIdx: i }); } else if (!/^[A-Z]/.test(py)) { errors.push({ type: 'error', code: 'NO_TONE', message: `拼音 "${py}" 缺声调数字`, pinyin: py, pinyinIdx: i }); } continue; } const digits = digitMatch[1]; if (digits.length > 1) { errors.push({ type: 'error', code: 'MULTI_TONE', message: `声调数字多余: "${py}" 只能有一个声调`, pinyin: py, pinyinIdx: i }); continue; } const toneVal = parseInt(digits); if (toneVal < 1 || toneVal > 5) { errors.push({ type: 'error', code: 'INVALID_TONE', message: `声调无效: "${toneVal}" (只能是1-5)`, pinyin: py, pinyinIdx: i }); } } for (let i = 0; i < pinyinTokens.length; i++) { const py = pinyinTokens[i]; if (/^[a-zA-Z]{2,}$/.test(py) && !/([1-5]){2,}/.test(py)) continue; if (/^[α-ωΑ-Ω]$/.test(py)) continue; const concatMatch = py.match(/([a-z]+[1-5])([a-z]+)/i); if (concatMatch) { errors.push({ type: 'error', code: 'MISSING_SPACE', message: `多拼音粘连: "${py}" 缺少空格`, pinyin: py, pinyinIdx: i }); } else if (py.match(/[1-5][a-z]/i)) { errors.push({ type: 'error', code: 'MISSING_SPACE', message: `多拼音粘连: "${py}" 缺少空格`, pinyin: py, pinyinIdx: i }); } if (/[A-Z]/.test(py) && /[a-z]/.test(py) && /[1-5]/.test(py)) { warnings.push({ type: 'warning', code: 'MIXED_CASE', message: `不应包含大写: "${py}"`, pinyin: py, pinyinIdx: i }); } } for (const a of alignment) { if (!a.pinyin || !a.char) continue; if (a.char === ERHUA_CHAR && a.pinyin && !a.pinyin.match(/^er[1-5]$/)) { warnings.push({ type: 'warning', code: 'ER_STANDALONE', message: `独立"儿"拼音应为 er+声调,实为 "${a.pinyin}"`, char: a.char, pinyin: a.pinyin, pinyinIdx: a.pinyinIdx }); } if (a.pinyin && isErhua(a.pinyin) && !a.isErhua && a.char && !a.char.includes(ERHUA_CHAR)) { warnings.push({ type: 'warning', code: 'ERHUA_NO_ER', message: `含儿化标记 'r',但"${a.char}"后无"儿"字`, char: a.char, pinyin: a.pinyin, pinyinIdx: a.pinyinIdx }); } } for (const a of alignment) { if (!a.char || !a.pinyin) continue; const ch = a.char.length === 2 ? a.char[0] : a.char; const tone = extractTone(a.pinyin); if (tone === null) continue; if (STRUCT_PARTICLES.has(ch) && tone !== 5) infos.push({ type: 'info', code: 'LIGHT', message: `结构助词通常读轻声5: "${ch}" (${a.pinyin})`, char: ch, pinyin: a.pinyin, pinyinIdx: a.pinyinIdx }); if (SENT_FINAL.has(ch) && tone !== 5) warnings.push({ type: 'warning', code: 'LIGHT', message: `语气词通常读轻声5: "${ch}" (${a.pinyin})`, char: ch, pinyin: a.pinyin, pinyinIdx: a.pinyinIdx }); if (ch === '了' && tone !== 5) infos.push({ type: 'info', code: 'LIGHT', message: `作时态助词读 le5: "${ch}" (${a.pinyin})`, char: ch, pinyin: a.pinyin, pinyinIdx: a.pinyinIdx }); if (ch === '着' && tone !== 5) infos.push({ type: 'info', code: 'LIGHT', message: `作持续态助词读 zhe5: "${ch}" (${a.pinyin})`, char: ch, pinyin: a.pinyin, pinyinIdx: a.pinyinIdx }); if (ch === '过' && tone !== 5) infos.push({ type: 'info', code: 'LIGHT', message: `作时态助词读 guo5: "${ch}" (${a.pinyin})`, char: ch, pinyin: a.pinyin, pinyinIdx: a.pinyinIdx }); if (a.charIdx >= 1 && chineseChars[a.charIdx - 1]?.char === ch && tone !== 5) { infos.push({ type: 'info', code: 'LIGHT_REDUP', message: `重叠动词第二字通常读轻声5: "${ch}${ch}"`, char: ch, pinyin: a.pinyin, pinyinIdx: a.pinyinIdx }); } } for (let i = 0; i < alignment.length; i++) { const a = alignment[i]; if (!a.pinyin || !a.char) continue; const tone = extractTone(a.pinyin); if (tone === null) continue; const nextA = i + 1 < alignment.length ? alignment[i + 1] : null; const ch = a.char.length === 2 ? a.char[0] : a.char; if (ch === '一' && nextA && nextA.pinyin) { const nextTone = extractTone(nextA.pinyin); if (nextTone !== null) { if (nextTone === 4 && tone !== 2) infos.push({ type: 'info', code: 'SANDHI', message: `"一"在四声前变二声: yi2`, pinyinIdx: a.pinyinIdx }); else if (nextTone !== 4 && nextTone !== 5 && tone !== 4 && tone !== 1) infos.push({ type: 'info', code: 'SANDHI', message: `"一"在非四声前变四声: yi4`, pinyinIdx: a.pinyinIdx }); } } if (ch === '不' && nextA && nextA.pinyin) { const nextTone = extractTone(nextA.pinyin); if (nextTone === 4 && tone !== 2) infos.push({ type: 'info', code: 'SANDHI', message: `"不"在四声前变二声: bu2`, pinyinIdx: a.pinyinIdx }); } if (tone === 3 && nextA && nextA.pinyin && nextA.char) { const nextTone = extractTone(nextA.pinyin); if (nextTone === 3) { let hasSeparator = false; if (a.textIndexEnd >= 0 && nextA.textIndex >= 0 && nextA.textIndex > a.textIndexEnd + 1) { const between = text.slice(a.textIndexEnd + 1, nextA.textIndex); if (/\S/.test(between)) hasSeparator = true; } if (!hasSeparator) infos.push({ type: 'info', code: 'SANDHI', message: `三三相连,前字可能变二声`, pinyinIdx: a.pinyinIdx }); } } } for (const tok of textTokens) { if (tok.type === 'english' && !pinyinStr.includes(tok.char)) warnings.push({ type: 'warning', code: 'ENGLISH_MISSING', message: `英文应保留原文: "${tok.char}"` }); } return { alignment, errors, warnings, infos }; } // 2. 主体功能与界面渲染 function pinyin() { const parent = document.querySelector("#app > div > div.ivu-layout > div.z-main.z-main-s > div > div > div.z-stage__content > form > div.ivu-card > div.ivu-card-body"); parent.insertAdjacentHTML('beforeend', '
'+ '
'+ '
'+ '
'+ '
📌 汉字数 0
'+ '
🔤 有效拼音 0
'+ '
⏸️ 已置灰 0
'+ '
✅ 悬停后按 X 调整位置 | 按 0-5 改声调
'+ '
'+ '
'+ ''+ ''+ '
'+ '
'+ '
' ); const css = ` * { box-sizing: border-box; margin: 0; padding: 0; } body { font-family: "Microsoft YaHei", sans-serif; background: #f5f7fa; } .container { background: #fff; border-radius: 16px; box-shadow: 0 8px 30px rgba(0,0,0,0.12); padding: 10px 10px; } .char-grid { display: flex; flex-wrap: wrap; min-height: 120px; } .char-item { display: flex; flex-direction: column; align-items: center; background: #ffffff; border-radius: 10px; padding: 6px 6px 8px 6px; min-width: 52px; transition: 0.15s; box-shadow: 0 1px 3px rgba(0,0,0,0.02); cursor: default; position: relative; } .char-item:hover { border-color: #2b6fdb; box-shadow: 0 4px 12px rgba(43,111,219,0.10); z-index: 2; } .char-item.hovering { border-color: #e67e22; box-shadow: 0 0 0 3px rgba(230, 126, 34, 0.2), 0 4px 12px rgba(43,111,219,0.10); } .char-item .hanzi { font-size: 28px; font-weight: 500; color: #0b1a2b; line-height: 1.3; letter-spacing: 0.5px; min-height: 38px; display: flex; align-items: center; justify-content: center; width: 100%; border-top: 1px dashed #e2e9f2; padding-top: 4px; order: 1; cursor: default; transition: 0.15s; border-radius: 4px; user-select: none; } .char-item .hanzi:hover { background: #eef4ff; color: #1a56b8; border-top-color: #2b6fdb; } .char-item .pinyin-input { width: 55px; border: 1px solid #d8e0ea; border-radius: 6px; background: #f8faff; font-size: 13px; text-align: center; padding: 4px 2px; color: #1f2a3a; outline: none; transition: 0.15s; font-family: "Courier New", monospace; font-weight: 500; letter-spacing: 0.3px; order: 0; margin-bottom: 4px; } .char-item .pinyin-input:focus { border-color: #2b6fdb; background: #ffffff; box-shadow: 0 0 0 3px rgba(43,111,219,0.12); } .char-item .pinyin-input:hover { background: #ffffff; border-color: #8aa3c0; } .char-item .pinyin-input.grayed { background: #f5f5f5; color: #b0b0b0; border-color: #d0d0d0; text-decoration: line-through; cursor: not-allowed; } .char-item .hanzi.grayed-hanzi { color: #b0b0b0; cursor: not-allowed; opacity: 0.6; } .char-item.punct .pinyin-input { background: #f0f3f6; border-color: #d0d8e2; } .char-item.punct .hanzi { color: #7a8a9e; border-top-color: #e8eef5; } .char-item.empty .hanzi { color: #b0c0d0; } .char-item.empty .pinyin-input { background: #f0f3f6; border-color: #d0d8e2; } .char-item.shifting { animation: shiftPulse 0.6s ease; } @keyframes shiftPulse { 0%, 100% { background: #ffffff; } 50% { background: #d4e4ff; border-color: #2b6fdb; } } .char-item.has-error { border-color: #ff6b6b; background: rgba(255,107,107,0.06); } .char-item.has-error .pinyin-input { color: #ff3b3b; font-weight: 600; border-color: #ff8e8e; } .char-item.has-warn { border-color: #ffbe46; background: rgba(255,190,70,0.08); } .char-item.has-warn .pinyin-input { color: #e67e22; font-weight: 600; border-color: #fbd386; } .char-item.has-info { border-color: #74c0fc; background: rgba(116,192,252,0.08); } .char-item.has-info .pinyin-input { color: #2b8ee3; border-color: #a4d8ff; } .char-item[data-tip]::after { content: attr(data-tip); position: absolute; bottom: 105%; left: 50%; transform: translateX(-50%); background: #1f2a3a; color: #fff; padding: 8px 12px; border-radius: 6px; font-size: 12px; white-space: pre-wrap; width: max-content; max-width: 240px; z-index: 100; pointer-events: none; box-shadow: 0 4px 16px rgba(0,0,0,0.25); line-height: 1.5; opacity: 0; visibility: hidden; transition: 0.15s; } .char-item[data-tip]:hover::after { opacity: 1; visibility: visible; } .char-item[data-tip]::before { content: ''; position: absolute; bottom: 100%; left: 50%; transform: translateX(-50%); border: 5px solid transparent; border-top-color: #1f2a3a; opacity: 0; visibility: hidden; transition: 0.15s; z-index: 100; pointer-events: none; } .char-item[data-tip]:hover::before { opacity: 1; visibility: visible; } .global-errors { background: #fff4f4; border: 1px solid #ffcdcd; border-left: 4px solid #ff6b6b; padding: 12px 16px; margin-top: 15px; border-radius: 6px; font-size: 14px; color: #d63031; display: none; line-height: 1.6; } .stats { margin-top: 20px; display: flex; gap: 20px; font-size: 14px; color: #3a4a5e; background: #f3f7fc; padding: 12px 22px; border-radius: 40px; flex-wrap: wrap; align-items: center; } .stats span { background: white; padding: 2px 16px; border-radius: 30px; border: 1px solid #dce5ef; font-weight: 500; color: #1f2a3a; } .status-msg { color: #1f7b4d; margin-left: 12px; } .bottom-toolbar { margin-top: 20px; display: flex; gap: 14px; justify-content: center; flex-wrap: wrap; padding-top: 20px; border-top: 1px solid #e8edf4; } .bottom-toolbar button { background: #2b6fdb; color: #fff; border: none; padding: 8px 22px; border-radius: 30px; font-size: 15px; font-weight: 500; cursor: pointer; transition: 0.2s; box-shadow: 0 2px 6px rgba(43,111,219,0.3); } .bottom-toolbar button:hover { background: #1a56b8; transform: translateY(-1px); } .bottom-toolbar button.success { background: #1f9d6b; } .bottom-toolbar button.success:hover { background: #16825a; } .toast { position: fixed; top: 30px; left: 50%; transform: translateX(-50%); background: #1f2a3a; color: white; padding: 12px 28px; border-radius: 10px; font-size: 15px; z-index: 999; opacity: 0; transition: 0.3s; pointer-events: none; box-shadow: 0 8px 30px rgba(0,0,0,0.2); } .toast.show { opacity: 1; transform: translateX(-50%) translateY(0); } `; const style = document.createElement('style'); style.textContent = css; document.head.appendChild(style); const pinyinSourceNode = document.querySelector("#app > div > div.ivu-layout > div.z-main.z-main-s > div > div > div.z-stage__content > form > div.ivu-card > div.ivu-card-body > div:nth-child(1) > div:nth-child(2) > div > div.customLayoutInput > div.customInput.horizontalLtr") const text = document.querySelector("#app > div > div.ivu-layout > div.z-main.z-main-s > div > div > div.z-stage__content > form > div.ivu-card > div.ivu-card-body > div:nth-child(1) > div:nth-child(1) > div > div.customLayoutInput > div.customInput.horizontalLtr").innerText const pinyinRaw = pinyinSourceNode ? pinyinSourceNode.innerText : ''; const pinyinTokens = pinyinRaw.split(/\s+/).filter(p => p.length > 0); const chars = Array.from(text); // 构建数据 let pinyinIndex = 0; const charData = chars.map((ch) => { const isHanzi = CN_CHAR.test(ch); let pinyin = null; if (isHanzi) { if (pinyinIndex < pinyinTokens.length) pinyin = pinyinTokens[pinyinIndex++]; else pinyin = ''; } return { char: ch, pinyin: pinyin, isHanzi: isHanzi, isGrayed: false, issues: [] }; }); const originalData = JSON.parse(JSON.stringify(charData)); const grid = document.getElementById('charGrid'); const hanziCountSpan = document.getElementById('hanziCount'); const pinyinFilledSpan = document.getElementById('pinyinFilled'); const grayedCountSpan = document.getElementById('grayedCount'); const statusMsg = document.getElementById('statusMsg'); const globalErrorsDiv = document.getElementById('globalErrors'); const toast = document.getElementById('toastMsg'); let toastTimer = null; let hoveredIndex = -1; function showToast(message, isError = false) { toast.textContent = message; toast.style.background = isError ? '#c0392b' : '#1f2a3a'; toast.classList.add('show'); clearTimeout(toastTimer); toastTimer = setTimeout(() => toast.classList.remove('show'), 2500); } function runValidationAndUpdateData() { const activeText = charData.map(item => item.isGrayed ? ' ' : item.char).join(''); const activePinyinStr = charData.filter(item => !item.isGrayed && item.pinyin && item.pinyin.trim().length > 0).map(item => item.pinyin).join(' '); charData.forEach(c => c.issues = []); const result = validatePinyinAnnotation(activeText, activePinyinStr, VALID_PINYIN); const pinyinIdxToCharIdx = {}; result.alignment.forEach(a => { if (a.pinyinIdx !== undefined && a.pinyinIdx >= 0 && a.textIndex >= 0) { pinyinIdxToCharIdx[a.pinyinIdx] = a.textIndex; } }); const globalMsgs = []; const addIssue = (issue, level) => { if (issue.pinyinIdx !== undefined && issue.pinyinIdx >= 0) { const cIdx = pinyinIdxToCharIdx[issue.pinyinIdx]; if (cIdx !== undefined && charData[cIdx]) { charData[cIdx].issues.push({ level, message: issue.message }); } else globalMsgs.push(`[${level.toUpperCase()}] ${issue.message}`); } else globalMsgs.push(`[${level.toUpperCase()}] ${issue.message}`); }; result.errors.forEach(e => addIssue(e, 'error')); result.warnings.forEach(w => addIssue(w, 'warn')); result.infos.forEach(i => addIssue(i, 'info')); return globalMsgs; } function updateUI() { const globalMsgs = runValidationAndUpdateData(); let hanziCount = 0, pinyinFilled = 0, grayedCount = 0; charData.forEach(item => { if (item.isHanzi) { hanziCount++; } if (item.isGrayed) { grayedCount++; } else if (item.pinyin && item.pinyin.trim().length > 0) { pinyinFilled++; } }); hanziCountSpan.textContent = hanziCount; pinyinFilledSpan.textContent = pinyinFilled; grayedCountSpan.textContent = grayedCount; if (globalMsgs.length > 0) { globalErrorsDiv.innerHTML = globalMsgs.join('
'); globalErrorsDiv.style.display = 'block'; } else { globalErrorsDiv.style.display = 'none'; globalErrorsDiv.innerHTML = ''; } const items = grid.querySelectorAll('.char-item'); charData.forEach((item, idx) => { const div = items[idx]; if (!div) return; const input = div.querySelector('.pinyin-input'); const hanziSpan = div.querySelector('.hanzi'); if (item.isGrayed) { input.classList.add('grayed'); input.disabled = true; input.placeholder = '✗'; hanziSpan.classList.add('grayed-hanzi'); hanziSpan.title = '已置灰'; } else { input.classList.remove('grayed'); input.disabled = false; input.placeholder = '-'; hanziSpan.classList.remove('grayed-hanzi'); hanziSpan.title = ''; } if (document.activeElement !== input) input.value = item.pinyin || ''; div.classList.remove('has-error', 'has-warn', 'has-info'); div.removeAttribute('data-tip'); if (item.issues && item.issues.length > 0 && !item.isGrayed) { const hasErr = item.issues.some(i => i.level === 'error'); const hasWarn = item.issues.some(i => i.level === 'warn'); if (hasErr) div.classList.add('has-error'); else if (hasWarn) div.classList.add('has-warn'); else div.classList.add('has-info'); const tipText = item.issues.map(i => { const prefix = i.level === 'error' ? '❌' : (i.level === 'warn' ? '⚠️' : '💡'); return `${prefix} ${i.message}`; }).join('\n'); div.setAttribute('data-tip', tipText); } }); const available = hanziCount - grayedCount; if (available === 0) { statusMsg.textContent = '⏹️ 所有汉字已置灰'; statusMsg.style.color = '#7a8a9e'; } else if (pinyinFilled === 0 && grayedCount === 0) { statusMsg.textContent = '📝 悬停后按 X 调整 | 按 0-5 改声调'; statusMsg.style.color = '#4a5a6e'; } else { statusMsg.textContent = `✏️ 拼音已填 ${pinyinFilled} | 悬停按 X 调整 | 按 0-5 改声调`; statusMsg.style.color = '#b16f1a'; } } function initGrid() { grid.innerHTML = ''; charData.forEach((item, idx) => { const div = document.createElement('div'); div.className = 'char-item'; if (!item.isHanzi) div.classList.add('punct'); if (item.char === ' ') div.classList.add('empty'); const input = document.createElement('input'); input.className = 'pinyin-input'; input.type = 'text'; input.dataset.index = idx; input.placeholder = '-'; input.addEventListener('input', function(e) { const val = this.value.trim(); if (charData[idx] && !charData[idx].isGrayed) { charData[idx].pinyin = val || ''; updateUI(); } }); input.addEventListener('blur', function() { if (this.value !== this.value.trim()) { this.value = this.value.trim(); this.dispatchEvent(new Event('input', { bubbles: true })); } }); div.appendChild(input); const hanziSpan = document.createElement('div'); hanziSpan.className = 'hanzi'; hanziSpan.textContent = item.char === ' ' ? '␣' : item.char; hanziSpan.addEventListener('mouseenter', function() { hoveredIndex = idx; if (!charData[idx].isGrayed) div.classList.add('hovering'); }); hanziSpan.addEventListener('mouseleave', function() { hoveredIndex = -1; div.classList.remove('hovering'); }); div.appendChild(hanziSpan); grid.appendChild(div); }); updateUI(); } function changePinyinTone(index, toneNumber) { if (index < 0 || index >= charData.length) return; const item = charData[index]; if (item.isGrayed || !item.pinyin) return; const lastChar = item.pinyin.charAt(item.pinyin.length - 1); if (/[0-5]/.test(lastChar)) item.pinyin = item.pinyin.slice(0, -1) + toneNumber; else item.pinyin = item.pinyin + toneNumber; updateUI(); showToast(`✅ 声调已改: ${toneNumber}`); } function shiftPinyinFromIndex(startIndex) { if (startIndex < 0 || startIndex >= charData.length) return; if (charData[startIndex].isGrayed) return; const hasPinyin = charData[startIndex].pinyin && charData[startIndex].pinyin.length > 0; if (hasPinyin) { // 情况1:有拼音 -> 拼音后移 (向后腾出空间) let lastValidIdx = -1; for (let i = charData.length - 1; i >= startIndex; i--) { if (!charData[i].isGrayed) { if (lastValidIdx !== -1) { charData[lastValidIdx].pinyin = charData[i].pinyin; } lastValidIdx = i; } } charData[startIndex].pinyin = ''; showToast(`✅ 「${charData[startIndex].char}」拼音已后移`); } else { // 情况2:无拼音 -> 拼音前移 (填充当前空位) const pinyinList = []; const indices = []; for (let i = startIndex; i < charData.length; i++) { if (!charData[i].isGrayed) { pinyinList.push(charData[i].pinyin || ''); indices.push(i); } } if (pinyinList.length > 0) { for (let i = 0; i < indices.length - 1; i++) { charData[indices[i]].pinyin = charData[indices[i+1]].pinyin; } charData[indices[indices.length - 1]].pinyin = ''; showToast(`✅ 「${charData[startIndex].char}」填补后续拼音`); } } // 重新渲染 updateUI(); // 高亮动画 setTimeout(() => { const items = grid.querySelectorAll('.char-item'); if (items[startIndex]) { items[startIndex].classList.add('shifting'); setTimeout(() => items[startIndex].classList.remove('shifting'), 600); } }, 50); } document.addEventListener('keydown', function(e) { const key = e.key; if (key === 'x' || key === 'X') { if (hoveredIndex >= 0 && hoveredIndex < charData.length) { if (!charData[hoveredIndex].isGrayed) { e.preventDefault(); shiftPinyinFromIndex(hoveredIndex); } } } if (/^[0-5]$/.test(key)) { if (hoveredIndex >= 0 && hoveredIndex < charData.length) { if (!charData[hoveredIndex].isGrayed && charData[hoveredIndex].pinyin) { e.preventDefault(); changePinyinTone(hoveredIndex, parseInt(key)); } } } }); document.getElementById('resetBtn').addEventListener('click', () => { charData.forEach((item, idx) => { item.pinyin = originalData[idx].pinyin; item.isGrayed = false; }); updateUI(); showToast('↻ 已重置并重新检测'); }); document.getElementById('backfillBtn').addEventListener('click', () => { const pinyinList = charData .filter(item => !item.isGrayed && item.pinyin && item.pinyin.trim().length > 0) .map(item => item.pinyin); if (pinyinList.length === 0) return showToast('⚠️ 没有可回填的拼音', true); const result = pinyinList.join(' '); if (!pinyinSourceNode) { return showToast('⚠️ 找不到原始拼音输入框,回填失败', true); } const tag = pinyinSourceNode.tagName.toLowerCase(); if (tag === 'input' || tag === 'textarea') { pinyinSourceNode.value = result; pinyinSourceNode.dispatchEvent(new Event('input', { bubbles: true })); pinyinSourceNode.dispatchEvent(new Event('change', { bubbles: true })); } else if (pinyinSourceNode.isContentEditable) { pinyinSourceNode.textContent = result; pinyinSourceNode.dispatchEvent(new Event('input', { bubbles: true })); } else { pinyinSourceNode.textContent = result; pinyinSourceNode.dispatchEvent(new Event('input', { bubbles: true })); } showToast(`✅ 已回填 ${pinyinList.length} 个拼音到页面`); }); initGrid(); console.log('✅ 带实时检测功能的拼音标注页面加载完成'); } document.addEventListener('keydown', (e) => { if (e.key === "p" && document.activeElement.tagName !== 'INPUT' && document.activeElement.tagName !== 'TEXTAREA') { pinyin(); } if (e.key === ' '){ e.preventDefault(); document.querySelector("#engine0_default_item_音频 > div.item-component > div > div > div.audio-left-panel > ul > div.video-area > div.video-control-bar > div.video-control-center > i:nth-child(2)").click() } }); })();