有趣的宋词
菩萨蛮(回文)
等闲将度三春景。景春三度将闲等。愁怕更高楼。楼高更怕愁。
弄花梅已动。动已梅花弄。梅看几年催。催年几看梅。
赏花归去马如飞酒力微醒时已暮 palindrome
无无无有有无无,悟得无无便不愚。日月年时损壮粗。见元初。万道霞光攒宝珠。
if hz == prev_hz: dup = True; dup_hz = hz
if re.search('满.+酒', line): print(line)
.py和.txt: https://files.cnblogs.com/files/blogs/714801/songci.zip 我尽力删里面的xx书网膏药了。
# -*- coding: utf-8 -*-
"""Word-play searches over a Song-ci corpus stored in 宋词.txt.

Three interchangeable per-line handlers are defined. When run as a script,
``handle_line2`` is applied to every line of the corpus and the doubled
characters seen more than ten times are printed, most frequent first.
"""
import re

# Lines longer than this many characters (newline included) are ignored.
_MAX_LINE_LEN = 40

# Characters the handlers skip when counting or comparing neighbours.
# Built once instead of once per character.
_SKIP_CHARS = frozenset('、,。之囗')

# "满 ... 酒" with at least one character in between.
# re.compile(pattern, flags=0) returns a reusable Pattern object.
_MAN_JIU_RE = re.compile('满.+酒')

# Doubled character pair -> number of lines in which handle_line2 reported it.
all_dup_hz = {}


def handle_line(line):
    """Show lines where one character occurs more than six times.

    Lines longer than 40 characters are ignored, and characters in
    ``_SKIP_CHARS`` are not counted. For a qualifying line, the line is
    printed together with a dict of every character occurring more than
    twice, then the function waits for Enter so the results can be read
    one at a time.
    """
    if len(line) > _MAX_LINE_LEN:
        return
    counts = {}
    for hz in line:
        if hz in _SKIP_CHARS:
            continue
        counts[hz] = counts.get(hz, 0) + 1
    if max(counts.values(), default=0) > 6:
        frequent = {hz: n for hz, n in counts.items() if n > 2}
        print(line, frequent)
        input()  # pause until the user presses Enter


def handle_line2(line):
    """Report a character that appears twice in a row in ``line``.

    Lines longer than 40 characters are ignored. Characters in
    ``_SKIP_CHARS`` are skipped without resetting the previous character,
    so a pair separated only by punctuation (e.g. ``楼。楼``) still counts.
    If several pairs occur, only the last one is reported. The pair is
    tallied in the module-level ``all_dup_hz`` and printed after the line,
    separated by a tab.
    """
    if len(line) > _MAX_LINE_LEN:
        return
    prev_hz = ''
    dup_hz = ''
    for hz in line:
        if hz in _SKIP_CHARS:
            continue
        if hz == prev_hz:
            dup_hz = hz
        prev_hz = hz
    if not dup_hz:
        return
    pair = dup_hz * 2
    all_dup_hz[pair] = all_dup_hz.get(pair, 0) + 1
    print(line.strip('\n'), '\t', pair, sep='')


def handle_line3(line):
    """Print lines of at most 40 characters matching the regex ``满.+酒``."""
    if len(line) > _MAX_LINE_LEN:
        return
    if _MAN_JIU_RE.search(line):
        print(line)


def main():
    """Scan 宋词.txt with handle_line2 and print the frequent doubled pairs.

    Prints a list of ``(pair, count)`` tuples with count > 10, sorted by
    count in descending order. Prints nothing if no pair was found.
    """
    with open('宋词.txt', 'r', encoding='utf-16le') as f:
        for line in f:
            handle_line2(line)
    if not all_dup_hz:
        return
    # Each item is a (pair, count) tuple; rank by count, highest first.
    ranked = sorted(all_dup_hz.items(), key=lambda item: item[1], reverse=True)
    print([(pair, count) for pair, count in ranked if count > 10])


if __name__ == '__main__':
    main()
ang ['丈', '上', '丧', '乓', '亡', '亢', '仓', '仗', '仰', '仿', '伤', '佯', '倘', '倘', '倡', '偿', '傍', '党', '养', '冈', '刚', '厂', '吭', '唐', '唱', '商', '嗓', '嚷', '囊', '场', '坊', '堂', '塘', '墒', '壤', '央', '夯', '妄', '妨', '尚', '尝', '岗', '巷', '帐', '帮', '常', '庞', '康', '廊', '张', '当', '彭', '彰', '往', '忘', '忙', '慷', '房', '扛', '扛', '扬', '抗', '挡', '掌', '搪', '攘', '放', '敞', '方', '旁', '旺', '昂', '昌', '晌', '朗', '望', '杖', '杠', '杨', '杭', '枉', '样', '桑', '档', '梆', '棒', '棠', '榔', '榜', '樟', '殃', '氓', '氧', '汤', '汤', '汪', '沧', '洋', '浪', '涨', '淌', '淌', '港', '漳', '漾', '炕', '烫', '狼', '猖', '王', '琅', '瓤', '畅', '疡', '痒', '瘴', '盎', '盲', '磅', '磅', '秧', '章', '糖', '糠', '纲', '纺', '绑', '缸', '网', '羊', '耪', '肛', '肠', '肪', '肮', '胀', '胖', '脏', '膀', '膛', '航', '舱', '芒', '芳', '苍', '茫', '荡', '莽', '葬', '藏', '藏', '蚌', '裳', '让', '访', '详', '谤', '账', '赃', '赏', '趟', '躺', '邦', '郎', '钢', '镑', '长', '防', '阳', '障', '鸯']
https://files.cnblogs.com/files/blogs/714801/py-tbl.zip
# -*- coding: utf-8 -*-
"""List the characters of a pinyin table that share a given final (rhyme)."""
import re

# Strips everything up to and including the last '-' of a table key,
# e.g. 'z-uo' -> 'uo'; keys without '-' (e.g. 'a') are left unchanged.
_INITIAL_RE = re.compile('^.+-')


def main(py='ang'):
    """Print ``py`` followed by the sorted characters that carry that final.

    Only characters present in ``gb2312_set()`` are kept. A character that
    is listed under several syllables with the same final appears once per
    listing, so the output may contain duplicates. If the table has no
    entry for ``py`` an empty list is printed.

    Args:
        py: the final to look up, as it appears after the '-' in the table.
    """
    by_final = _group_by_final(get_raw_tbl())
    simple_hz_set = gb2312_set()
    print(py, sorted(hz for hz in by_final.get(py, '') if hz in simple_hz_set))


def _group_by_final(raw):
    """Map each final to the concatenated characters of all its syllables."""
    grouped = {}
    # Split on CR/LF only; the original class '[\r|\n]' also split on '|'.
    for line in re.split('[\r\n]+', raw):
        if not line.strip():
            continue
        (syllable, hzs) = line.split()
        final = _INITIAL_RE.sub('', syllable)
        grouped[final] = hzs + grouped.get(final, '')
    return grouped


def get_raw_tbl():
    """Return the raw table, one ``<initial>-<final> <characters>`` per line."""
    return '''
a 吖阿啊锕嗄錒
z-uo 左作坐阼佐苲怍岝咗岞侳柮柞昨祚胙唑座秨袏莋笮做捽唶葄酢葃琢蓙稓筰鈼飵撮諎嘬穝繓糳
'''


def gb2312_set():
    """Return the set of characters used to filter the table output."""
    return set('''
啊阿埃挨哎唉哀皑癌蔼矮艾碍爱隘鞍氨安俺按暗岸胺案肮昂盎凹敖熬翱袄傲奥懊澳芭捌扒叭吧
''')


if __name__ == '__main__':
    main()
print(set(d[py]).intersection(gb2312_set()))