python-DTW


python-DTW

import xlrd
import numpy as np
import matplotlib.pyplot as plt

# dtaidistance!!!优选
from dtaidistance import dtw
from dtaidistance import dtw_visualisation as dtwvis

# 归一化
from sklearn.preprocessing import MinMaxScaler 
# Reference workbook holding the standard "sleeping beauty" papers.
wb = xlrd.open_workbook(filename="ex.xlsx")

# Only the first sheet of the reference workbook is used.
sh = wb.sheet_by_index(0)

# Workbooks with the data under test, one sheet per year:
#   "1990-1999.xlsx" -> wb_1990_1999, sheet index 0..9  <-> 1990..1999
#   "2000-2010.xlsx" -> wb_2000_2010, sheet index 0..10 <-> 2000..2010
# NOTE(review): xlrd 2.x no longer reads .xlsx files -- confirm that the
# installed xlrd version still supports this format.
wb_1990_1999, wb_2000_2010 = (
    xlrd.open_workbook(filename=path)
    for path in ("1990-1999.xlsx", "2000-2010.xlsx")
)

# Bind every yearly sheet to a module-level name ``sh_<year>`` in one go,
# instead of spelling out ``sh_1990 = wb_1990_1999.sheet_by_index(0)``,
# ``sh_2000 = wb_2000_2010.sheet_by_index(0)`` ... by hand.
# At module level ``locals()`` is the module namespace, so writing into the
# dictionary creates real variables.
names = locals()

# Sheets 0..9 of the first workbook are the years 1990..1999.
names.update(
    (f"sh_{1990 + offset}", wb_1990_1999.sheet_by_index(offset))
    for offset in range(10)
)

# Sheets 0..10 of the second workbook are the years 2000..2010.
names.update(
    (f"sh_{2000 + offset}", wb_2000_2010.sheet_by_index(offset))
    for offset in range(11)
)

数据预处理

# Reference "sleeping beauty" papers.
# ``table`` gets one entry per row of the reference sheet; according to the
# original author each row holds a paper's citation counts per year.
table = [sh.row_values(row_index)[:] for row_index in range(sh.nrows)]
print(table)
# Read every yearly sheet (1990..2010) into a module-level list
# ``table_<year>`` with one entry per sheet row (the per-year citation
# counts of one paper, per the original author's note).
names = locals()
for year in range(1990, 2011):
    sheet = names[f"sh_{year}"]
    names[f"table_{year}"] = [
        sheet.row_values(row_index)[:] for row_index in range(sheet.nrows)
    ]
    # Progress marker for the year just read.
    print(f"{year}ok!")
# Min-max normalization.
scaler = MinMaxScaler()

# Normalize the ten reference "sleeping beauty" series.
# Each series keeps only its first ``length`` values (lengths are listed in
# row order of ``table``) and is reshaped to a single column before scaling.
# ``fit_transform`` refits the scaler on every call, so each series is
# scaled against its own minimum and maximum.
s1 = [
    scaler.fit_transform(
        np.array(table[paper][:length], dtype=np.double).reshape(-1, 1)
    )
    for paper, length in enumerate(
        (103, 60, 116, 115, 82, 86, 58, 85, 75, 102)
    )
]

# Normalize the data under test, year by year (1990..2010).
# For every year this creates two module-level names:
#   num_table_<year> -- number of rows read for that year
#   s2_<year>        -- list with the normalized column vector of every row
names = locals()
for year in range(1990, 2011):
    year_rows = names[f"table_{year}"]

    # How many series this year contains.
    names[f"num_table_{year}"] = len(year_rows)

    # The scaler is refit for each row, so every series is scaled on its own.
    names[f"s2_{year}"] = [
        scaler.fit_transform(
            np.array(series, dtype=np.double).reshape(-1, 1)
        )
        for series in year_rows
    ]
    print(f"{year}over")