python-波峰


python-波峰

import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt 
import random
# Load the table. The rows are expected to have been re-sorted by
# publication year beforehand.
data = pd.read_csv("***.csv", low_memory=False)
data.head()  # bare expression: only shows output in an interactive session
# Preview the columns that feed the calculation (third column onwards).
data.iloc[:, 2:].head()

# Example: coefficient of variation (row-wise std / row-wise mean).
# Equivalent to filtering a spreadsheet to Publication Year == 1990 and then
# computing the statistic for every remaining row.
published_1990 = data["Publication Year"] == 1990
value_columns = data.iloc[:, 2:]
row_std_1990 = value_columns[published_1990].std(axis=1, skipna=True)
# The mean is taken over every row; the division aligns on the row labels,
# so rows outside 1990 simply come out as NaN.
row_mean = value_columns.mean(axis=1, skipna=True)
fluctuation_1990 = row_std_1990 / row_mean
fluctuation_1990  # bare expression: interactive preview only

# Store the result on the 1990 rows (assignment aligns on the row labels).
data.loc[published_1990, 'Fluctuation_of_annual_citation'] = fluctuation_1990

# Replace missing values with the string "nan" and copy the column over.
# NOTE(review): final_data is not defined anywhere in the visible code; it
# must already exist when this line runs, otherwise it raises NameError.
final_data.loc[:, 'Fluctuation_of_annual_citation'] = (
    data["Fluctuation_of_annual_citation"].fillna("nan")
)
# Persist to CSV.
final_data.to_csv("final_data.csv")

波峰指标测度

# How many rows there are per publication year.
data["Publication Year"].value_counts()

data.info()
# Inspect the rows of one publication year (columns from position 23 on).
year_i = 2010
data[data["Publication Year"] == year_i].iloc[:, 23:]

# Add one empty-string column per year, 'bofeng_if_data_1990' through
# 'bofeng_if_data_2019' (30 columns).  The names are generated instead of
# typed out, and the scalar is assigned directly rather than through a
# row-wise apply(), which called a Python lambda once per row just to
# produce a constant.
bofeng_columns = ['bofeng_if_data_' + str(year) for year in range(1990, 2020)]
for column_name in bofeng_columns:
    data[column_name] = ''

# Number of rows (and columns) belonging to year_i.
data[data["Publication Year"] == year_i].shape
# Each publication year is processed separately (the original notes say
# 1990-2010); year_i selects which one.
data_year = data[data["Publication Year"] == year_i]
# The slice deliberately keeps data's own row labels (no reset_index):
# later steps write results back into data by those labels.
data_year  # bare expression: interactive preview only

# Row window of this year inside data: first row label and number of rows.
# These used to be literals (28885 / 6615) that had to be edited by hand for
# every year_i; a stale value made the loops address the wrong rows or raise
# KeyError.  They are now derived from the slice itself.
# Assumes the rows of one year form a contiguous run of integer labels, i.e.
# the file is sorted by publication year as the loading step expects.
if data_year.empty:
    hang_begin = 0  # first row label
    n_of_rows = 0   # number of rows
else:
    hang_begin = data_year.index[0]  # first row label
    n_of_rows = len(data_year)       # number of rows
# Peak ("bofeng") detection for the rows published in year_i.
#
# For a year column j, a row's value counts as a peak when it is
#   * greater than 2,
#   * strictly greater than each of the (up to three) preceding years,
#     never reaching back before the publication year, and
#   * greater than or equal to each of the (up to three) following years
#     (columns up to '2019' are referenced).
# Peaks are stored in 'bofeng_if_data_<j>'; every other row of the year gets
# the string "0".
#
# The six copy-pasted per-row loops are replaced by one vectorised helper.
# Rows are taken from data_year's own index instead of a hand-maintained
# range(hang_begin, hang_begin + n_of_rows), so a stale offset can no longer
# address the wrong rows.


def _leftmost_max(columns):
    """Return the element-wise maximum of a non-empty list of aligned Series.

    Candidates are folded left to right and replace the running value only
    when strictly greater -- the same rule the built-in max() applies -- so
    missing values are treated exactly as max(a, b, c) treated them in the
    original per-row code.
    """
    best = columns[0]
    for candidate in columns[1:]:
        best = candidate.where(candidate > best, best)
    return best


def _mark_citation_peaks(frame, rows, year, n_before, n_after, threshold=2):
    """Write the peak marks of one year column into ``frame``.

    Args:
        frame: DataFrame that receives column 'bofeng_if_data_<year>'.
        rows: slice of ``frame`` (same row labels) whose year columns are read.
        year: year whose column is tested.
        n_before: number of preceding year columns the value must exceed.
        n_after: number of following year columns the value must at least equal.
        threshold: the value must be strictly greater than this.

    Raises:
        KeyError: if a required year column is missing from ``rows``.
    """
    if rows.empty:
        return

    current = rows[str(year)]
    is_peak = current > threshold
    if n_before:
        earlier = [rows[str(y)] for y in range(year - n_before, year)]
        is_peak = is_peak & (current > _leftmost_max(earlier))
    if n_after:
        later = [rows[str(y)] for y in range(year + 1, year + n_after + 1)]
        is_peak = is_peak & (current >= _leftmost_max(later))

    # Object dtype on purpose: peaks keep their numeric value while
    # non-peaks are the string "0", exactly as before.
    marks = pd.Series("0", index=rows.index, dtype=object)
    marks.loc[is_peak] = current.loc[is_peak]
    frame.loc[rows.index, 'bofeng_if_data_' + str(year)] = marks


# Publication year itself: there is no earlier year to compare against.
j = year_i
_mark_citation_peaks(data, data_year, j, n_before=0, n_after=3)
print("出版当年ok")

# First year after publication: only the publication year precedes it.
j = year_i + 1
_mark_citation_peaks(data, data_year, j, n_before=1, n_after=3)
print("出版后第一年ok")

# Second year after publication: two preceding years are available.
j = year_i + 2
_mark_citation_peaks(data, data_year, j, n_before=2, n_after=3)
print("出版后第二年ok")

# Middle years, year_i + 3 through 2016 (range end is exclusive): the full
# three-year window exists on both sides.
for j in range(year_i + 3, 2017):
    _mark_citation_peaks(data, data_year, j, n_before=3, n_after=3)
    print(str(j) + "ok!")

print("出版后中间年ok")

# 2017: only 2018 and 2019 follow.
j = 2017
_mark_citation_peaks(data, data_year, j, n_before=3, n_after=2)
print("2017ok")

# 2018: only 2019 follows.
j = 2018
_mark_citation_peaks(data, data_year, j, n_before=3, n_after=1)
print("2018ok")
# Optional checkpoint: write the whole data table to a scratch CSV and
# report which publication year was just processed.
data.to_csv(path_or_buf='linshi.csv')
print("{}保存成功!".format(year_i))