Python 结合 pandas 与 NumPy：在百万条数据中查找某一条数据并写入 CSV 文件
Python 结合 pandas 与 NumPy：在百万条数据中查找某一条数据并写入 CSV 文件
pandas：Python 的一个数据分析包，用于数据处理。
NumPy：数值计算的扩展包，能高效处理 N 维数组、复杂函数和线性代数运算。
import csv
import time

import numpy as np
import pandas as pd
# Column names of the output CSV, in the order they are written.
data_header_list = ["x", "y", "speed"]
def csv_writer(data_list, data_header_list, file_path):
    """Append rows to a CSV file, writing the header only when the file is empty.

    Args:
        data_list: List of dicts, one per row, keyed by the names in
            ``data_header_list``.
        data_header_list: List of column names, in output order.
        file_path: Path of the CSV file to append to; created if missing.

    Raises:
        ValueError: If ``data_list`` or ``data_header_list`` is not a list.
    """
    if not isinstance(data_list, list):
        raise ValueError("data_list is no list")
    if not isinstance(data_header_list, list):
        raise ValueError("data_header_list is no list")
    # newline='' lets the csv module control line endings itself.
    with open(file_path, 'a', newline='') as csv_fi:
        # Decide whether this is the first write from the file size rather
        # than by reading: an append-mode handle starts at end-of-file, so a
        # read would always look empty and the header would be repeated.
        csv_fi.seek(0, 2)  # whence=2: seek to end of file
        is_first_write = csv_fi.tell() == 0
        writer = csv.DictWriter(csv_fi, data_header_list)
        if is_first_write:
            writer.writeheader()
        writer.writerows(data_list)
# CSV file to read the lookup keys from; every field is loaded as a string.
# (The builtin ``str`` replaces the removed ``np.str`` alias.)
fin = np.loadtxt("/home/read.csv", dtype=str, delimiter=',')
file_path = '/home/write.csv'  # CSV file the results are appended to
data_frame = pd.read_csv("/home/reference.csv")  # reference data to search
data = fin[1:].tolist()  # skip the first row of the input file
for list1 in data:
    # The last field is the timestamp; the second field is the id.
    timestamp = int(float(list1[-1]))
    # NOTE(review): at_id stays a string here; if the reference file's
    # 'at_id' column is parsed as numeric the comparison below never
    # matches -- confirm the column dtype.
    at_id = list1[1]
    # Fall back to zeros when no unique, numeric match is found.
    x = y = speed = 0
    try:
        one_data = data_frame.loc[
            (data_frame['time'] == timestamp) & (data_frame['at_id'] == at_id)
        ]
        # Only a single matching row yields values; zero or several
        # matches keep the zero fallback.
        if len(one_data) == 1:
            matched_row = one_data.iloc[0]
            x = float(matched_row['x'])
            y = float(matched_row['y'])
            speed = float(matched_row['speed_m_s'])
    except (KeyError, TypeError, ValueError):
        # Missing column or non-numeric value: reset all three together
        # so a partially converted row is never written.
        x = y = speed = 0
    test_dict = {
        "x": x,
        "y": y,
        "speed": speed,
    }
    current_log_list = [test_dict]
    csv_writer(current_log_list, data_header_list, file_path)
    time.sleep(0.001)