python文件转换问题
文件的打开:
# Open data.txt in read mode ("r"), decoding its contents as UTF-8 text.
file = open("data.txt", "r", encoding="UTF-8")
其中第一个参数为文件的路径和名称;如果文件放在项目一级目录(即运行脚本时所在的目录)下,则只需写文件名,不需要指定路径:
文本数据清洗:
# -*- coding: utf-8 -*-
# Clean the raw text data in place: every comma in data.txt is replaced by a
# space and every double quote is removed, then the file is overwritten.
import csv
import re

import py2neo
from py2neo import Graph, Node, Relationship, NodeMatcher

# Read and clean every line first. The cleaned lines are kept in memory
# because the same path is reopened for writing (and truncated) below.
lineList = []
with open("data.txt", "r", encoding='utf-8') as file:
    for line in file:
        line2 = line.replace(',', ' ')  # turn each comma into a space
        line2 = line2.replace('"', '')  # drop double quotes
        lineList.append(line2)
print("Read file End or Error")

# Overwrite the original file with the cleaned lines. The `with` block closes
# the handle even if a write fails part-way through.
with open(r'data.txt', 'w', encoding='UTF-8') as file:
    file.writelines(lineList)
清洗前后对比:
文本txt转xls:
# -*- coding: utf-8 -*-
# Convert the space-separated text file data.txt into an Excel workbook:
#   1. create the workbook object
#   2. create a sheet
#   3. load the data
#   4. write it out cell by cell (row by row, column by column)
import xlwt
# import openpyxl

# Read all lines up front; the `with` block guarantees the handle is closed.
with open("data.txt", "r", encoding="UTF-8") as file:
    a1 = file.readlines()

workbook1 = xlwt.Workbook(encoding="UTF-8")
worksheet1 = workbook1.add_sheet('dataX')
for index, row in enumerate(a1):
    # Strip the line terminator before splitting; otherwise the last cell of
    # every row would carry a trailing newline into the sheet.
    d = row.rstrip('\n').split(' ')
    for col, value in enumerate(d):
        # Cells are written with a +1 offset, so row 0 and column 0 of the
        # sheet stay empty.
        worksheet1.write(index + 1, col + 1, value)
workbook1.save('datax.xls')

# Alternative implementation based on openpyxl, kept disabled for reference.
# Unlike the code above it splits on ',' and saves to 'data1.xls'.
# def write_line_excel():
#     work_book = openpyxl.Workbook()
#     sheet = work_book.create_sheet('new_data')
#     data = open('data.txt', 'r', encoding='utf-8')
#     datas = data.readlines()
#     for index, row in enumerate(datas):
#         d = row.split(',')
#         for col in range(len(d)):
#             sheet.cell(index+1, col+1, d[col])
#
#     work_book.save('data1.xls')
# write_line_excel()
xls转成csv:
######## Convert an xls file to a csv file ########
import os

import pandas as pd


def xlsx_to_csv_pd(xls_file):
    """Convert an Excel workbook to a GBK-encoded CSV file.

    The workbook is read with ``pandas.read_excel`` using its first column
    as the index, and written next to the input with the extension replaced
    by ``.csv``. The output path without the extension is printed.

    Args:
        xls_file: Path of the Excel file to convert.
    """
    data_xls = pd.read_excel(xls_file, index_col=0)
    # splitext removes only the final extension, so dots elsewhere in the
    # path (e.g. "./out/data.v2.xls") are preserved; splitting on the first
    # '.' would truncate such paths.
    csv_file = os.path.splitext(xls_file)[0]
    print(csv_file)
    data_xls.to_csv(csv_file + '.csv', encoding="GBK")


# NOTE(review): the xlwt conversion script saves its workbook as 'datax.xls',
# while this call reads 'data1.xls' -- confirm which file name is intended.
xlsx_to_csv_pd('data1.xls')
这里注意:保存 csv 时使用 GBK 而不是 utf-8 编码,是因为中文 Windows 下的 Excel 默认按 GBK 打开 csv 文件;如果用 utf-8 保存,直接用 Excel 打开会出现乱码