write()方法
write() 方法只能往文件中写入字符串,所以在写入前,必须先把可迭代对象中的每个元素转换成字符串(str)。
点击查看代码
def count_words(filename, exam_filename='Raw_High_School_Entrance.txt',
                output_filename='Matches_of_High_School_Entrance.csv'):
    """Match the words of an outline word list against an exam text.

    Both input files are read as UTF-8 and split on whitespace.  Every word
    of ``filename`` that also occurs (exact, case-sensitive comparison) in
    ``exam_filename`` counts as a match.  The matches are sorted, title-cased
    and written one per line to ``output_filename``, followed by a short
    summary block.  The same statistics are printed to stdout.

    Args:
        filename: Path of the outline (syllabus) word list.
        exam_filename: Path of the unprocessed exam text to match against.
        output_filename: Path of the report file; it is overwritten on
            every call.

    Returns:
        None.  If ``filename`` does not exist a message is printed and no
        report is written.

    Raises:
        FileNotFoundError: If ``exam_filename`` does not exist.
    """
    try:
        with open(filename, encoding='utf-8') as outline_file:
            outline_words = outline_file.read().split()
    except FileNotFoundError:
        print(f"Sorry, the file {filename} does not exist.")
        return

    num_outline_words = len(outline_words)
    print(num_outline_words)

    with open(exam_filename, encoding='utf-8') as exam_file:
        exam_words = exam_file.read().split()
    # Build the set once: membership tests below are O(1) instead of a
    # full scan of the exam word list for every outline word.
    unique_exam_words = set(exam_words)
    num_unique_exam_words = len(unique_exam_words)

    # Matching direction: each outline word is looked up in the exam text.
    # Duplicated outline words are kept, so they appear repeatedly in the
    # report; sorting happens once, on the raw (not title-cased) words.
    matched_words = sorted(
        word for word in outline_words if word in unique_exam_words
    )
    total_matches = len(matched_words)
    unique_matches = len(set(matched_words))

    print(f"the total number of words in Entrance is: {len(exam_words)}")
    print(f"The total of word matches is: {total_matches}.")
    print(f"The total matches of unique words is: {unique_matches}")

    # An empty outline file would otherwise divide by zero.
    if num_outline_words:
        match_rate = total_matches / num_outline_words
        word_rate = num_unique_exam_words / num_outline_words
    else:
        match_rate = word_rate = 0.0
    result = '{:.2%}'.format(match_rate)  # match rate as a percentage

    print(f"matchRate: {result}")
    print(f"wordrate: {word_rate}")
    print(f"numOfUniqueRawEntranceWords: {num_unique_exam_words}")
    print(f"numOfListOfOutlineWords:{num_outline_words}")

    # write() only accepts strings, so each word is turned into its own
    # newline-terminated line.  The report is written in a single pass and
    # in UTF-8, matching the encoding used for the input files.
    separator = '+' * 42
    with open(output_filename, 'w', encoding='utf-8') as report:
        report.writelines(word.title() + '\n' for word in matched_words)
        # The line is labelled UNIQUE, so it reports the de-duplicated
        # count, the same figure that is printed to stdout above.
        report.write(
            f"\n{separator}"
            f"\nThe total matches of UNIQUE words is: {unique_matches}, "
            f"\nThe match wordRate is: {result}."
            f"\n{separator}"
        )
"""从存放文件名的文件中读取要处理的文件名"""
# The names of the files to process are kept in a file rather than in a
# hard-coded list such as ['CNBC.txt', 'CNBC1.txt', 'CNBC2.txt'].
# filenames.txt holds the names of the standard high-school outline
# (syllabus) word lists, separated by whitespace.
with open('filenames.txt', encoding='utf-8') as filenames_object:
    filenames = filenames_object.read().split()

# Process every listed word list in turn.
# Original author's note: add two more file-name arguments to this call.
for filename in filenames:
    count_words(filename)