#!/usr/bin/env python
# -*- encoding: utf-8 -*-
# @Date : 2015-03-07 12:41:14
# @Author : NSSimacer
# @Email : wuxiaoqiang1020@gmail.com
# @Version : 1.0

import re
from collections import Counter

# A "word" is a maximal run of ASCII letters and digits; every other
# character (whitespace, punctuation, non-ASCII text) acts as a separator.
# Note that digits are kept, so "2015" and "Python3" count as words.
_WORD_RE = re.compile(r'[a-zA-Z0-9]+')


def count_words(file_name):
    """Count the lines and words of a plain English text file.

    Matching is case-sensitive, so "The" and "the" are counted as two
    different words.

    Args:
        file_name: Path of the text file to read.

    Returns:
        A 4-tuple ``(lines_count, words_count, words, words_dict)`` where
        ``lines_count`` is the number of lines in the file, ``words_count``
        is the total number of words found, ``words`` is the list of all
        words in order of appearance (duplicates included), and
        ``words_dict`` is a plain dict mapping each distinct word to the
        number of times it occurs.

    Raises:
        IOError/OSError: If the file cannot be opened or read.
    """
    words = []
    lines_count = 0
    with open(file_name, 'r') as f:
        for line in f:
            lines_count += 1
            # findall ignores surrounding whitespace on its own, so the
            # line does not need to be stripped first.
            words.extend(_WORD_RE.findall(line))

    # Convert back to a plain dict so callers get the same type as before.
    words_dict = dict(Counter(words))
    return lines_count, len(words), words, words_dict


def main(file_name='plain_text.txt'):
    """Print the word statistics of ``file_name`` to standard output."""
    lines_count, words_count, words, words_dict = count_words(file_name)

    # Single-argument print(...) calls produce the same text on Python 2
    # and Python 3 without needing a __future__ import.
    print('In file: %s' % file_name)
    print('Total Lines: %s' % lines_count)
    print('Total Words: %s' % words_count)
    print('Words: %s' % ', '.join(words))
    print('Frequency of each word:')
    for key, value in words_dict.items():
        print('%s: %s' % (key, value))


if __name__ == '__main__':
    main()
点赞 1

相关推荐

牛客网
牛客企业服务