w
# -*- coding: utf-8 -*-
# Build a word-frequency distribution over a review-text dump with NLTK and
# print its summary followed by the first 50 distinct tokens.
import os  # explicit import: do not depend on the star import below for `os`

from nltk import *

# TO FIX : No such file or directory
os.chdir(r'E:\zpy')

# Context manager so the handle is closed even if read() raises.
with open('reviews_text_lt_3.txt', 'r') as f:
    f_r = f.read()

# NOTE: splitting on a single space (not on arbitrary whitespace) keeps empty
# strings and tokens with embedded newlines as samples.
strList = f_r.split(' ')
fdist1 = FreqDist(strList)

# Total word count.
# print(...) with a single argument behaves the same on Python 2 and 3.
print(fdist1)

# keys() gives us all the distinct types (tokens) in the text.
# Wrapped in list() because on Python 3 keys() is a view that cannot be sliced.
vocabulary1 = list(fdist1.keys())

# Look at the first 50 items of that list via slicing.
res0_50 = vocabulary1[:50]
print(res0_50)
C:\>python E:\zpy\wltp.py
<FreqDist with 16789 samples and 180043 outcomes>
['', 'raining', 'disappointing.It', 'uncomfortable...', "lot's", 'uv.\nSo,', 'yellow', 'Seller', 'four', 'vaporizers.I', 'Does', 'completely!!', 'hanging', 'Monday,', 'asap!!This', 'Until', 'instead.The', 'malfunctioned.', 'Lately', 'looking', 'LAST', 'eligible', 'electricity', 'DISAPPOINTED', 'oneWorks', 'powdery', 'unanswered', 'also.', 'refun'sooooo', 'foul', 'on\nafter', 'fingers.', 'advice:', 'fingers,', 'advice?', 'each),', 'month.I']
C:\>
SELECT amz_review_text
FROM amz_reviews_grab_us
WHERE amz_review_rating < 3
LIMIT 3000;
对于通过亚马逊us美国站的买家而言,在数据库前3000条的时间周期y-m-d内,在不考虑品类、价格、评分相对值等因素的情况下,
暂得出以下推测:
0-卖品属性为yellow,其他条件相同情况下,可能不受欢迎,评分相对低;
1-周一可能会给买家糟糕的购买体验,周一的促销活动须结合其他因素(如人文风俗、新闻事件)慎重安排;
注:dev的当前视角