携程笔试大数据题,贝叶斯分类,一行代码,60%通过
更新一下已 AC 的第一题(计算信息熵与信息增益):
def getH(arr):
    """Return the Shannon entropy (in bits) of the binary labels in ``arr``.

    Args:
        arr: sequence of ``(feature, target)`` pairs. Only targets equal to
            0 or 1 are counted as classes; the probabilities are taken
            relative to the total number of rows.

    Returns:
        The entropy of the 0/1 label distribution, or 0 when either class
        is absent (which also covers empty input).
    """
    from math import log2

    target = [row[1] for row in arr]
    num_0 = target.count(0)
    num_1 = target.count(1)
    # A pure (or empty) sample has zero entropy; returning early also avoids
    # log2(0) and a division by zero on empty input.
    if not (num_0 and num_1):
        return 0
    length = len(target)
    p_0 = num_0 / length
    p_1 = num_1 / length
    return -(p_0 * log2(p_0) + p_1 * log2(p_1))


def getGain(arr, n):
    """Return the information gain obtained by splitting ``arr`` on its feature.

    Args:
        arr: sequence of ``(feature, target)`` pairs; feature values must be
            hashable because rows are grouped by them.
        n: denominator used to weight each partition's entropy
            (presumably the total sample count, i.e. ``len(arr)`` -- confirm
            against the caller).

    Returns:
        ``getH(arr)`` minus the weighted sum of the entropies of the
        partitions that share a feature value.
    """
    from collections import defaultdict

    # Group rows by feature value in one pass. The original referenced an
    # undefined name ``l`` here, which raised NameError; the rows being
    # partitioned are those of ``arr``.
    partitions = defaultdict(list)
    for row in arr:
        partitions[row[0]].append(row)

    conditional = 0
    for subset in partitions.values():
        conditional += len(subset) / n * getH(subset)
    return getH(arr) - conditional
#携程#