import numpy as np
import pandas as pd
# Read the raw training CSV into memory as a list of lines (each line keeps
# its trailing newline, so lines can be written back out unchanged).
with open('./data/train.csv') as file:
    array_of_lines = file.readlines()
number_of_lines = len(array_of_lines)

# The data set has 100000 rows. Split it 8:2, i.e. 80,000 rows for the
# training set and 20,000 rows for the test set.
# Note that the slice below starts at line 0, so that line counts toward
# the 80000 lines written to the training file.
#
# Both outputs are opened in 'w' mode rather than 'a': with append mode,
# re-running this script would add a second copy of the data (and a second
# copy of line 0 in the middle of the test file) to the existing files.
with open('./data/train_train.csv', 'w') as file_train:
    file_train.writelines(array_of_lines[0:80000])

with open('./data/train_test.csv', 'w') as file_test:
    # Lines [80000:] do not contain line 0 (presumably the CSV header row --
    # confirm against the data file), so it is written first explicitly.
    file_test.write(array_of_lines[0])
    file_test.writelines(array_of_lines[80000:])
def split_heartbeat_signals(array_str):
    """Parse comma-separated numeric strings into a 2-D float matrix.

    Args:
        array_str: A sized, indexable, iterable collection of strings. Each
            string holds numbers separated by ``','``; surrounding
            whitespace (e.g. a trailing newline) is stripped before parsing.

    Returns:
        A NumPy array with one row per input string. The number of columns
        is the number of comma-separated fields in ``array_str[0]``; rows
        with more fields than that are truncated to this width. An empty
        input yields an empty array of shape ``(0, 0)``.

    Raises:
        ValueError: If a field cannot be converted to a number, or if a
            row's fields cannot be assigned into a row of the matrix under
            NumPy's assignment rules.
    """
    length_of_array = len(array_str)
    # Use len() rather than truthiness: the caller may pass a NumPy array or
    # pandas Series, whose truth value is ambiguous.
    if length_of_array == 0:
        return np.zeros((0, 0))

    # The first element fixes the width of every row in the result.
    number_of_str = len(array_str[0].split(','))
    return_mat = np.zeros((length_of_array, number_of_str))

    for index, line in enumerate(array_str):
        list_from_line = line.strip().split(',')
        # NumPy converts the string fields to floats on assignment.
        return_mat[index, :] = list_from_line[0:number_of_str]

    return return_mat
def test_classify0_rate(test_dataset, test_label):
    """Classify every test sample and print the overall accuracy.

    Each sample ``test_dataset[i]`` is passed to ``classify0`` together with
    the first 10000 entries of the module-level ``train_dataset`` and
    ``train_label``. The prediction and the expected label are printed for
    every sample, followed by the fraction of correct predictions.

    Args:
        test_dataset: Sized, integer-indexable collection of samples.
        test_label: Integer-indexable collection of expected labels, aligned
            with ``test_dataset`` by position.

    Returns:
        None. Results are reported via ``print`` only.
    """
    num_of_test = len(test_dataset)
    if num_of_test == 0:
        # Nothing to evaluate; avoid dividing by zero in the final rate.
        print("the test dataset is empty, no correct rate to report")
        return

    # The training subset is the same for every sample, so slice it once.
    train_subset = train_dataset[:10000]
    train_subset_label = train_label[:10000]

    correct_count = 0.0
    # Index-based access is kept on purpose: iterating the containers
    # directly is not guaranteed to yield the same items as [i] does
    # (e.g. for a pandas DataFrame).
    for i in range(num_of_test):
        # NOTE(review): the last argument (8) is presumably the number of
        # neighbours used by classify0 -- confirm against its definition.
        classifier_result = classify0(
            test_dataset[i], train_subset, train_subset_label, 8)
        print("the classifier came back with: %d, the real answer is: %d"
              % (classifier_result, test_label[i]))
        if classifier_result == test_label[i]:
            correct_count += 1.0

    print("the total correct rate is: %f"
          % (correct_count / float(num_of_test)))