学习项目来源:MOOC机器学习之聚类算法DBSCAN
链接: https://www.icourse163.org/learn/BIT-1001872001#/learn/content?type=detail&id=1002854139&cid=1003247036
源代码:
import numpy as np
from sklearn.cluster import DBSCAN
import sklearn.cluster as skc
# from sklearn import metrics
# import matplotlib.pyplot as plt
# Lookup table: MAC address -> index of that address's entry in onlinetimes.
mac2id = {}
# (starttime, onlinetime) tuples; one is appended the first time a MAC
# address is encountered in the data file.
onlinetimes = []
# The log file is decoded as GB18030 text and iterated line by line below.
# NOTE(review): no close() for this handle is visible in this section --
# confirm it is released further down the script.
f = open(
    r"D:/2018_BigData/Python/Python_Book/3MOOC_MachineLearningInPython/Cluster/TestData.txt",
    encoding='gb18030',
)
for line in f:
mac=line.split(",")[2]
onlinetime=int(line.split(",")[6])
starttime=int(line.split(",")[4].split(" ")[1].split(":")[0]) #读取每条数据中的mac地址、开始上网时间、上网时长
if mac not in mac2id: # mac2id是一个字典:key是mac地址,value是对应mac地址的上网时长以及开始上网时间
mac2id[mac]=len(onlinetimes)
onlinetimes.append((starttime,onlinetime))
else:
onlinetimes[mac2i