import numpy as np
from sklearn.model_selection import train_test_split
# Toy dataset: five samples with two features each, plus one label per sample.
X, y = np.arange(10).reshape((5, 2)), range(5)
print(X)
# [[0 1]
#  [2 3]
#  [4 5]
#  [6 7]
#  [8 9]]

# `y` is a lazy range object; materialize it so the labels themselves are
# printed instead of "range(0, 5)".
print(list(y))
# [0, 1, 2, 3, 4]

# Hold out roughly a third of the samples for testing. Fixing random_state
# makes the shuffle, and therefore the split, reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=42
)
print(X_train)
# [[4 5]
#  [0 1]
#  [6 7]]
print(y_train)
# [2, 0, 3]
train_test_split(*arrays, test_size=None, train_size=None, random_state=None, shuffle=True, stratify=None)
shuffle: bool, default=True。拆分前是否对数据进行洗牌（打乱）。如果 shuffle=False，则 stratify 必须为 None。
random_state: int、RandomState 实例或 None, default=None。控制拆分之前对数据进行洗牌时所用的随机性。传入一个 int 可使多次函数调用得到可重复的输出。