I'm trying to use GridSearchCV to choose the best value of C for this simple SVM problem with non-separable samples. The issue is that when I run the code, the best C it selects is extremely small (~1e-18), which widens the margin so much that it contains all the samples. Even after I change the samples so that they are easily separable, the best C is still on the order of 1e-18. Does anyone know why this happens?
```
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_blobs  # sklearn.datasets.samples_generator was removed in newer scikit-learn
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
# Generate two blobs and plot them
X, y = make_blobs(n_samples=500, centers=2, random_state=6, cluster_std=1.2)

fig = plt.figure()
ax = fig.add_subplot(111)
ax.scatter(X[:, 0], X[:, 1], c=y, cmap='rainbow', s=30, edgecolors='white')
ax.set_xlabel(r'$x_1$', fontsize=20)
ax.set_ylabel(r'$x_2$', fontsize=20)
# Grid-search C for a linear SVM using 5-fold cross-validation
svc = SVC(kernel='linear')
c_space = np.logspace(-20, 1, 50)
param_grid = {'C': c_space}
svc_cv = GridSearchCV(svc, param_grid, cv=5)
svc_cv.fit(X, y)

# Refit the SVM on all the data with the best C found by the grid search
c = svc_cv.best_params_['C']
svc.set_params(C=c)
svc.fit(X, y)
# Plot the decision boundary, the margins, and the support vectors
support_vecs = svc.support_vectors_
x1 = np.linspace(X[:, 0].min(), X[:, 0].max(), 100)
x2 = np.linspace(X[:, 1].min(), X[:, 1].max(), 100)
X1, X2 = np.meshgrid(x1, x2)
points = np.vstack([X1.ravel(), X2.ravel()]).T
boundary = svc.decision_function(points).reshape(X1.shape)
ax.contour(X1, X2, boundary, colors='k', levels=[-1, 0, 1],
           linestyles=['--', '-', '--'])
ax.scatter(support_vecs[:, 0], support_vecs[:, 1], s=250, linewidth=1,
           facecolors='none', edgecolors='k')
plt.show()
```
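
For reference, this is how I look at the mean cross-validation accuracy for each candidate C, to check whether the tiny best C actually scores better than the others or whether many values are tied. This is just a small diagnostic sketch: it assumes the `svc_cv` object fitted in the code above and only uses the standard `cv_results_`, `best_params_`, and `best_score_` attributes of GridSearchCV.

```
# Mean 5-fold CV accuracy for every candidate C tried by the grid search
for C, score in zip(svc_cv.cv_results_['param_C'],
                    svc_cv.cv_results_['mean_test_score']):
    print(f'C = {C:.2e}   mean CV accuracy = {score:.3f}')

print('best C:', svc_cv.best_params_['C'])
print('best mean CV accuracy:', svc_cv.best_score_)
```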