Advanced classifier: support vector machine (SVM) and convex optimization

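Before the examples, recall the convex optimization problem that an SVM solves. In standard textbook form (not specific to this post's data), the soft-margin primal is

\min_{w, b, \xi} \ \frac{1}{2}\|w\|^2 + C \sum_{i=1}^{n} \xi_i
\quad \text{s.t.} \quad y_i (w^\top x_i + b) \ge 1 - \xi_i, \quad \xi_i \ge 0,

with labels y_i in {-1, +1}. This is a convex quadratic program, and C is exactly the regularization parameter passed to svm.SVC in the examples below.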
Example 1: a linear SVM on linearly separable points

import numpy as np
import matplotlib.pyplot as plt
from sklearn import svm

x1 = [0, 1, 0, 1]; x2 = [0, 0, 1, 1]; y = [1, 1, 0, 0]  # class depends only on x2: linearly separable
model1 = svm.SVC(kernel='linear').fit(list(zip(x1, x2)), y)
h = 0.01
xx, yy = np.meshgrid(np.arange(0, 1, h), np.arange(0, 1, h))
Z = model1.predict(np.c_[xx.ravel(), yy.ravel()])

# Put the result into a color plot
Z = Z.reshape(xx.shape)
plt.figure(figsize=[2, 2])
plt.contourf(xx, yy, Z, cmap=plt.cm.coolwarm, alpha=0.5)
plt.show()

[Figure: decision regions of the linear SVM (output_3_0.png)]
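
With a linear kernel, the separating hyperplane w.x + b = 0 found by the optimization above can be read off the model directly; coef_, intercept_ and support_vectors_ are standard scikit-learn attributes (coef_ is defined only for linear kernels):

# the hyperplane found by the convex optimization
w = model1.coef_[0]
b = model1.intercept_[0]
print('hyperplane: %.2f*x1 + %.2f*x2 + %.2f = 0' % (w[0], w[1], b))
print('support vectors:')
print(model1.support_vectors_)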

Example 2: a soft-margin linear SVM with one overlapping point

x1 = [0, 1, 0, 1, 0.5]; x2 = [0, 0, 1, 1, 0]  # same points plus (0.5, 0)
y2 = [1, 1, 0, 0, 0]                          # the new point is labelled 0 inside the class-1 region
model2 = svm.SVC(C=4, kernel='linear').fit(list(zip(x1, x2)), y2)
Z2 = model2.predict(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)
plt.figure(figsize=[2, 2])
plt.contourf(xx, yy, Z2, cmap=plt.cm.coolwarm, alpha=0.5)
plt.show()

[Figure: decision regions of the soft-margin linear SVM (original image lost)]
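
The extra point makes the data non-separable, so the penalty parameter C matters: it trades margin width against slack on misclassified points. A quick sketch comparing a few values of C on this data (n_support_ is a standard SVC attribute; the loop and values are illustrative, not from the original post):

# larger C penalizes slack more heavily, typically narrowing the margin
# and reducing the number of support vectors
for C in (0.1, 1, 4, 100):
    m = svm.SVC(C=C, kernel='linear').fit(list(zip(x1, x2)), y2)
    print('C =', C, '  support vectors per class:', m.n_support_)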

Example 3: a polynomial kernel on the XOR pattern

x1 = [0, 1, 0, 1]; x2 = [0, 0, 1, 1]
y3 = [0, 1, 1, 0]  # XOR labels: not linearly separable
model3 = svm.SVC(C=3, kernel='poly', gamma=1, coef0=1, degree=2)
model3.fit(list(zip(x1, x2)), y3)
Z3 = model3.predict(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)
plt.figure(figsize=[2, 2])
plt.contourf(xx, yy, Z3, cmap=plt.cm.coolwarm, alpha=0.5)
plt.show()


[Figure: polynomial-kernel decision regions separating the XOR pattern (original image lost)]
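
The polynomial kernel used here is K(u, v) = (gamma * <u, v> + coef0) ** degree; with degree=2 it implicitly adds the cross term x1*x2 that makes XOR separable. A quick sanity check against scikit-learn's own implementation (polynomial_kernel is standard API):

from sklearn.metrics.pairwise import polynomial_kernel

Xp = np.array(list(zip(x1, x2)), dtype=float)
K_sklearn = polynomial_kernel(Xp, degree=2, gamma=1, coef0=1)
K_manual = (1 * Xp.dot(Xp.T) + 1) ** 2  # (gamma * <u, v> + coef0) ** degree
print(np.allclose(K_sklearn, K_manual))  # True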

Load the dataset

%matplotlib inline
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

os.chdir('Q:/data')

Extract the feature fields and the target variable for modeling

orgData = pd.read_csv('date_data2.csv')

X = orgData.iloc[:, :4]   # first four columns as features
y = orgData['Dated']      # binary target

Build training and test sets

from sklearn.model_selection import train_test_split

train_data, test_data, train_target, test_target = train_test_split(
    X, y, test_size=0.2, train_size=0.8, random_state=1234)  

Use sklearn.svm to build the support vector machine model

from sklearn import svm

# gamma has no effect with kernel='linear'; probability=True enables predict_proba
svc_model = svm.SVC(kernel='linear', gamma=0.5, C=0.5,
                    probability=True).fit(train_data, train_target)

Preliminary assessment

import sklearn.metrics as metrics

test_est = svc_model.predict(test_data)
print(metrics.classification_report(test_target, test_est))
             precision    recall  f1-score   support

          0       0.88      0.88      0.88         8
          1       0.92      0.92      0.92        12

avg / total       0.90      0.90      0.90        20
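
Since probability=True was set, the model also exposes predict_proba, which permits a threshold-free metric such as AUC (roc_auc_score is standard scikit-learn API; the resulting value depends on the split and is not from the original post):

test_prob = svc_model.predict_proba(test_data)[:, 1]  # probability of class 1
print('AUC: %.3f' % metrics.roc_auc_score(test_target, test_prob))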

Standardization can improve the performance of the Gaussian (RBF) kernel SVM

from sklearn import preprocessing

scaler = preprocessing.StandardScaler().fit(train_data)
train_scaled = scaler.transform(train_data)
test_scaled = scaler.transform(test_data)

svc_model1 = svm.SVC(kernel='rbf', gamma=0.5, C=0.5, 
               probability=True).fit(train_scaled, train_target)
test_est1 = svc_model1.predict(test_scaled)

print(metrics.classification_report(test_target, test_est1))
             precision    recall  f1-score   support

          0       1.00      0.88      0.93         8
          1       0.92      1.00      0.96        12

avg / total       0.95      0.95      0.95        20
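
To keep scaling and classification together, and to guarantee the scaler is fit on the training data only, the two steps can be wrapped in a pipeline; a sketch using scikit-learn's standard make_pipeline:

from sklearn.pipeline import make_pipeline

pipe = make_pipeline(preprocessing.StandardScaler(),
                     svm.SVC(kernel='rbf', gamma=0.5, C=0.5, probability=True))
pipe.fit(train_data, train_target)  # the scaler sees only the training data
print(metrics.classification_report(test_target, pipe.predict(test_data)))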

Choose the best model

from sklearn.model_selection import GridSearchCV

kernel = ('linear', 'rbf')
gamma = np.arange(0.01, 1, 0.1)
C = np.arange(0.01, 1.0, 0.1)
grid = {'kernel': kernel, 'gamma': gamma, 'C': C}

svc_search = GridSearchCV(estimator=svm.SVC(), param_grid=grid, cv=3)
svc_search.fit(train_scaled, train_target)
svc_search.best_params_
{'C': 0.51, 'gamma': 0.01, 'kernel': 'linear'}
test_est2 = svc_search.predict(test_scaled)
print(metrics.classification_report(test_target, test_est2))
             precision    recall  f1-score   support

          0       1.00      0.88      0.93         8
          1       0.92      1.00      0.96        12

avg / total       0.95      0.95      0.95        20
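
Beyond best_params_, the full cross-validation table shows how sensitive the score is to each parameter; a short sketch (cv_results_ is standard GridSearchCV API):

cv_df = pd.DataFrame(svc_search.cv_results_)
cols = ['param_kernel', 'param_C', 'param_gamma', 'mean_test_score']
print(cv_df[cols].sort_values('mean_test_score', ascending=False).head())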

In an SVM model, plotting the decision regions over two of the variables helps build intuition about the fitted classifier, but this visualization cannot be extended beyond two dimensions.

train_x = train_scaled[:, 1:3]  # keep two features (plotted below as Attractive and Assets)
train_y = train_target.values
h = 0.01 # step size in the mesh
C = 1.0  # SVM regularization parameter
svc = svm.SVC(kernel='linear', C=C).fit(train_x, train_y)
rbf_svc = svm.SVC(kernel='rbf', gamma=0.5, C=C).fit(train_x, train_y)
poly_svc = svm.SVC(kernel='poly', degree=3, C=C).fit(train_x, train_y)
lin_svc = svm.LinearSVC(C=C).fit(train_x, train_y)

# create a mesh to plot in
x_min, x_max = train_x[:, 0].min() - 1, train_x[:, 0].max() + 1
y_min, y_max = train_x[:, 1].min() - 1, train_x[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, h), 
                     np.arange(y_min, y_max, h))

# title for the plots
titles = ['SVC with linear kernel',
          'LinearSVC (linear kernel)',
          'SVC with RBF kernel',
          'SVC with polynomial (degree 3) kernel']
plt.figure(figsize=(5, 5))
for i, clf in enumerate((svc, lin_svc, rbf_svc, poly_svc)):
    # Plot the decision boundary. For that, we will assign a color
    # to each point in the mesh [x_min, x_max]x[y_min, y_max].
    plt.subplot(2, 2, i + 1)
    plt.subplots_adjust(wspace=0.3, hspace=0.3)

    Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])

    # Put the result into a color plot
    Z = Z.reshape(xx.shape)
    plt.contourf(xx, yy, Z, cmap=plt.cm.coolwarm, alpha=0.5)

    # Plot also the training points
    plt.scatter(train_x[:, 0], train_x[:, 1], c=train_y, 
                cmap=plt.cm.coolwarm)
    plt.xlabel('Attractive', {'fontsize': 9})
    plt.ylabel('Assets', {'fontsize': 9})
    plt.xlim(xx.min(), xx.max())
    plt.ylim(yy.min(), yy.max())
    plt.xticks(())
    plt.yticks(())
    plt.title(titles[i], {'fontsize': 9})

plt.show()


[Figure: decision regions of the four SVM variants (output_28_0.png)]
