The support vector machine (SVM) originated in 1964 with the work of Vapnik and Chervonenkis. It developed rapidly during the 1990s, spawning a family of improved and extended algorithms, and has since been widely applied in areas such as face recognition, text classification, handwritten character recognition, and bioinformatics.
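Training an SVM amounts to solving a quadratic program over the Lagrange multipliers α. The implementation below uses Platt's sequential minimal optimization (SMO), which repeatedly picks a pair of multipliers (α_i, α_j) and optimizes them analytically while holding all others fixed. For reference, the pairwise update the code implements is the standard SMO rule, where E_k = f(x_k) − y_k is the prediction error on sample k:

$$
\eta = K_{ii} + K_{jj} - 2K_{ij}, \qquad
\alpha_i^{\text{new}} = \operatorname{clip}\!\left(\alpha_i^{\text{old}} + \frac{y_i\,(E_j - E_i)}{\eta},\; L,\; H\right),
$$

$$
(L, H) =
\begin{cases}
\bigl(\max(0,\ \alpha_i - \alpha_j),\ \min(C,\ C + \alpha_i - \alpha_j)\bigr) & \text{if } y_i \neq y_j,\\
\bigl(\max(0,\ \alpha_i + \alpha_j - C),\ \min(C,\ \alpha_i + \alpha_j)\bigr) & \text{if } y_i = y_j,
\end{cases}
$$

after which α_j is adjusted to preserve the equality constraint:

$$
\alpha_j^{\text{new}} = \alpha_j^{\text{old}} + y_i y_j\,(\alpha_i^{\text{old}} - \alpha_i^{\text{new}}).
$$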
import numpy as np


class SMO(object):
    def __init__(self, C=100, toler=0.001, maxIter=10000):
        self.C = C            # soft-margin penalty parameter
        self.tol = toler      # tolerance for KKT violation
        self.maxIter = maxIter

    def fit(self, X, y):
        self.X, self.y = X, y
        self.n_samples = len(X)
        self.alphas = np.zeros(self.n_samples, dtype=float)
        self.b = 0.
        self.Error = np.zeros_like(self.alphas)  # cache of errors E_i
        self.iterNum = 0
        iterNum = 0
        examineAll = True
        alphaChanged = 0
        # Alternate between full passes over all samples and passes over
        # the non-bound samples (0 < alpha < C) until no alpha changes.
        while iterNum < self.maxIter and (alphaChanged > 0 or examineAll):
            alphaChanged = 0
            if examineAll:
                for i in range(len(self.X)):
                    alphaChanged += self._innerLoop(i)
                iterNum += 1
                examineAll = False
            else:
                nonBoundInd = np.nonzero((self.alphas > 0) * (self.alphas < self.C))[0]
                for i in nonBoundInd:
                    alphaChanged += self._innerLoop(i)
                iterNum += 1
                if alphaChanged == 0:
                    examineAll = True
        self.iterNum = iterNum
        return self

    def _innerLoop(self, i):
        Ei = self.updateError(i)
        # Optimize the pair (alpha_i, alpha_j) only if alpha_i violates
        # the KKT conditions by more than tol.
        if (((Ei * self.y[i] < -self.tol) and (self.alphas[i] < self.C)) or
                ((Ei * self.y[i] > self.tol) and (self.alphas[i] > 0))):
            j = self.selectJ(i)
            Ej = self.Error[j]
            alphaIold, alphaJold = self.alphas[i], self.alphas[j]
            # Bounds [L, H] keep the updated alpha_i inside the box
            # constraint while preserving sum(alpha_k * y_k) = 0.
            if self.y[i] != self.y[j]:
                L = max(0, alphaIold - alphaJold)
                H = min(self.C, self.C + alphaIold - alphaJold)
            else:
                L = max(0, alphaJold + alphaIold - self.C)
                H = min(self.C, alphaJold + alphaIold)
            if H == L:
                return 0
            Kii, Kij, Kjj = (self.K(self.X[i], self.X[i]),
                             self.K(self.X[i], self.X[j]),
                             self.K(self.X[j], self.X[j]))
            eta = Kii + Kjj - 2 * Kij  # curvature along the constraint line
            if eta <= 0:
                return 0
            # Analytical update of alpha_i, clipped to [L, H].
            self.alphas[i] += self.y[i] * (Ej - Ei) / eta
            if self.alphas[i] <= L:
                self.alphas[i] = L
            elif self.alphas[i] >= H:
                self.alphas[i] = H
            if np.abs(self.alphas[i] - alphaIold) < 1.e-10:
                return 0
            # Move alpha_j by the opposite amount to keep the equality
            # constraint satisfied.
            self.alphas[j] += self.y[j] * self.y[i] * (alphaIold - self.alphas[i])
            b0 = (self.b - Ej
                  - self.y[j] * Kjj * (self.alphas[j] - alphaJold)
                  - self.y[i] * Kij * (self.alphas[i] - alphaIold))
            b1 = (self.b - Ei
                  - self.y[j] * Kij * (self.alphas[j] - alphaJold)
                  - self.y[i] * Kii * (self.alphas[i] - alphaIold))
            if 0 < self.alphas[j] < self.C:
                self.b = b0
            elif 0 < self.alphas[i] < self.C:
                self.b = b1
            else:
                self.b = (b0 + b1) / 2
            return 1
        else:
            return 0

    def selectJ(self, i):
        # Second-choice heuristic: pick the j that maximizes |E_j - E_i|;
        # fall back to a random index while the error cache is empty.
        j = 0
        maxDeltaE = -1.
        priorIndices = np.nonzero(self.Error)[0]
        if len(priorIndices) > 1:
            for k in priorIndices:
                if k == i:
                    continue
                Ek = self.updateError(k)
                deltaE = np.abs(Ek - self.Error[i])
                if deltaE > maxDeltaE:
                    j, maxDeltaE = k, deltaE
            return j
        else:
            j = np.random.choice([k for k in range(self.n_samples) if k != i])
            self.updateError(j)
            return j

    def updateError(self, i):
        # E_i = f(x_i) - y_i, with f the current decision function.
        fxi = np.sum(self.alphas * self.y *
                     np.array([self.K(self.X[i], self.X[j])
                               for j in range(self.n_samples)])) + self.b
        self.Error[i] = fxi - self.y[i]
        return self.Error[i]

    def K(self, Xi, Xj):
        # Linear kernel: plain inner product.
        return np.sum(Xi * Xj)

    def predict(self, testX):
        num = len(testX)
        y_pred = np.ones(num, dtype=int)
        for i in range(num):
            fxi = np.sum(self.alphas * self.y *
                         np.array([self.K(testX[i], self.X[j])
                                   for j in range(self.n_samples)])) + self.b
            if fxi < 0:
                y_pred[i] = -1
        return y_pred
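As a quick sanity check, the trainer can be run on a small synthetic problem. The toy data below (two linearly separable Gaussian blobs labeled ±1) is made up purely for illustration; only the SMO class above is assumed.

import numpy as np

# Hypothetical smoke test on two linearly separable Gaussian blobs.
rng = np.random.RandomState(0)
X_pos = rng.randn(20, 2) + np.array([2.0, 2.0])    # class +1
X_neg = rng.randn(20, 2) + np.array([-2.0, -2.0])  # class -1
X_train = np.vstack([X_pos, X_neg])
y_train = np.hstack([np.ones(20), -np.ones(20)])

clf = SMO(C=100, toler=0.001, maxIter=10000).fit(X_train, y_train)
print('training accuracy:', np.mean(clf.predict(X_train) == y_train))
# Samples with non-zero alpha are the support vectors.
print('support vectors:', np.sum(clf.alphas > 1e-8))

On separable data like this the classifier should reach perfect training accuracy, with only a handful of samples ending up as support vectors; since the kernel is the plain inner product, the model is a linear decision boundary.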