fanfuhan OpenCV 教學104 ~ opencv-104-HOG-SVM分類訓練
fanfuhan OpenCV 教學104 ~ opencv-104-HOG-SVM分類訓練
資料來源: https://fanfuhan.github.io/
https://fanfuhan.github.io/2019/05/21/opencv-104/
GITHUB:https://github.com/jash-git/fanfuhan_ML_OpenCV
對於得到的結構化HOG特徵數據,我們就可以通過初始化SVM進行回歸分類訓練,這裡採用的訓練器是SVM線性分類器,SVM還有另外一個分類器就是對於線性不可分數據的徑向分類器。OpenCV中使用徑向分類器SVM有時候會訓練很長時間,而且結果很糟糕,甚至會報一些莫名其妙的錯誤,感覺不是特別好。所以推薦大家針對線性不可分的問題可以選擇神經網絡ANN模塊。
C++
// HOG + linear SVM training demo.
//
// Pipeline: every image found in the positive/negative directories is
// letterboxed onto a 64x128 grey canvas, described with the default
// HOGDescriptor, and the resulting feature rows are used to train a C-SVC
// with a linear kernel. The trained model is saved, reloaded, and used to
// classify one test image.
#include <opencv2/opencv.hpp>
#include <algorithm>
#include <cstdio>
#include <iostream>

using namespace cv;
using namespace cv::ml;
using namespace std;

string positive_dir = "D:/images/train_data/elec_watch/positive/";
string negative_dir = "D:/images/train_data/elec_watch/negative/";

void get_hog_descripor(Mat &image, vector<float> &desc);
void generate_dataset(Mat &trainData, Mat &labels);
void svm_train(Mat &trainData, Mat &labels);

/// Builds the dataset, trains and saves the SVM, then classifies one test image.
/// Returns 0 on success and -1 when the dataset, model or test image is missing.
int main(int argc, char** argv) {
    // generate_dataset() sizes both matrices from the number of images that
    // could actually be read, so nothing is pre-allocated here.
    Mat trainData;
    Mat labels;
    generate_dataset(trainData, labels);
    if (trainData.empty()) {
        fprintf(stderr, "no training images could be loaded \n");
        return -1;
    }
    svm_train(trainData, labels);

    Ptr<SVM> svm = SVM::load("D:/vcworkspaces/hog_elec.yml");
    if (svm.empty()) {
        fprintf(stderr, "could not load the trained SVM model \n");
        return -1;
    }

    Mat test_img = imread("D:/images/train_data/elec_watch/test/test_01.png");
    // Mat test_img = imread("D:/images/train_data/elec_watch/positive/box_01.bmp");
    if (test_img.empty()) {
        fprintf(stderr, "could not read the test image \n");
        return -1;
    }
    imshow("test image", test_img);

    vector<float> fv;
    get_hog_descripor(test_img, fv);

    // predict() takes one sample per row, so lay the descriptor out as 1 x N.
    const int feature_len = static_cast<int>(fv.size());
    Mat one_row = Mat::zeros(Size(feature_len, 1), CV_32FC1);
    for (int i = 0; i < feature_len; i++) {
        one_row.at<float>(0, i) = fv[i];
    }

    float result = svm->predict(one_row);
    printf("\n prediction result : %.2f \n", result);
    waitKey(0);
    return 0;
}

/// Trains a linear C-SVC on the given samples and saves it to
/// D:/vcworkspaces/hog_elec.yml.
/// @param trainData one sample per row (CV_32FC1).
/// @param labels    one integer class label per row (CV_32SC1).
/// Nothing is saved when the dataset is empty or training reports failure.
void svm_train(Mat &trainData, Mat &labels) {
    printf("\n start SVM training... \n");
    if (trainData.empty() || trainData.rows != labels.rows) {
        cerr << "svm_train: empty dataset or sample/label count mismatch" << endl;
        return;
    }

    Ptr<SVM> svm = SVM::create();
    /* Default values to train SVM */
    // The OpenCV docs list gamma as a parameter of the POLY/RBF/SIGMOID/CHI2
    // kernels; it is retained here only so switching kernels needs one edit.
    svm->setGamma(5.383);
    svm->setKernel(SVM::LINEAR);
    svm->setC(2.67);
    svm->setType(SVM::C_SVC);

    if (!svm->train(trainData, ROW_SAMPLE, labels)) {
        cerr << "svm_train: training failed, model not saved" << endl;
        return;
    }
    clog << "...[done]" << endl;

    // save the trained model
    svm->save("D:/vcworkspaces/hog_elec.yml");
}

/// Computes the HOG descriptor of `image` after letterboxing it to 64x128.
/// @param image input image with 1, 3 (BGR) or 4 (BGRA) channels; must not be empty.
/// @param desc  receives the descriptor produced by HOGDescriptor::compute.
/// The image is scaled (aspect ratio preserved) to fit the canvas and centred
/// on a mid-grey (127) background.
void get_hog_descripor(Mat &image, vector<float> &desc) {
    const int kWinW = 64;
    const int kWinH = 128;
    CV_Assert(!image.empty());

    HOGDescriptor hog;
    const int h = image.rows;
    const int w = image.cols;

    // Fit to width when the image is at most twice as tall as wide (the
    // original behaviour); otherwise fit to height so the ROI below can
    // never fall outside the canvas.
    int new_w = kWinW;
    int new_h = kWinH;
    if (h <= 2 * w) {
        const float rate = 64.0 / w;
        new_h = std::min(kWinH, std::max(1, int(rate * h)));
    } else {
        const float rate = 128.0 / h;
        new_w = std::min(kWinW, std::max(1, int(rate * w)));
    }

    Mat img, gray;
    resize(image, img, Size(new_w, new_h));
    if (img.channels() == 1) {
        gray = img;
    } else if (img.channels() == 4) {
        cvtColor(img, gray, COLOR_BGRA2GRAY);
    } else {
        cvtColor(img, gray, COLOR_BGR2GRAY);
    }

    Mat result = Mat::zeros(Size(kWinW, kWinH), CV_8UC1);
    result = Scalar(127);
    Rect roi;
    roi.x = (kWinW - gray.cols) / 2;
    roi.width = gray.cols;
    roi.y = (kWinH - gray.rows) / 2;
    roi.height = gray.rows;
    gray.copyTo(result(roi));

    hog.compute(result, desc, Size(8, 8), Size(0, 0));
}

// Appends one HOG feature vector (and `label`) for every readable image in `dir`.
static void collect_samples(const string &dir, int label,
                            vector<vector<float> > &features, vector<int> &classes) {
    vector<string> images;
    glob(dir, images);
    for (size_t i = 0; i < images.size(); i++) {
        Mat image = imread(images[i].c_str());
        if (image.empty()) {
            // glob() returns every file in the directory, not only images.
            fprintf(stderr, "skip unreadable image : %s \n", images[i].c_str());
            continue;
        }
        vector<float> fv;
        get_hog_descripor(image, fv);
        printf("image path : %s, feature data length: %zu \n", images[i].c_str(), fv.size());
        features.push_back(fv);
        classes.push_back(label);
    }
}

/// Fills `trainData`/`labels` from the images in positive_dir (label 1) and
/// negative_dir (label -1).
/// @param trainData re-created as (samples x feature length) CV_32FC1.
/// @param labels    re-created as (samples x 1) CV_32SC1.
/// Both matrices are released (left empty) when no image could be read.
void generate_dataset(Mat &trainData, Mat &labels) {
    vector<vector<float> > features;
    vector<int> classes;
    collect_samples(positive_dir, 1, features, classes);
    collect_samples(negative_dir, -1, features, classes);

    if (features.empty()) {
        trainData.release();
        labels.release();
        return;
    }

    const int rows = static_cast<int>(features.size());
    const int cols = static_cast<int>(features[0].size());
    trainData.create(rows, cols, CV_32FC1);
    labels.create(rows, 1, CV_32SC1);
    for (int i = 0; i < rows; i++) {
        // Every sample is computed on the same 64x128 canvas, so lengths must agree.
        CV_Assert(static_cast<int>(features[i].size()) == cols);
        std::copy(features[i].begin(), features[i].end(), trainData.ptr<float>(i));
        labels.at<int>(i, 0) = classes[i];
    }
}
Python
""" HOG + SVM classification training """
import cv2 as cv
import os
import numpy as np


def get_hog_descriptor(image):
    """Return the HOG descriptor of ``image`` as a float32 column vector.

    The image is scaled (aspect ratio preserved) to fit a 64x128 canvas and
    centred on a mid-grey (127) background before the default
    ``cv.HOGDescriptor`` is applied.

    :param image: image array with 1, 3 (BGR) or 4 (BGRA) channels.
    :return: ``numpy.ndarray`` of shape ``(N, 1)``, dtype ``float32``.
    :raises ValueError: if ``image`` is ``None`` or empty.
    """
    if image is None or image.size == 0:
        raise ValueError("get_hog_descriptor: image is empty")

    hog = cv.HOGDescriptor()
    h, w = image.shape[:2]
    # Fit to width when the image is at most twice as tall as wide (the
    # original behaviour); otherwise fit to height so the paste below can
    # never fall outside the canvas.
    if h <= 2 * w:
        rate = 64 / w
        new_w = 64
        new_h = min(128, max(1, int(rate * h)))  # np.int was removed from NumPy
    else:
        rate = 128 / h
        new_h = 128
        new_w = min(64, max(1, int(rate * w)))
    image = cv.resize(image, (new_w, new_h))

    if image.ndim == 2:
        gray = image
    elif image.shape[2] == 4:
        gray = cv.cvtColor(image, cv.COLOR_BGRA2GRAY)
    else:
        gray = cv.cvtColor(image, cv.COLOR_BGR2GRAY)

    bg = np.full((128, 64), 127, dtype=np.uint8)
    dy = (128 - new_h) // 2
    dx = (64 - new_w) // 2
    bg[dy:dy + new_h, dx:dx + new_w] = gray

    # A 64x128 window yields 3780 values with the default HOG parameters.
    fv = hog.compute(bg, winStride=(8, 8), padding=(0, 0))
    # The array returned by hog.compute is (N, 1) in some cv2 releases and
    # (N,) in others; normalise to (N, 1) so callers can index either way.
    return np.asarray(fv, dtype=np.float32).reshape(-1, 1)


def _collect_samples(directory, label, train_data, labels):
    """Append one feature row and ``label`` for every readable image in ``directory``."""
    for file_name in os.listdir(directory):
        img_path = os.path.join(directory, file_name)
        img = cv.imread(img_path)
        if img is None:
            # os.listdir returns every entry, not only decodable images.
            print("skip unreadable image:", img_path)
            continue
        train_data.append(get_hog_descriptor(img).ravel())
        labels.append(label)


def generate_dataset(pdir, ndir):
    """Build the training set from a positive and a negative image directory.

    :param pdir: directory of positive samples (label 1).
    :param ndir: directory of negative samples (label -1).
    :return: ``(train_data, labels)`` as float32 and int32 arrays, one row /
        entry per image that could be read.
    """
    train_data = []
    labels = []
    _collect_samples(pdir, 1, train_data, labels)
    _collect_samples(ndir, -1, train_data, labels)
    return np.array(train_data, dtype=np.float32), np.array(labels, dtype=np.int32)


def svm_train(positive_dir, negative_dir):
    """Train a linear C-SVC on the two directories and save it to 'svm_data.dat'.

    :raises ValueError: if no training image could be read.
    """
    svm = cv.ml.SVM_create()
    svm.setKernel(cv.ml.SVM_LINEAR)
    svm.setType(cv.ml.SVM_C_SVC)
    svm.setC(2.67)
    svm.setGamma(5.383)
    trainData, responses = generate_dataset(positive_dir, negative_dir)
    if len(trainData) == 0:
        raise ValueError("svm_train: no training images could be loaded")
    responses = np.reshape(responses, [-1, 1])
    svm.train(trainData, cv.ml.ROW_SAMPLE, responses)
    svm.save('svm_data.dat')


def elec_detect(image):
    """Classify ``image`` with the SVM stored in 'svm_data.dat'.

    Prints the descriptor length, the sample shape and the prediction, and
    returns the prediction array produced by ``svm.predict``.
    """
    hog_desc = get_hog_descriptor(image)
    print(len(hog_desc))
    # predict() expects one sample per row, i.e. shape (1, N).
    one_fv = hog_desc.reshape(1, -1)
    print(len(one_fv), len(one_fv[0]))
    svm = cv.ml.SVM_load('svm_data.dat')
    result = svm.predict(one_fv)[1]
    print(result)
    return result


if __name__ == '__main__':
    svm_train("images/elec_watch/positive/", "images/elec_watch/negative/")
    # Kept from the original script; it pairs with the display/detection code
    # that is commented out below.
    cv.waitKey(0)
    # test_img = cv.imread("images/elec_watch/test/scene_01.jpg")
    # elec_detect(test_img)
    # cv.destroyAllWindows()