fanfuhan OpenCV 教學110 ~ opencv-110-KMeans進行數據分類

fanfuhan OpenCV 教學110 ~ opencv-110-KMeans進行數據分類

fanfuhan OpenCV 教學110 ~ opencv-110-KMeans進行數據分類


資料來源: https://fanfuhan.github.io/

https://fanfuhan.github.io/2019/05/22/opencv-110/

GITHUB:https://github.com/jash-git/fanfuhan_ML_OpenCV


K-Means算法的作者是MacQueen。K-Means是對數據進行分類的算法,採用硬分類方式,屬於非監督學習算法。它要求預先知道要分為幾個類別,每個類別有一個中心點,並根據距離度量來決定每個數據點屬於哪個類別標籤。一次循環完成對所有數據點的分類之後,會根據標籤重新計算各個類別的中心位置,然後再次遍歷數據集,重新為樣本數據分配標籤。如此不斷迭代,直到達到指定的循環次數,或者前後兩次迭代的變化量(delta)小於指定閾值時停止計算,得到最終各個樣本數據的標籤。


C++

#include <opencv2/opencv.hpp>
#include <iostream>

using namespace cv;
using namespace std;

int main(int argc, char** argv) {
	Mat img(500, 500, CV_8UC3);
	RNG rng(12345);

	Scalar colorTab[] = {
		Scalar(0, 0, 255),
		Scalar(255, 0, 0),
	};

	int numCluster = 2;
	int sampleCount = rng.uniform(5, 500);
	Mat points(sampleCount, 1, CV_32FC2);

	// 生成随机数
	for (int k = 0; k < numCluster; k++) {
		Point center;
		center.x = rng.uniform(0, img.cols);
		center.y = rng.uniform(0, img.rows);
		Mat pointChunk = points.rowRange(k*sampleCount / numCluster,
			k == numCluster - 1 ? sampleCount : (k + 1)*sampleCount / numCluster);
		rng.fill(pointChunk, RNG::NORMAL, Scalar(center.x, center.y), Scalar(img.cols*0.05, img.rows*0.05));
	}
	randShuffle(points, 1, &rng);

	// 使用KMeans
	Mat labels;
	Mat centers;
	kmeans(points, numCluster, labels, TermCriteria(TermCriteria::EPS + TermCriteria::COUNT, 10, 0.1), 3, KMEANS_PP_CENTERS, centers);

	// 用不同颜色显示分类
	img = Scalar::all(255);
	for (int i = 0; i < sampleCount; i++) {
		int index = labels.at<int>(i);
		Point p = points.at<Point2f>(i);
		circle(img, p, 2, colorTab[index], -1, 8);
	}

	// 每个聚类的中心来绘制圆
	for (int i = 0; i < centers.rows; i++) {
		int x = centers.at<float>(i, 0);
		int y = centers.at<float>(i, 1);
		printf("c.x= %d, c.y=%d", x, y);
		circle(img, Point(x, y), 40, colorTab[i], 1, LINE_AA);
	}

	imshow("KMeans-Data-Demo", img);
	waitKey(0);
	return 0;
}


Python

"""
KMeans进行数据分类
"""

import cv2 as cv
import numpy as np
from matplotlib import pyplot as plt

# Build two groups of 25 random integer 2-D points each: one with
# coordinates in [25, 50) and one in [60, 85), then stack them into a
# single (50, 2) array.
X = np.random.randint(25, 50, (25, 2))
Y = np.random.randint(60, 85, (25, 2))
pts = np.vstack((X, Y))

# Prepare the data: convert to float32 before handing it to cv.kmeans.
data = np.float32(pts)
print(data.shape)

# Stop criteria: at most 10 iterations, or an accuracy (epsilon) of 1.0.
criteria = (cv.TERM_CRITERIA_EPS + cv.TERM_CRITERIA_MAX_ITER, 10, 1.0)

# Run K-Means with 2 clusters, 2 attempts and random initial centres.
ret, label, center = cv.kmeans(data, 2, None, criteria, 2, cv.KMEANS_RANDOM_CENTERS)
print(label.shape)
print(center)

# Split the samples by the label they were assigned.
A = data[label.ravel() == 0]
B = data[label.ravel() == 1]

# Plot both clusters and mark the computed centres with yellow squares.
plt.scatter(A[:, 0], A[:, 1])
plt.scatter(B[:, 0], B[:, 1], c='r')
plt.scatter(center[:, 0], center[:, 1], s=80, c='y', marker='s')
plt.xlabel("x1")
plt.ylabel("x2")
plt.show()

# No cv.waitKey()/cv.destroyAllWindows() here: this script never opens an
# OpenCV HighGUI window (the figure is shown by matplotlib), so those calls
# had nothing to wait on or close, and they fail on OpenCV builds without
# GUI support.

發表迴響

你的電子郵件位址並不會被公開。 必要欄位標記為 *