#3 image의 기울기를 통한 magnitude, orientation, histogram

학부 수업 내용 정리/전기전자심화설계및소프트웨어실습

#3 image의 기울기를 통한 magnitude, orientation, histogram

supersumin 2024. 10. 5. 12:38

1. image의 기울기를 구하는 방법에 대해 알아보자: Edge Detection

1.1. Edge Detection과 기울기의 관계

Edge Detection은 이미지에서 급격한 밝기 변화가 발생하는 부분을 찾아내는 과정이다.

기울기는 이미지에서 픽셀 값(밝기 값)의 변화를 나타내기 때문에 기울기를 계산하면 Edge(경계선)을 효과적으로 검출할 수 있다.

1.2. Sobel(소벨) 필터 개념

Sobel 필터는 이미지의 밝기 변화, 즉 기울기(gradient)를 계산하기 위한 필터이다. 기본적으로 수평과 수직 방향에서 밝기 변화를 감지하는 두 개의 필터로 구성된다. 참고로 아래 예시 코드의 필터는 모든 가중치가 ±1인 Prewitt 형태이며, 엄밀한 의미의 Sobel 필터는 가운데 행/열의 가중치가 ±2이다.

1.2.1. Sobel 필터 (x 방향 기울기)

x 방향의 기울기를 감지하기 때문에 x축과 평행한 Edge는 감지할 수 없다. x축과 평행한 Edge는 x축을 따라 일정한 값들로 Edge가 형성되기 때문이다.

반면에 y축과 평행한 Edge는 값이 갑작스럽게 변화하여 Sobel x 필터에서 강하게 감지된다.

즉, Sobel x 필터는 세로 방향의 Edge를 검출한다.

1.2.2. Sobel 필터 (y 방향 기울기)

y 방향의 기울기를 감지하기 때문에 y축과 평행한 Edge는 감지할 수 없다. y축과 평행한 Edge는 y축을 따라 일정한 값들로 Edge가 형성되기 때문이다.

반면에 x축과 평행한 Edge는 값이 갑작스럽게 변화하여 Sobel y 필터에서 강하게 감지된다.

즉, Sobel y 필터는 가로 방향의 Edge를 검출한다.

1.3. Code 예시

#include <float.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>

#include <vector>

#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>

using namespace cv; // OpenCV의 모든 함수와 클래스를 사용할 때 cv::라는 접두사를 생략할 수 있다.

// 3x3 gradient kernels, 9 coefficients each (written out as three rows of three).
// NOTE(review): every non-zero weight is +/-1, which is the Prewitt form; a Sobel
// kernel weights the centre row/column by 2. The surrounding text calls these "Sobel".
int filter_y[9] = { -1, -1, -1,0,0,0,1,1,1 };	// top row negative, bottom row positive
int filter_x[9] = { -1, 0, 1,-1,0,1,-1,0,1 };	// left column negative, right column positive

// Writes the min-max normalised gradient magnitude of `image` to "Edge.bmp".
void EdgeDetection(Mat image, int height, int width);

// Loads the input image as grayscale and runs the edge detector on it.
// Returns 0 on success, -1 when the image could not be read.
int main()
{
	// imread flag: -1 keeps the file as stored, 0 forces grayscale, 1 forces colour.
	Mat image = imread("C:/Users/Sumin/Desktop/Advanced Design of Electrical and Electronic Systems and Software Practice/Edge_Detection/Thunder.bmp", 0);

	// A wrong path or unreadable file yields an empty Mat; stop here instead of
	// handing a 0x0 image to the filter.
	if (image.empty()) {
		printf("Image loading failed!\n");
		return -1;
	}

	EdgeDetection(image, image.rows, image.cols);

	return 0;
}

// Computes the gradient magnitude of a grayscale image with the 3x3 filter_x /
// filter_y kernels, min-max normalises it to 0..255 and writes it to "Edge.bmp".
//
// image  : single-channel 8-bit image (pixels are read through at<uchar>)
// height : number of rows of `image`
// width  : number of columns of `image`
void EdgeDetection(Mat image, int height, int width) {
	const int BLK = 3;			// side length of the filter window
	const int half = BLK / 2;	// distance from the window centre to its edge

	// Nothing to filter or write for an empty image.
	if (image.empty() || height <= 0 || width <= 0) return;

	// One magnitude per pixel, zero-initialised. Border pixels, where the window
	// would leave the image, are never written and therefore stay 0.
	// The vector releases its storage automatically when the function returns.
	std::vector<float> magnitude((size_t)height * (size_t)width, 0.0f);

	Mat imgEdge(height, width, CV_8UC1);	// 8-bit grayscale output image

	for (int y = half; y < height - half; y++) {
		float* mag_row = &magnitude[(size_t)y * width];
		for (int x = half; x < width - half; x++) {
			float conv_x = 0.0f;	// response of filter_x at (y, x)
			float conv_y = 0.0f;	// response of filter_y at (y, x)
			int idx = 0;			// position inside the 9-element kernels

			// Rows outer, columns inner, so idx walks the kernels in the same
			// row-by-row order in which their coefficients are written.
			for (int yy = y - half; yy <= y + half; yy++) {
				for (int xx = x - half; xx <= x + half; xx++) {
					const int pixel = image.at<uchar>(yy, xx);
					conv_x += filter_x[idx] * pixel;
					conv_y += filter_y[idx] * pixel;
					idx++;
				}
			}
			mag_row[x] = sqrt(conv_x * conv_x + conv_y * conv_y);
		}
	}

	// Find the value range. Start max at the lowest float and min at the highest
	// so that the first sample always replaces them.
	float max = -FLT_MAX;
	float min = FLT_MAX;
	for (size_t i = 0; i < magnitude.size(); i++) {
		if (max < magnitude[i]) max = magnitude[i];
		if (min > magnitude[i]) min = magnitude[i];
	}

	// Map [min, max] onto [0, 255]. A completely flat image has max == min; the
	// division would then be 0/0, so such an image is written as all zeros.
	const float range = max - min;
	for (int y = 0; y < height; y++) {
		const float* mag_row = &magnitude[(size_t)y * width];
		for (int x = 0; x < width; x++) {
			const float scaled = (range > 0.0f) ? 255 * (mag_row[x] - min) / range : 0.0f;
			imgEdge.at<uchar>(y, x) = (int)scaled;
		}
	}

	imwrite("Edge.bmp", imgEdge);
}

1.3.1. image의 gradient 값을 받을 변수를 동적할당

// Buffer for convolution results: `height` row pointers, each row holding
// `width` zero-initialised floats (sized from the image at run time).
float** conv = (float**)calloc(height, sizeof(float*));	// array of row pointers
for (int i = 0; i < height;i++) {
	conv[i] = (float*)calloc(width, sizeof(float));	// one zeroed row
}

// Per-pixel gradient magnitude, allocated the same way.
float** magnitude = (float**)calloc(height, sizeof(float*));	// array of row pointers
for (int i = 0; i < height;i++) {
	magnitude[i] = (float*)calloc(width, sizeof(float));	// one zeroed row
}

2차원 배열 동적할당의 경우 행에 대한 포인터를 먼저 동적할당한 뒤, 열에 대해 동적할당을 수행해준다.

1.3.2. 최종적으로 그림을 넣을 Mat 변수 선언

Mat imgEdge(height, width, CV_8UC1);	// 8-bit, 1-channel image that will hold the detected edges

흑백이미지를 저장할 공간을 설정한다.

Mat imgEdge(height, width, CV_8UC3);	// 8-bit, 3-channel image that will hold the detected edges

컬러 이미지를 저장할 공간을 설정한다. 이 경우 흑백이미지로 바꿔주기 위해서는 BGR 채널에 똑같은 intensity를 넣어주는 방법이 있다.

1.3.3. Gradient 계산

// For every pixel, apply both 3x3 kernels to its neighbourhood and store the
// gradient magnitude.
for (y = 0;y < height;y++) {
	for (x = 0;x < width;x++) {
		// Reset the accumulators and the kernel index for each new window.
		conv_y = 0.0;
		conv_x = 0.0;
		idx = 0;
		// NOTE(review): the window is walked column by column (xx outer, yy inner)
		// while idx simply counts 0..8, so the kernels are effectively applied
		// transposed. The magnitude is unaffected because it is symmetric in
		// conv_x and conv_y.
		for (xx = x - BLK / 2;xx <= x + BLK / 2;xx++) {
			for (yy = y - BLK / 2;yy <= y + BLK / 2;yy++) {
				// Window positions outside the image are skipped (they add nothing).
				if (xx >= 0 && yy >= 0 && yy < height && xx < width) {
					conv_y += filter_y[idx] * image.at<uchar>(yy, xx);
					conv_x += filter_x[idx] * image.at<uchar>(yy, xx);
				}
				// Advance even for skipped positions so the kernel index stays
				// aligned with the window position.
				idx++;
			}
		}
		magnitude[y][x] = sqrt(conv_x * conv_x + conv_y * conv_y);
	}
}

x, y: 전체 pixel을 돌기 위한 index
xx, yy: Block 내부를 돌기 위한 index
idx: filter의 index는 따로 선언, 0~8의 값을 가지기 때문
각 Block을 돌 때마다 conv_x, conv_y의 값과 idx의 값을 새로 받아야 하기 때문에 Block을 돌기 전에 초기화 해줘야 한다.
idx는 이중 for문을 돌 때마다 값이 커지므로 if 문 밖에 있어야 한다.
이중 for문이 끝나면 magnitude 값을 받을 수 있다.

* 경계 조건에 대한 고찰 *

경계 조건은 필터의 크기 때문에 이미지의 경계를 처리할 때 고려해야 하는 대상이다. 예를 들어 Sobel 필터의 경우에도 3x3이기 때문에 (0, 0)에서 필터 연산을 시작하면 두 가지의 문제가 생긴다.

경계 밖으로 넘어가는 경우: 필터가 이미지의 경계를 넘어가면 경계 밖의 pixel 값은 0일 수도 있으며, 쓰레기 값일 수도 있다. 이러한 값들은 계산에 의미가 없다.
필터 일부의 경계 안쪽 값만 사용하는 경우: 필터의 일부만 사용한다면 의미있는 결과를 보장하지 않는다. 경계 안쪽에 있는 픽셀과 필터의 일부만 사용한다면 Edge 정보를 왜곡시킨다.

이러한 문제를 해결하는 방법은 두 가지의 종류가 있다.

필터의 크기에 맞춘 내부 픽셀만 계산: 필터의 크기에 맞춰 이미지 내부의 픽셀만 계산하는 방법이다. 예를 들어 3x3 필터의 경우 (1, 1)에서 시작하여 경계를 넘어가지 않도록 내부에서만 연산하는 경우이다.
경계 외부 값을 설정: 이미지 경계를 넘어가는 부분에 대해 픽셀 값을 0으로 채우거나(zero padding), 경계의 값을 복사하거나, 대칭을 적용하여 경계 밖의 값을 추정하는 방식이다.

1.3.4. Min-Max 정규화

// max, min 값을 찾아 정규화 준비
// Find max and min in preparation for the normalisation.
max = -FLT_MAX; // starting at 0 would be wrong for all-negative data such as {-1, -42, -541}, so start at the lowest float
min = FLT_MAX; // starting at 0 would be wrong for all-positive data such as {1, 42, 541}, where the true minimum is 1
for (y = 0;y < height;y++) {
	for (x = 0;x < width; x++) {
		if (max < magnitude[y][x]) max = magnitude[y][x];
		if (min > magnitude[y][x]) min = magnitude[y][x];
	}
}

// Store the normalised values: (value - min) / (max - min) scaled to 0..255.
// NOTE(review): when every magnitude is equal, max - min is 0 and this divides 0 by 0.
for (y = 0;y < height;y++) {
	for (x = 0;x < width;x++) {
		imgEdge.at<uchar>(y, x) = (int)(255 * (magnitude[y][x] - min) / (max - min));
	}
}

min: pixel을 돌며 현재까지 본 값 중 가장 작은 값을 찾아야 하므로, 초기값은 가장 큰 값으로 설정해야 한다.
max: pixel을 돌며 현재까지 본 값 중 가장 큰 값을 찾아야 하므로, 초기값은 가장 작은 값으로 설정해야 한다.
(값-min)/(max-min)은 0~1 사이로 값을 정규화 한다.

2. Orientation Analysis

2.1. Orientation Analysis는 Edge Detection에 비해 뭐가 좋길래 하는 건가요?

2.1.1. 원본에 비해 기울어진 사진을 다르게 인식하는 Edge Detection의 한계

Orientation Analysis는 이미지의 기울기 크기(magnitude)뿐만 아니라 각 pixel의 기울기 방향(각도) 또한 분석한다. 분석된 방향에 해당하는 구간(bin)에 그 pixel의 기울기 크기만큼을 더해주게 되어, 각 픽셀에 대해 어느 방향으로 어느 정도의 크기가 있다는 정보를 제공해준다.

반면에 Edge Detection은 주로 경계만 찾고 만약 두 개의 이미지를 비교할 때 기울어져 있다면 서로 다르게 인식한다. 각 크기만 비교하기 때문이다. 하지만 Orientation Analysis는 "여기 픽셀에는 이 기울기에 이만큼 성분의 크기가 있으니 같은 픽셀일 가능성이 높다!"라고 한다.

예시:

예를 들어 기울어진 "A"의 이미지가 있을 때, Edge Detection은 각 픽셀의 x성분, y성분의 변화량과 그 크기만 보여준다. 이 경우 기울어진 "A"의 형태를 인식하지 못할 수 있다. 하지만 Orientation Analysis는 각 pixel에 대해 기울기가 얼마나 크고 어떤 방향으로 있는지를 분석할 수 있어 얼마나 기울어진 "A"인지도 알 수 있다.

2.2. Orientation Analysis는 각 픽셀에 대해 해당 각도의 어느 정도의 크기가 있는지를 제공한다.

Orientation Analysis는 이미지의 각 pixel에 대해 기울기의 방향뿐만 아니라 해당 방향의 강도도 제공한다. 이는 특정 방향으로 얼마나 뚜렷한 경계가 있는지를 알 수 있게 해준다.

2.3. Code 예시

#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
#include <math.h>
#include <stdio.h>
#include <vector>

#define PI 3.14159265358979323846264338327950288419716939937510582097494459230781640628620899

using namespace cv;

// 3x3 gradient kernels, 9 coefficients each (written out as three rows of three).
int filter_y[9] = { -1, -1, -1, 0, 0, 0, 1, 1, 1 };	// top row negative, bottom row positive
int filter_x[9] = { -1, 0, 1, -1, 0, 1, -1, 0, 1 };	// left column negative, right column positive

// Prints a 9-bin, magnitude-weighted histogram of gradient orientation for `image`.
void orientation(Mat image, int height, int width);

// Loads the input image as grayscale and prints its orientation histogram.
// Returns 0 on success, -1 when the image could not be read.
int main()
{
	// imread flag: -1 keeps the file as stored, 0 forces grayscale, 1 forces colour.
	Mat image = imread("white2.bmp", 0);

	// A missing or unreadable file yields an empty Mat; report it instead of
	// printing a histogram of nothing.
	if (image.empty()) {
		printf("Image loading failed!\n");
		return -1;
	}

	orientation(image, image.rows, image.cols);

	return 0;
}

// Builds one histogram of gradient orientation for the whole image and prints it.
// The range 0..180 degrees is split into 9 bins of 20 degrees; every interior
// pixel adds its gradient magnitude to the bin its gradient direction falls in.
//
// image  : single-channel 8-bit image (pixels are read through at<uchar>)
// height : number of rows of `image`
// width  : number of columns of `image`
void orientation(Mat image, int height, int width) {
	const int BLK = 3;			// side length of the filter window
	const int half = BLK / 2;	// distance from the window centre to its edge
	float hist[9] = { 0 };

	// Only pixels whose whole 3x3 window lies inside the image are visited, so no
	// bounds check is needed inside the window loops.
	for (int y = half; y < height - half; y++) {
		// No ';' after this header: with one, the loop body would be empty and the
		// block below would run just once per row, at x == width - half.
		for (int x = half; x < width - half; x++) {
			float conv_x = 0.0f;	// response of filter_x at (y, x)
			float conv_y = 0.0f;	// response of filter_y at (y, x)
			int idx = 0;			// position inside the 9-element kernels

			// Rows outer, columns inner, so idx walks the kernels in the row-by-row
			// order in which their coefficients are written. Walking the window
			// column by column applies the kernels transposed, which swaps conv_x
			// and conv_y and mirrors every angle about the line y = x.
			for (int yy = y - half; yy <= y + half; yy++) {
				for (int xx = x - half; xx <= x + half; xx++) {
					const int pixel = image.at<uchar>(yy, xx);
					conv_x += filter_x[idx] * pixel;
					conv_y += filter_y[idx] * pixel;
					idx++;
				}
			}
			const float mag = sqrt(conv_x * conv_x + conv_y * conv_y);

			// Direction in degrees. atan2 covers -180..180; a direction and its
			// opposite describe the same edge, so negatives are folded into 0..180.
			float dir = 180.0 * atan2(conv_y, conv_x) / PI;
			if (dir < 0) dir += 180;

			// 20-degree bins. dir == 180 would give index 9; it is the same
			// direction as 0 degrees, so it goes into bin 0.
			int bin = (int)(dir / 20.0);
			if (bin == 9) bin = 0;

			// The pixel votes for its direction with the strength of its gradient.
			hist[bin] += mag;
		}
	}

	// Print the histogram.
	for (int i = 0; i < 9; i++) {
		printf("Bin %d: %.2f\n", i, hist[i]);
	}
}

2.3.1. magnitude 계산

for (y = BLK / 2;y < height - BLK / 2;y++) {
		// No ';' after this header: with one, the loop body is empty and the block
		// below runs only once per row.
		for (x = BLK / 2;x < width - BLK / 2;x++) {
			conv_y = 0.0;
			conv_x = 0.0;
			idx = 0;
			// Rows outer, columns inner, so idx walks the kernels in the row-by-row
			// order in which their coefficients are written.
			for (yy = y - BLK / 2;yy <= y + BLK / 2;yy++) {
				for (xx = x - BLK / 2;xx <= x + BLK / 2;xx++) {
					if (xx >= 0 && yy >= 0 && yy < height && xx < width) {
						conv_y += filter_y[idx] * image.at<uchar>(yy, xx);
						conv_x += filter_x[idx] * image.at<uchar>(yy, xx);
					}
					idx++;
				}
			}
			magnitude[y][x] = sqrt(conv_x * conv_x + conv_y * conv_y);

첫 번째, 두 번째 for문에서 필터가 유효한 값을 유지하기 위한 index를 설정하였다. 제로 패딩이 안 되어있고 필터의 크기에 맞춰서 코딩해야 되는 경우를 예시로 들었다.

2.3.2. 9개로 구성된 bin에 해당되는 magnitude 넣기

// orientation 구하기
// Orientation: atan2 result converted from radians to degrees.
dir = 180.0 * atan2(conv_y, conv_x) / PI;
if (dir < 0) dir += 180;	// fold negative angles into 0..180

// Split into 20-degree intervals.
// bin: one interval of the histogram; values that fall in the same range are collected in the same bin.
bin = (int)(dir / 20.0);
if (bin == 9) bin = 0;	// index 9 only arises for dir == 180; it is folded into bin 0

// Add the magnitude to the bin of this direction.
hist[bin] += magnitude[y][x];

atan2(): 반환되는 값은 라디안 단위의 각도로, -π에서 π 사이의 값을 가진다. 즉, -180도에서 180도 사이의 값이다.
Edge의 방향은 부호를 구분하지 않고 0~180도 범위로 다루므로, 각도가 음수인 경우 +180도를 해주어도 같은 방향을 나타낸다. 따라서 음수는 이렇게 처리해준다.
20.0으로 나눠주면 값에 따라 자신의 index를 스스로 정해준다.
결국 자신의 index에 해당되는 magnitude를 histogram에 넣어준다.

3. HOG(Histograms of Oriented Gradients)

3.1. HOG란?

HOG란 이미지에서 특징을 추출하는 데 사용되는 방법이다.

HOG는 이미지의 각 pixel에서 magnitude와 orientation을 계산한 후, 각 orientation에 해당하는 값의 크기만큼 magnitude를 중첩하여 더하는 방식으로 특징을 추출한다.

이후 histogram(히스토그램)으로 표현할 때 여러 방법이 사용된다.

이미지 전체를 9개의 방향(bin)으로 나누어 magnitude들을 합산하여 하나의 histogram을 구성하는 방식
각 Block마다 해당 블록 bin값에 magnitude를 더해 여러 개의 bin으로 histogram을 구성하는 방식
특정 pixel에 대한 bin만 계산하여 부분적인 histogram을 구성하는 방식

이 방식은 이미지의 경계와 모양 정보를 잘 포착하여 기하학적 특성을 효과적으로 표현할 수 있다.

3.2. Block L-2 normalization

Block L-2 정규화는 HOG에서 중요한 단계이다.

이미지의 각 pixel에서 magnitude가 클수록 무조건 중요한 것으로 간주하는 것이 아니라, 주변 pixel들과의 상대적인 크기에 의미를 두는 정규화 방식이다.

L-2 노름은 한 Block의 histogram 벡터 $v$에 대해 모든 요소의 제곱합의 제곱근, 즉 $\|v\|_2 = \sqrt{\sum_i v_i^2}$이다.

한 블록 내의 모든 셀에서 나오는 magnitude 값들의 제곱 합의 루트(즉, L-2 노름)로 각 bin의 중첩된 magnitude의 합을 나누는 방식

3.3. Code 예시

#define _CRT_SECURE_NO_WARNINGS

#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
#include <math.h>
#include <stdio.h>
#include <vector>

#define PI 3.14159265358979323846264338327950288419716939937510582097494459230781640628620899
#define epsilon 0.001

#define BLK 3 // Filter의 크기
#define HOG_BLK 16 // histogram의 block

using namespace cv;

// 3x3 gradient kernels, 9 coefficients each (written out as three rows of three).
int filter_y[9] = { -1, -1, -1, 0, 0, 0, 1, 1, 1 };	// top row negative, bottom row positive
int filter_x[9] = { -1, 0, 1, -1, 0, 1, -1, 0, 1 };	// left column negative, right column positive

// Fills magnitude[y][x] and orientation[y][x] (degrees) for the interior pixels of `image`.
void calculateMagnitudeAndOrientation(Mat image, float** magnitude, float** orientation, int height, int width);
// Accumulates a 9-bin histogram per HOG_BLK x HOG_BLK block into `histogram` and normalises each block.
void calculateHOG(float** magnitude, float** orientation, float* histogram, int height, int width);
// Divides the 9 bins starting at histogram[offset] by their L2 norm (epsilon is added under the root).
void normalizeBlock(float* histogram, int offset);
// Writes `size` lines of "index, value" to the file `filename`.
void saveHistogramToFile(float* histogram, const char* filename, int size);
// Frees every row of both arrays and then the two row-pointer arrays.
void freeMemory(float** magnitude, float** orientation, int height);



int main()
{
	int height, width;
	Mat image = imread("lecture3.bmp", 0); // -1: 원본 유지, 0: 흑백 이미지, 1: 컬러 이미지
	// image가 안 받아졌다면 함수를 실행하지 마시오.
	if (image.empty()) {
		printf("Image loading failed!\n");
		return -1;
	}

	height = image.rows;
	width = image.cols;

	// 동적 메모리 할당	
	float** magnitude = (float**)calloc(height, sizeof(float*)); // 각 pixel의 magnitude를 저장할 변수
	float** orientation = (float**)calloc(height, sizeof(float*)); 	// 각 pixel의 orientation을 저장할 변수
	for (int i = 0; i < height;i++) {
		magnitude[i] = (float*)calloc(width, sizeof(float));
		orientation[i] = (float*)calloc(width, sizeof(float));
	}
	float* histogram = (float*)calloc(945, sizeof(float)); 	// 한 block 당 9개의 bin을 받아 총 945개의 bin을 구성하는 histogram array

	// magnitude와 orientation 계산
	calculateMagnitudeAndOrientation(image, magnitude, orientation, height, width);

	// HOG 계산 및 정규화
	calculateHOG(magnitude, orientation, histogram, height, width);

	// 히스토그램을 파일에 저장
	saveHistogramToFile(histogram, "lecture3.csv", 945);

	// 메모리 해제
	void freeMemory(float** magnitude, float** orientation, int height);
	free(histogram);

	return 0;
}

// Computes gradient magnitude and orientation for every interior pixel.
//
// image       : single-channel 8-bit image (pixels are read through at<uchar>)
// magnitude   : height x width output; receives sqrt(conv_x^2 + conv_y^2)
// orientation : height x width output; receives the gradient angle in degrees,
//               folded into 0..180
// height/width: dimensions of `image` and of both output arrays
//
// Pixels closer than BLK / 2 to the border are not written.
void calculateMagnitudeAndOrientation(Mat image, float** magnitude, float** orientation, int height, int width) {
	const int half = BLK / 2;	// distance from the window centre to its edge

	for (int y = half; y < height - half; y++) {
		for (int x = half; x < width - half; x++) {
			float conv_x = 0.0f;	// response of filter_x at (y, x)
			float conv_y = 0.0f;	// response of filter_y at (y, x)
			int idx = 0;			// position inside the 9-element kernels

			// Rows outer, columns inner, so idx walks the kernels in the row-by-row
			// order in which their coefficients are written. Walking the window
			// column by column applies the kernels transposed, which swaps conv_x
			// and conv_y and mirrors every angle about the line y = x.
			for (int yy = y - half; yy <= y + half; yy++) {
				for (int xx = x - half; xx <= x + half; xx++) {
					const int pixel = image.at<uchar>(yy, xx);
					conv_x += filter_x[idx] * pixel;
					conv_y += filter_y[idx] * pixel;
					idx++;
				}
			}

			magnitude[y][x] = sqrt(conv_x * conv_x + conv_y * conv_y);

			// Direction in degrees; atan2 covers -180..180 and negative angles are
			// folded into 0..180 because opposite directions describe the same edge.
			float dir = 180.0 * atan2(conv_y, conv_x) / PI;
			if (dir < 0) dir += 180;

			orientation[y][x] = dir;
		}
	}
}

// Builds the HOG descriptor: a HOG_BLK x HOG_BLK block slides over the image in
// steps of HOG_BLK / 2, and each block contributes 9 consecutive entries of
// `histogram` (one per 20-degree bin), normalised per block.
//
// magnitude/orientation : per-pixel inputs, indexed [row][col]
// histogram             : output; must hold 9 floats per block and start zeroed
// height/width          : image dimensions
void calculateHOG(float** magnitude, float** orientation, float* histogram, int height, int width) {
	const int stride = HOG_BLK / 2;
	int base = 0;	// index of the first bin of the current block

	for (int top = 0; top + HOG_BLK <= height; top += stride) {
		for (int left = 0; left + HOG_BLK <= width; left += stride) {
			for (int row = top; row < top + HOG_BLK; ++row) {
				const float* angles = orientation[row];
				const float* weights = magnitude[row];
				for (int col = left; col < left + HOG_BLK; ++col) {
					// Pick the 20-degree bin of this pixel; index 9 wraps to bin 0.
					int slot = (int)(angles[col] / 20.0);
					if (slot == 9) slot = 0;
					histogram[slot + base] += weights[col];
				}
			}
			// Normalise the 9 bins just filled, then move on to the next block.
			normalizeBlock(histogram, base);
			base += 9;
		}
	}
}

// L2-normalises one block of the descriptor in place: the 9 bins starting at
// histogram[offset] are divided by sqrt(sum of their squares + epsilon).
void normalizeBlock(float* histogram, int offset) {
	float* block = histogram + offset;	// first bin of this block
	float norm = 0.0;

	// Sum of squares of the 9 bins.
	for (int k = 0; k < 9; ++k) {
		norm += block[k] * block[k];
	}
	// epsilon under the root keeps the divisor non-zero for an all-zero block.
	norm = sqrt(norm + epsilon);

	// Scale every bin by the norm.
	for (int k = 0; k < 9; ++k) {
		block[k] = block[k] / norm;
	}
}

// 히스토그램을 파일에 저장
// Writes the first `size` histogram entries to `filename`, one "index, value"
// line each (value with 4 decimals). Prints a message if the file cannot be opened.
void saveHistogramToFile(float* histogram, const char* filename, int size) {
	FILE* out = fopen(filename, "w");
	if (out == NULL) {
		printf("file open fail\n");
		return;
	}

	for (int row = 0; row < size; ++row) {
		fprintf(out, "%d, %.4f\n", row, histogram[row]);
	}
	fclose(out);
}

// 동적 메모리 해제
// Releases the two height-row arrays allocated row by row with calloc:
// first every row, then the row-pointer arrays themselves.
// Either array pointer may be NULL (e.g. after a failed allocation); it is then
// skipped instead of being dereferenced. NULL rows are fine as well, since
// free(NULL) is a no-op.
void freeMemory(float** magnitude, float** orientation, int height) {
	for (int i = 0; i < height; i++) {
		if (magnitude != NULL) free(magnitude[i]);
		if (orientation != NULL) free(orientation[i]);
	}
	free(magnitude);
	free(orientation);
}

3.3.1. magnitude와 orientation 계산

// Computes gradient magnitude and orientation for every interior pixel.
//
// image       : single-channel 8-bit image (pixels are read through at<uchar>)
// magnitude   : height x width output; receives sqrt(conv_x^2 + conv_y^2)
// orientation : height x width output; receives the gradient angle in degrees,
//               folded into 0..180
// height/width: dimensions of `image` and of both output arrays
//
// Pixels closer than BLK / 2 to the border are not written.
void calculateMagnitudeAndOrientation(Mat image, float** magnitude, float** orientation, int height, int width) {
	const int half = BLK / 2;	// distance from the window centre to its edge

	for (int y = half; y < height - half; y++) {
		for (int x = half; x < width - half; x++) {
			float conv_x = 0.0f;	// response of filter_x at (y, x)
			float conv_y = 0.0f;	// response of filter_y at (y, x)
			int idx = 0;			// position inside the 9-element kernels

			// Rows outer, columns inner, so idx walks the kernels in the row-by-row
			// order in which their coefficients are written. Walking the window
			// column by column applies the kernels transposed, which swaps conv_x
			// and conv_y and mirrors every angle about the line y = x.
			for (int yy = y - half; yy <= y + half; yy++) {
				for (int xx = x - half; xx <= x + half; xx++) {
					const int pixel = image.at<uchar>(yy, xx);
					conv_x += filter_x[idx] * pixel;
					conv_y += filter_y[idx] * pixel;
					idx++;
				}
			}

			magnitude[y][x] = sqrt(conv_x * conv_x + conv_y * conv_y);

			// Direction in degrees; atan2 covers -180..180 and negative angles are
			// folded into 0..180 because opposite directions describe the same edge.
			float dir = 180.0 * atan2(conv_y, conv_x) / PI;
			if (dir < 0) dir += 180;

			orientation[y][x] = dir;
		}
	}
}

필터의 크기에 맞춰 경계를 넘지 않도록 이미지 내부의 픽셀만 돌고 있으며, 각 픽셀의 magnitude와 orientation을 구하고 있다.

3.3.2. HOG 계산 및 정규화

// Builds the HOG descriptor: a HOG_BLK x HOG_BLK block slides over the image in
// steps of HOG_BLK / 2, and each block contributes 9 consecutive entries of
// `histogram` (one per 20-degree bin), normalised per block.
//
// magnitude/orientation : per-pixel inputs, indexed [row][col]
// histogram             : output; must hold 9 floats per block and start zeroed
// height/width          : image dimensions
void calculateHOG(float** magnitude, float** orientation, float* histogram, int height, int width) {
	const int stride = HOG_BLK / 2;
	int base = 0;	// index of the first bin of the current block

	for (int top = 0; top + HOG_BLK <= height; top += stride) {
		for (int left = 0; left + HOG_BLK <= width; left += stride) {
			for (int row = top; row < top + HOG_BLK; ++row) {
				const float* angles = orientation[row];
				const float* weights = magnitude[row];
				for (int col = left; col < left + HOG_BLK; ++col) {
					// Pick the 20-degree bin of this pixel; index 9 wraps to bin 0.
					int slot = (int)(angles[col] / 20.0);
					if (slot == 9) slot = 0;
					histogram[slot + base] += weights[col];
				}
			}
			// Normalise the 9 bins just filled, then move on to the next block.
			normalizeBlock(histogram, base);
			base += 9;
		}
	}
}

// L2-normalises one block of the descriptor in place: the 9 bins starting at
// histogram[offset] are divided by sqrt(sum of their squares + epsilon).
void normalizeBlock(float* histogram, int offset) {
	float* block = histogram + offset;	// first bin of this block
	float norm = 0.0;

	// Sum of squares of the 9 bins.
	for (int k = 0; k < 9; ++k) {
		norm += block[k] * block[k];
	}
	// epsilon under the root keeps the divisor non-zero for an all-zero block.
	norm = sqrt(norm + epsilon);

	// Scale every bin by the norm.
	for (int k = 0; k < 9; ++k) {
		block[k] = block[k] / norm;
	}
}

모든 pixel을 9개의 bin으로 구성하는 것이 아닌, 설정한 Block 크기마다 bin을 추출하여 이미지의 특징을 추출한다.
각 pixel의 bin 값을 계산해, histogram의 index로 활용하여 histogram에 넣어준다. 이 때, offset을 이용하여 한 블럭을 넘어갈 때마다 추가해준다.
그로 인해, histogram에는 현재 축적된 magnitude가 있고 이를 norm을 통해 정규화해줘야 한다.
norm에는 해당 블록의 각 bin에 들어 있는 histogram 값들의 제곱합의 제곱근이 들어있다.
이를 현재 histogram의 값에 나눠준다.

이로 인해 945개의 bin으로 구성된 histogram을 구할 수 있다.

'학부 수업 내용 정리 > 전기전자심화설계및소프트웨어실습' 카테고리의 다른 글

#10 Image Segmentation (0)	2024.12.08
#9 Face Verification with landmark points (0)	2024.11.15
#2 Image Resizing && Ratation (0)	2024.09.11
#1 Introduction to Computer Vision (4)	2024.09.06

현재글#3 image의 기울기를 통한 magnitude, orientation, histogram

supersumin 님의 tistory

머리속 정리하 우와악

Today :
Yesterday :

supersumin 님의 tistory