常用算法概念
1. 传统机器学习算法
SIFT (Scale-Invariant Feature Transform)
- 概念:尺度不变特征变换,提取图像中的关键点和描述符
- 特点:对尺度、旋转、光照变化具有不变性
HOG (Histogram of Oriented Gradients)
- 概念:方向梯度直方图,统计图像局部区域的梯度方向分布
- 特点:对光照变化和阴影有良好鲁棒性,常用于行人检测
SVM (Support Vector Machine)
- 概念:支持向量机,用于分类和回归分析
- 特点:在高维空间中有效,内存使用效率高
2. 深度学习算法
CNN (Convolutional Neural Networks)
- 概念:卷积神经网络,专为图像处理设计的深度学习模型
- 特点:自动学习图像特征,层次化特征提取
ResNet (Residual Networks)
- 概念:残差网络,通过跳跃连接解决深层网络训练问题
- 特点:可训练非常深的网络(超过100层)
MobileNet
- 概念:轻量级CNN架构,使用深度可分离卷积
- 特点:适合移动和嵌入式设备,计算效率高
简单示例
使用OpenCV和SVM进行简单图像分类
import cv2
import numpy as np
from sklearn import svm
from sklearn.model_selection import train_test_split
def extract_hog_features(image):
    """Compute a fixed-length, flattened HOG feature vector for one image.

    Args:
        image: a grayscale or BGR image as a NumPy array (any size).

    Returns:
        1-D numpy array of HOG descriptor values.
    """
    win_size = (64, 64)
    block_size = (16, 16)
    block_stride = (8, 8)
    cell_size = (8, 8)
    nbins = 9
    # Fix: HOGDescriptor.compute assumes the input matches win_size.
    # Resize so images of any size yield the same feature-vector length,
    # which the later np.vstack over all samples requires.
    if image.shape[1] != win_size[0] or image.shape[0] != win_size[1]:
        image = cv2.resize(image, win_size)
    hog = cv2.HOGDescriptor(win_size, block_size, block_stride, cell_size, nbins)
    features = hog.compute(image)
    return features.flatten()
# Placeholder sample lists: fill with loaded images (NumPy arrays) before running.
# NOTE(review): with both lists empty, np.vstack below will raise — this is
# template code meant to be populated by the reader.
positive_samples = []
negative_samples = []
# Extract one HOG feature vector per image for each class.
positive_features = [extract_hog_features(img) for img in positive_samples]
negative_features = [extract_hog_features(img) for img in negative_samples]
# Stack features into the design matrix X; label positives 1, negatives 0.
X = np.vstack([positive_features, negative_features])
y = np.hstack([np.ones(len(positive_features)), np.zeros(len(negative_features))])
# Hold out 20% for evaluation; fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# RBF-kernel SVM classifier trained on the HOG features.
classifier = svm.SVC(kernel='rbf')
classifier.fit(X_train, y_train)
def predict_image(image):
    """Classify a single image with the trained SVM.

    Args:
        image: image array accepted by extract_hog_features.

    Returns:
        The predicted label (1.0 positive, 0.0 negative).
    """
    hog_vector = extract_hog_features(image)
    return classifier.predict([hog_vector])[0]
使用PyTorch实现简单CNN图像分类
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
class SimpleCNN(nn.Module):
    """A small image classifier: three conv/ReLU stages followed by an MLP head.

    Accepts 3-channel inputs of (almost) any spatial size thanks to the
    adaptive pooling at the end of the feature extractor.
    """

    def __init__(self, num_classes=10):
        super(SimpleCNN, self).__init__()
        # Convolutional backbone: channels 3 -> 32 -> 64 -> 128,
        # halving the spatial size twice, then pooling to a fixed 4x4 grid.
        self.features = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.AdaptiveAvgPool2d((4, 4)),
        )
        # Fully connected head with dropout for regularization.
        self.classifier = nn.Sequential(
            nn.Linear(128 * 4 * 4, 512),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(512, num_classes),
        )

    def forward(self, x):
        """Map a (B, 3, H, W) batch to (B, num_classes) logits."""
        feature_maps = self.features(x)
        flat = torch.flatten(feature_maps, 1)
        return self.classifier(flat)
def train_model(model, train_loader, criterion, optimizer, num_epochs=10):
    """Run a standard supervised training loop.

    Args:
        model: the nn.Module to optimize (left in train mode).
        train_loader: iterable yielding (inputs, labels) batches.
        criterion: loss function, e.g. nn.CrossEntropyLoss().
        optimizer: optimizer over model.parameters().
        num_epochs: number of passes over train_loader.

    Prints the mean batch loss after every epoch.
    """
    model.train()
    for epoch in range(num_epochs):
        running_loss = 0.0
        for inputs, labels in train_loader:
            # Standard step: clear grads, forward, loss, backward, update.
            optimizer.zero_grad()
            loss = criterion(model(inputs), labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
        mean_loss = running_loss / len(train_loader)
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {mean_loss:.4f}')
def predict(model, image):
    """Return the predicted class index for a single (unbatched) image tensor.

    Puts the model in eval mode and disables gradient tracking, then picks
    the highest-scoring class from the logits.
    """
    model.eval()
    with torch.no_grad():
        logits = model(image.unsqueeze(0))
        return logits.argmax(dim=1).item()
使用预训练模型进行图像分类
import torch
from torchvision import models, transforms
from PIL import Image
# Load an ImageNet-pretrained ResNet-18 and switch to inference mode.
# NOTE(review): 'pretrained=True' is deprecated in torchvision >= 0.13;
# the modern spelling is models.resnet18(weights=models.ResNet18_Weights.DEFAULT).
model = models.resnet18(pretrained=True)
model.eval()

# Standard ImageNet preprocessing: resize, center-crop to 224x224,
# convert to a float tensor and normalize with ImageNet channel statistics.
preprocess = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])

# Fix: force 3-channel RGB. JPEGs can decode as grayscale/CMYK and PNGs as
# RGBA, which would make Normalize (which expects exactly 3 channels) fail.
image = Image.open('example.jpg').convert('RGB')
input_tensor = preprocess(image)
input_batch = input_tensor.unsqueeze(0)  # add the batch dimension

# Inference only — no gradients needed.
with torch.no_grad():
    output = model(input_batch)

# Class probabilities over the 1000 ImageNet classes, and the top prediction.
probabilities = torch.nn.functional.softmax(output[0], dim=0)
_, predicted_class = torch.max(output, 1)
print(f"Predicted class index: {predicted_class.item()}")