Computer Vision Projects
Learn how computers can "see" and understand images: from basic image processing to complex deep learning models for object detection and classification.
Projects in this guide
- •Image classification with CNNs
- •Object detection with YOLO
- •Face recognition system
- •Image enhancement with OpenCV
- •Style transfer with neural networks
- •Real-time video processing
Project 1: Image Classification with CNNs
Build a convolutional neural network that automatically classifies images into different categories.
🎯 Project Goal
Train a CNN model that can distinguish images of dogs and cats with more than 90% accuracy.
Setup and Data Preprocessing
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import matplotlib.pyplot as plt
import numpy as np
# Check for GPU support
print("GPU available:", tf.config.list_physical_devices('GPU'))
# Data augmentation for better generalization
train_datagen = ImageDataGenerator(
rescale=1./255,
rotation_range=20,
width_shift_range=0.2,
height_shift_range=0.2,
horizontal_flip=True,
zoom_range=0.2,
validation_split=0.2
)
test_datagen = ImageDataGenerator(rescale=1./255)
# Load data (assumes the dogs-vs-cats dataset; flow_from_directory expects one subfolder per class, e.g. data/train/cats and data/train/dogs)
train_generator = train_datagen.flow_from_directory(
'data/train',
target_size=(150, 150),
batch_size=32,
class_mode='binary',
subset='training'
)
validation_generator = train_datagen.flow_from_directory(
'data/train',
target_size=(150, 150),
batch_size=32,
class_mode='binary',
subset='validation'
)
print(f"Gefundene Klassen: {train_generator.class_indices}")
print(f"Trainingsbilder: {train_generator.samples}")
print(f"Validierungsbilder: {validation_generator.samples}")CNN-Modell erstellen
# Define the CNN architecture
model = models.Sequential([
# First convolution block
layers.Conv2D(32, (3, 3), activation='relu', input_shape=(150, 150, 3)),
layers.MaxPooling2D(2, 2),
# Second convolution block
layers.Conv2D(64, (3, 3), activation='relu'),
layers.MaxPooling2D(2, 2),
# Third convolution block
layers.Conv2D(128, (3, 3), activation='relu'),
layers.MaxPooling2D(2, 2),
# Fourth convolution block
layers.Conv2D(128, (3, 3), activation='relu'),
layers.MaxPooling2D(2, 2),
# Flatten for the dense layers
layers.Flatten(),
# Dropout against overfitting
layers.Dropout(0.5),
# Dense Layer
layers.Dense(512, activation='relu'),
# Output layer (binary classification)
layers.Dense(1, activation='sigmoid')
])
# Compile the model
model.compile(
optimizer='adam',
loss='binary_crossentropy',
metrics=['accuracy']
)
# Show the model architecture
model.summary()
Training and Evaluation
# Callbacks for better training
callbacks = [
tf.keras.callbacks.EarlyStopping(
monitor='val_accuracy',
patience=5,
restore_best_weights=True
),
tf.keras.callbacks.ReduceLROnPlateau(
monitor='val_loss',
factor=0.2,
patience=3,
min_lr=0.0001
)
]
# Train the model
history = model.fit(
train_generator,
epochs=30,
validation_data=validation_generator,
callbacks=callbacks,
verbose=1
)
# Visualize the training history
def plot_training_history(history):
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 4))
# Accuracy
ax1.plot(history.history['accuracy'], label='Training Accuracy')
ax1.plot(history.history['val_accuracy'], label='Validation Accuracy')
ax1.set_title('Model Accuracy')
ax1.set_xlabel('Epoch')
ax1.set_ylabel('Accuracy')
ax1.legend()
# Loss
ax2.plot(history.history['loss'], label='Training Loss')
ax2.plot(history.history['val_loss'], label='Validation Loss')
ax2.set_title('Model Loss')
ax2.set_xlabel('Epoch')
ax2.set_ylabel('Loss')
ax2.legend()
plt.tight_layout()
plt.show()
plot_training_history(history)
# Save the model
model.save('dogs_vs_cats_model.h5')
print("Model saved!")
Project 2: Object Detection with YOLO
Implement a YOLO (You Only Look Once) system for real-time object detection in images and videos.
🚀 What You Will Learn
Understand the YOLO architecture, use pre-trained models, and build your own object detection systems.
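The loader below expects three Darknet files: the weights, the network config, and the COCO class names. A small download sketch using the commonly referenced upstream locations (verify the URLs before relying on them):
import urllib.request

files = {
    'yolov3.weights': 'https://pjreddie.com/media/files/yolov3.weights',
    'yolov3.cfg': 'https://raw.githubusercontent.com/pjreddie/darknet/master/cfg/yolov3.cfg',
    'coco.names': 'https://raw.githubusercontent.com/pjreddie/darknet/master/data/coco.names',
}
for filename, url in files.items():
    urllib.request.urlretrieve(url, filename)  # fetch each file next to the script
    print(f"Downloaded {filename}")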
Implementing YOLO with OpenCV
import cv2
import numpy as np
class YOLODetector:
def __init__(self, weights_path, config_path, names_path):
# Load the YOLO network
self.net = cv2.dnn.readNet(weights_path, config_path)
# Load the class names
with open(names_path, 'r') as f:
self.classes = [line.strip() for line in f.readlines()]
# Determine the output layers; getUnconnectedOutLayers returns
# differently shaped arrays across OpenCV versions, so flatten defensively
layer_names = self.net.getLayerNames()
out_idx = np.array(self.net.getUnconnectedOutLayers()).flatten()
self.output_layers = [layer_names[int(i) - 1] for i in out_idx]
# Colors for the bounding boxes
self.colors = np.random.uniform(0, 255, size=(len(self.classes), 3))
def detect_objects(self, image, confidence_threshold=0.5, nms_threshold=0.4):
height, width, channels = image.shape
# Prepare the image for YOLO (normalized 416x416 blob)
blob = cv2.dnn.blobFromImage(
image, scalefactor=1/255.0, size=(416, 416),
mean=(0, 0, 0), swapRB=True, crop=False
)
# Forward Pass
self.net.setInput(blob)
outputs = self.net.forward(self.output_layers)
# Process the detections
boxes, confidences, class_ids = [], [], []
for output in outputs:
for detection in output:
scores = detection[5:]
class_id = np.argmax(scores)
confidence = scores[class_id]
if confidence > confidence_threshold:
# Bounding box coordinates (YOLO outputs are normalized)
center_x = int(detection[0] * width)
center_y = int(detection[1] * height)
w = int(detection[2] * width)
h = int(detection[3] * height)
# Convert the center point to the top-left corner
x = int(center_x - w / 2)
y = int(center_y - h / 2)
boxes.append([x, y, w, h])
confidences.append(float(confidence))
class_ids.append(class_id)
# Non-Maximum Suppression
indices = cv2.dnn.NMSBoxes(boxes, confidences, confidence_threshold, nms_threshold)
return boxes, confidences, class_ids, indices
def draw_detections(self, image, boxes, confidences, class_ids, indices):
if len(indices) > 0:
for i in indices.flatten():
x, y, w, h = boxes[i]
# Class label and confidence
label = f"{self.classes[class_ids[i]]}: {confidences[i]:.2f}"
color = self.colors[class_ids[i]]
# Draw the bounding box
cv2.rectangle(image, (x, y), (x + w, y + h), color, 2)
cv2.putText(
image, label, (x, y - 10),
cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2
)
return image
Using YOLO on a Video Stream
# Initialize the YOLO detector
detector = YOLODetector(
weights_path='yolov3.weights',
config_path='yolov3.cfg',
names_path='coco.names'
)
def process_video(source=0):  # 0 for the webcam, or a path to a video file
cap = cv2.VideoCapture(source)
# Video properties
fps = int(cap.get(cv2.CAP_PROP_FPS))
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
# Video writer for saving output (optional)
fourcc = cv2.VideoWriter_fourcc(*'XVID')
out = cv2.VideoWriter('output.avi', fourcc, fps, (width, height))
while True:
ret, frame = cap.read()
if not ret:
break
# Run object detection
boxes, confidences, class_ids, indices = detector.detect_objects(frame)
# Draw the detections
result_frame = detector.draw_detections(
frame.copy(), boxes, confidences, class_ids, indices
)
# Show FPS (the camera's nominal FPS, not the measured processing rate)
cv2.putText(
result_frame, f'FPS: {fps}', (10, 30),
cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2
)
# Show the frame
cv2.imshow('YOLO Object Detection', result_frame)
# Save the frame (optional)
out.write(result_frame)
# Quit with 'q'
if cv2.waitKey(1) & 0xFF == ord('q'):
break
# Clean up
cap.release()
out.release()
cv2.destroyAllWindows()
# Start video processing
process_video(0)  # use the webcam
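The same detector also works on a single still image; a minimal sketch ('street.jpg' and the output filename are placeholders):
# Run the detector once on a still image and save the annotated result
image = cv2.imread('street.jpg')  # placeholder input path
boxes, confidences, class_ids, indices = detector.detect_objects(image)
result = detector.draw_detections(image.copy(), boxes, confidences, class_ids, indices)
cv2.imwrite('street_detected.jpg', result)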
Project 3: Face Recognition System
Build a complete face recognition system covering face detection, encoding, and verification.
🔒 Applications
Security systems, attendance tracking, personalized user experiences.
Face Recognition with face_recognition
import face_recognition
import cv2
import numpy as np
import os
import pickle
from datetime import datetime
class FaceRecognitionSystem:
def __init__(self):
self.known_face_encodings = []
self.known_face_names = []
self.face_locations = []
self.face_encodings = []
self.face_names = []
def load_known_faces(self, faces_dir):
"""Load known faces from a folder"""
print("Loading known faces...")
for filename in os.listdir(faces_dir):
if filename.endswith(('.jpg', '.jpeg', '.png')):
# Load the image
image_path = os.path.join(faces_dir, filename)
image = face_recognition.load_image_file(image_path)
# Extract the face encoding
face_encodings = face_recognition.face_encodings(image)
if face_encodings:
# Use the filename (without extension) as the person's name
name = os.path.splitext(filename)[0]
self.known_face_encodings.append(face_encodings[0])
self.known_face_names.append(name)
print(f"Gesicht geladen: {name}")
else:
print(f"Kein Gesicht gefunden in: {filename}")
def save_encodings(self, filepath):
"""Save encodings to disk"""
data = {
'encodings': self.known_face_encodings,
'names': self.known_face_names
}
with open(filepath, 'wb') as f:
pickle.dump(data, f)
print(f"Encodings gespeichert in: {filepath}")
def load_encodings(self, filepath):
"""Load encodings from disk"""
try:
with open(filepath, 'rb') as f:
data = pickle.load(f)
self.known_face_encodings = data['encodings']
self.known_face_names = data['names']
print(f"Encodings geladen von: {filepath}")
return True
except FileNotFoundError:
print(f"Encodings-Datei nicht gefunden: {filepath}")
return False
def recognize_faces_in_image(self, image):
"""Recognize faces in a single image"""
# Convert the image from BGR (OpenCV) to RGB
rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
# Locate and encode all faces in the image
face_locations = face_recognition.face_locations(rgb_image)
face_encodings = face_recognition.face_encodings(rgb_image, face_locations)
face_names = []
for face_encoding in face_encodings:
# Compare the face against all known faces
matches = face_recognition.compare_faces(
self.known_face_encodings, face_encoding, tolerance=0.6
)
name = "Unbekannt"
# Beste Übereinstimmung finden
face_distances = face_recognition.face_distance(
self.known_face_encodings, face_encoding
)
if len(face_distances) > 0:
best_match_index = np.argmin(face_distances)
if matches[best_match_index]:
name = self.known_face_names[best_match_index]
face_names.append(name)
return face_locations, face_names
Real-Time Face Recognition
def run_face_recognition():
# Initialize the system
fr_system = FaceRecognitionSystem()
# Load cached encodings, or build them from the known-faces folder
if not fr_system.load_encodings('face_encodings.pkl'):
fr_system.load_known_faces('known_faces/')
fr_system.save_encodings('face_encodings.pkl')
# Initialize the webcam
video_capture = cv2.VideoCapture(0)
# Optimization: only process every other frame
process_this_frame = True
frame_count = 0
# Attendance log
attendance_log = set()
while True:
ret, frame = video_capture.read()
if not ret:
break
# Downscale the frame for better performance
small_frame = cv2.resize(frame, (0, 0), fx=0.25, fy=0.25)
# Only run recognition on every other frame
if process_this_frame:
# Run face recognition
face_locations, face_names = fr_system.recognize_faces_in_image(small_frame)
# Scale the coordinates back up (the frame was resized by 0.25)
face_locations = [(top*4, right*4, bottom*4, left*4)
for (top, right, bottom, left) in face_locations]
process_this_frame = not process_this_frame
# Draw the results
for (top, right, bottom, left), name in zip(face_locations, face_names):
# Bounding box: green for known, red for unknown
color = (0, 255, 0) if name != "Unknown" else (0, 0, 255)
cv2.rectangle(frame, (left, top), (right, bottom), color, 2)
# Label
cv2.rectangle(frame, (left, bottom - 35), (right, bottom), color, cv2.FILLED)
cv2.putText(
frame, name, (left + 6, bottom - 6),
cv2.FONT_HERSHEY_DUPLEX, 0.6, (255, 255, 255), 1
)
# Log attendance
if name != "Unknown" and name not in attendance_log:
attendance_log.add(name)
timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
print(f"Present: {name} at {timestamp}")
# Show frame info
frame_count += 1
cv2.putText(
frame, f'Frame: {frame_count}', (10, 30),
cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2
)
cv2.putText(
frame, f'Present: {len(attendance_log)}', (10, 60),
cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2
)
# Show the frame
cv2.imshow('Face Recognition', frame)
# Quit with 'q'
if cv2.waitKey(1) & 0xFF == ord('q'):
break
# Clean up
video_capture.release()
cv2.destroyAllWindows()
# Print the final attendance list
print("\nFinal attendance list:")
for person in attendance_log:
print(f"- {person}")
# Start the system
if __name__ == "__main__":
run_face_recognition()
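The FaceRecognitionSystem class is not tied to the webcam loop; it works just as well on a single photo. A minimal sketch ('known_faces/' and 'group_photo.jpg' are placeholder paths):
# Recognize everyone in one still image
fr_system = FaceRecognitionSystem()
fr_system.load_known_faces('known_faces/')  # folder of reference photos
image = cv2.imread('group_photo.jpg')       # placeholder test image
locations, names = fr_system.recognize_faces_in_image(image)
for (top, right, bottom, left), name in zip(locations, names):
    print(f"{name}: box top={top}, left={left}")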
Advanced Computer Vision Techniques
Style Transfer
Transfer the style of an artwork onto your own images using neural networks.
import tensorflow as tf
import tensorflow_hub as hub
import matplotlib.pyplot as plt
# Load a pre-trained style transfer model from TensorFlow Hub
model = hub.load('https://tfhub.dev/google/magenta/arbitrary-image-stylization-v1-256/2')
def load_and_preprocess_image(path, max_dim=512):
image = tf.io.read_file(path)
image = tf.image.decode_image(image, channels=3)
image = tf.image.convert_image_dtype(image, tf.float32)
# Resize while preserving the aspect ratio
shape = tf.cast(tf.shape(image)[:-1], tf.float32)
long_dim = max(shape)
scale = max_dim / long_dim
new_shape = tf.cast(shape * scale, tf.int32)
image = tf.image.resize(image, new_shape)
image = image[tf.newaxis, :]
return image
def apply_style_transfer(content_path, style_path):
# Load the images
content_image = load_and_preprocess_image(content_path)
style_image = load_and_preprocess_image(style_path)
# Apply style transfer
stylized_image = model(tf.constant(content_image), tf.constant(style_image))[0]
return stylized_image
# Example usage ('content_image.jpg' and 'style_image.jpg' are placeholders)
content_path = 'content_image.jpg'
style_path = 'style_image.jpg'
stylized = apply_style_transfer(content_path, style_path)
# Show the results
plt.figure(figsize=(15, 5))
plt.subplot(1, 3, 1)
plt.imshow(load_and_preprocess_image(content_path)[0])
plt.title('Content')
plt.axis('off')
plt.subplot(1, 3, 2)
plt.imshow(load_and_preprocess_image(style_path)[0])
plt.title('Style')
plt.axis('off')
plt.subplot(1, 3, 3)
plt.imshow(stylized[0])
plt.title('Stylized')
plt.axis('off')
plt.show()
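To keep the result rather than just display it, the stylized tensor can be written to disk; a short sketch ('stylized.png' is a placeholder filename):
import numpy as np

# Drop the batch dimension; values are floats in [0, 1], which plt.imsave accepts
out = np.clip(np.squeeze(stylized.numpy()), 0.0, 1.0)
plt.imsave('stylized.png', out)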
Image Segmentation
Segment images at the pixel level for precise object detection.
import cv2
import numpy as np
def semantic_segmentation_kmeans(image, k=3):
"""Simple segmentation with K-Means clustering"""
# Reshape the image into a flat 2D array of pixels
data = image.reshape((-1, 3))
data = np.float32(data)
# K-Means Clustering
criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 20, 1.0)
_, labels, centers = cv2.kmeans(data, k, None, criteria, 10, cv2.KMEANS_RANDOM_CENTERS)
# Convert the cluster centers to uint8
centers = np.uint8(centers)
# Build the segmented image
segmented_data = centers[labels.flatten()]
segmented_image = segmented_data.reshape(image.shape)
return segmented_image, labels.reshape(image.shape[:2])
def watershed_segmentation(image):
"""Watershed algorithm for object segmentation"""
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
# Binarize the image with Otsu's threshold
ret, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
# Morphological opening to remove noise
kernel = np.ones((3, 3), np.uint8)
opening = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel, iterations=2)
# Determine the sure background
sure_bg = cv2.dilate(opening, kernel, iterations=3)
# Determine the sure foreground via the distance transform
dist_transform = cv2.distanceTransform(opening, cv2.DIST_L2, 5)
ret, sure_fg = cv2.threshold(dist_transform, 0.7 * dist_transform.max(), 255, 0)
# Unknown region (neither sure background nor sure foreground)
sure_fg = np.uint8(sure_fg)
unknown = cv2.subtract(sure_bg, sure_fg)
# Create markers
ret, markers = cv2.connectedComponents(sure_fg)
markers = markers + 1
markers[unknown == 255] = 0
# Apply watershed
markers = cv2.watershed(image, markers)
image[markers == -1] = [255, 0, 0]  # mark watershed boundaries (blue in BGR)
return image, markers
# Example usage ('example.jpg' is a placeholder)
image = cv2.imread('example.jpg')
# K-Means segmentation
segmented_kmeans, labels = semantic_segmentation_kmeans(image, k=4)
# Watershed segmentation
segmented_watershed, markers = watershed_segmentation(image.copy())
# Show the results
cv2.imshow('Original', image)
cv2.imshow('K-Means Segmentation', segmented_kmeans)
cv2.imshow('Watershed Segmentation', segmented_watershed)
cv2.waitKey(0)
cv2.destroyAllWindows()
Performance Optimization
- ⚡GPU acceleration: use CUDA or OpenCL for compute-intensive operations
- ⚡Reduce image size: smaller resolutions for real-time processing
- ⚡Model optimization: TensorRT or ONNX for deployment
- ⚡Batch processing: run several images through the model at once (see the sketch below)
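As a concrete example of the batching point, the Project 1 classifier can score many frames in one forward pass instead of calling predict per image. A minimal sketch, assuming the model saved earlier and OpenCV-style BGR frames:
import cv2
import numpy as np
import tensorflow as tf

# Load the classifier saved in Project 1
model = tf.keras.models.load_model('dogs_vs_cats_model.h5')

def predict_batch(frames, size=(150, 150)):
    # frames: list of BGR images; resize, convert to RGB, rescale to [0, 1]
    batch = np.stack([
        cv2.resize(f, size)[:, :, ::-1] / 255.0
        for f in frames
    ]).astype(np.float32)
    # One forward pass for the whole batch
    return model.predict(batch, verbose=0)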
Further Projects
Advanced projects
- →3D object detection and tracking
- →Augmented reality applications
- →Medical image analysis
- →Autonomous vehicle vision
Recommended Tools
- •OpenCV for classical CV
- •TensorFlow/PyTorch for deep learning
- •Detectron2 for object detection
- •MediaPipe for real-time CV (quick taste below)
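To give the MediaPipe recommendation some substance, here is a minimal face detection sketch using its legacy Python Solutions API ('face.jpg' is a placeholder path; newer MediaPipe releases use a different Tasks API):
import cv2
import mediapipe as mp

mp_face = mp.solutions.face_detection
with mp_face.FaceDetection(model_selection=0, min_detection_confidence=0.5) as fd:
    image = cv2.imread('face.jpg')                            # placeholder input
    results = fd.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))  # expects RGB
    print(f"Faces detected: {len(results.detections or [])}")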