Computer Vision Projects
Learn practical computer vision applications with Python, OpenCV, and TensorFlow: from image classification with CNNs to YOLO object detection and advanced deep learning techniques for image analysis.
Projects in This Guide
- Image classification with CNNs
- Object detection with YOLO
- Face recognition system
- Image enhancement with OpenCV
- Style transfer with neural networks
- Real-time video processing
Project 1: Image Classification with CNNs
Build a convolutional neural network that automatically classifies images into categories. A perfect entry point into deep learning for image processing.
🎯 Project Goal
Train a CNN model that distinguishes images of dogs and cats with more than 90% accuracy.
Setup and Data Preprocessing
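The generators below use flow_from_directory, which infers the class labels from subdirectory names. A minimal sketch of the expected layout (directory and file names are illustrative; the validation split is taken from data/train itself):
# data/
# └── train/
#     ├── cats/   cat.0.jpg, cat.1.jpg, ...
#     └── dogs/   dog.0.jpg, dog.1.jpg, ...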
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import matplotlib.pyplot as plt
import numpy as np
# Check GPU support
print("GPU available:", tf.config.list_physical_devices('GPU'))
# Data augmentation for better generalization
train_datagen = ImageDataGenerator(
rescale=1./255,
rotation_range=20,
width_shift_range=0.2,
height_shift_range=0.2,
horizontal_flip=True,
zoom_range=0.2,
validation_split=0.2
)
test_datagen = ImageDataGenerator(rescale=1./255)
# Load the data (assumes the dogs-vs-cats dataset)
train_generator = train_datagen.flow_from_directory(
'data/train',
target_size=(150, 150),
batch_size=32,
class_mode='binary',
subset='training'
)
validation_generator = train_datagen.flow_from_directory(
'data/train',
target_size=(150, 150),
batch_size=32,
class_mode='binary',
subset='validation'
)
print(f"Gefundene Klassen: {train_generator.class_indices}")
print(f"Trainingsbilder: {train_generator.samples}")
print(f"Validierungsbilder: {validation_generator.samples}")CNN-Modell erstellen
# Define the CNN architecture
model = models.Sequential([
# First convolution block
layers.Conv2D(32, (3, 3), activation='relu', input_shape=(150, 150, 3)),
layers.MaxPooling2D(2, 2),
# Second convolution block
layers.Conv2D(64, (3, 3), activation='relu'),
layers.MaxPooling2D(2, 2),
# Third convolution block
layers.Conv2D(128, (3, 3), activation='relu'),
layers.MaxPooling2D(2, 2),
# Fourth convolution block
layers.Conv2D(128, (3, 3), activation='relu'),
layers.MaxPooling2D(2, 2),
# Flatten for the dense layers
layers.Flatten(),
# Dropout against overfitting
layers.Dropout(0.5),
# Dense layer
layers.Dense(512, activation='relu'),
# Output layer (binary classification)
layers.Dense(1, activation='sigmoid')
])
# Compile the model
model.compile(
optimizer='adam',
loss='binary_crossentropy',
metrics=['accuracy']
)
# Show the model architecture
model.summary()
Training and Evaluation
# Callbacks for better training
callbacks = [
tf.keras.callbacks.EarlyStopping(
monitor='val_accuracy',
patience=5,
restore_best_weights=True
),
tf.keras.callbacks.ReduceLROnPlateau(
monitor='val_loss',
factor=0.2,
patience=3,
min_lr=0.0001
)
]
# Train the model
history = model.fit(
train_generator,
epochs=30,
validation_data=validation_generator,
callbacks=callbacks,
verbose=1
)
# Visualize the training history
def plot_training_history(history):
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 4))
# Accuracy
ax1.plot(history.history['accuracy'], label='Training Accuracy')
ax1.plot(history.history['val_accuracy'], label='Validation Accuracy')
ax1.set_title('Model Accuracy')
ax1.set_xlabel('Epoch')
ax1.set_ylabel('Accuracy')
ax1.legend()
# Loss
ax2.plot(history.history['loss'], label='Training Loss')
ax2.plot(history.history['val_loss'], label='Validation Loss')
ax2.set_title('Model Loss')
ax2.set_xlabel('Epoch')
ax2.set_ylabel('Loss')
ax2.legend()
plt.tight_layout()
plt.show()
plot_training_history(history)
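# Evaluate on the validation split and classify a single image
# (a sketch: 'some_image.jpg' is an illustrative path, and the exact accuracy will vary per run)
val_loss, val_acc = model.evaluate(validation_generator, verbose=0)
print(f"Validation accuracy: {val_acc:.2%}")
# flow_from_directory assigns labels alphabetically: cats -> 0, dogs -> 1
img = tf.keras.preprocessing.image.load_img('some_image.jpg', target_size=(150, 150))
x = tf.keras.preprocessing.image.img_to_array(img) / 255.0
prediction = model.predict(x[np.newaxis, ...])[0][0]
print('dog' if prediction > 0.5 else 'cat', prediction)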
# Save the model
model.save('dogs_vs_cats_model.h5')
print("Model saved!")
Project 2: Object Detection with YOLO
Implement a YOLO (You Only Look Once) system for real-time object detection in images and videos, well suited to practical computer vision applications.
🚀 What You Will Learn
Understand the YOLO architecture, use pretrained models, and build your own object detection systems.
Implementing YOLO with OpenCV
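The detector below expects three files: the pretrained weights, the matching network configuration, and the COCO class names. A hedged download sketch (the URLs are the commonly used Darknet sources and are an assumption here; verify availability and licensing before relying on them):
import os
import urllib.request

YOLO_FILES = {
    'yolov3.weights': 'https://pjreddie.com/media/files/yolov3.weights',
    'yolov3.cfg': 'https://raw.githubusercontent.com/pjreddie/darknet/master/cfg/yolov3.cfg',
    'coco.names': 'https://raw.githubusercontent.com/pjreddie/darknet/master/data/coco.names',
}
for filename, url in YOLO_FILES.items():
    if not os.path.exists(filename):
        print(f"Downloading {filename} ...")
        urllib.request.urlretrieve(url, filename)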
import cv2
import numpy as np
import argparse
class YOLODetector:
def __init__(self, weights_path, config_path, names_path):
# Load the YOLO network
self.net = cv2.dnn.readNet(weights_path, config_path)
# Load the class names
with open(names_path, 'r') as f:
self.classes = [line.strip() for line in f.readlines()]
# Determine the output layer names (getUnconnectedOutLayersNames avoids the
# index-format differences between OpenCV versions)
self.output_layers = self.net.getUnconnectedOutLayersNames()
# Random colors for the bounding boxes
self.colors = np.random.uniform(0, 255, size=(len(self.classes), 3))
def detect_objects(self, image, confidence_threshold=0.5, nms_threshold=0.4):
height, width, channels = image.shape
# Prepare the image for YOLO
blob = cv2.dnn.blobFromImage(
image, scalefactor=1/255.0, size=(416, 416),
mean=(0, 0, 0), swapRB=True, crop=False
)
# Forward Pass
self.net.setInput(blob)
outputs = self.net.forward(self.output_layers)
# Process the detections
boxes, confidences, class_ids = [], [], []
for output in outputs:
for detection in output:
scores = detection[5:]
class_id = np.argmax(scores)
confidence = scores[class_id]
if confidence > confidence_threshold:
# Bounding box coordinates
center_x = int(detection[0] * width)
center_y = int(detection[1] * height)
w = int(detection[2] * width)
h = int(detection[3] * height)
# Compute the top-left corner
x = int(center_x - w / 2)
y = int(center_y - h / 2)
boxes.append([x, y, w, h])
confidences.append(float(confidence))
class_ids.append(class_id)
# Non-Maximum Suppression
indices = cv2.dnn.NMSBoxes(boxes, confidences, confidence_threshold, nms_threshold)
return boxes, confidences, class_ids, indices
def draw_detections(self, image, boxes, confidences, class_ids, indices):
if len(indices) > 0:
for i in indices.flatten():
x, y, w, h = boxes[i]
# Class label and confidence
label = f"{self.classes[class_ids[i]]}: {confidences[i]:.2f}"
color = [int(c) for c in self.colors[class_ids[i]]]  # OpenCV expects plain ints, not a float array
# Draw the bounding box
cv2.rectangle(image, (x, y), (x + w, y + h), color, 2)
cv2.putText(
image, label, (x, y - 10),
cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2
)
return image
Using YOLO on a Video Stream
# Initialize the YOLO detector
detector = YOLODetector(
weights_path='yolov3.weights',
config_path='yolov3.cfg',
names_path='coco.names'
)
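# Quick single-image sanity check before moving to video
# (a sketch: 'street.jpg' is an illustrative file name)
image = cv2.imread('street.jpg')
if image is not None:
    boxes, confidences, class_ids, indices = detector.detect_objects(image)
    result = detector.draw_detections(image.copy(), boxes, confidences, class_ids, indices)
    cv2.imwrite('street_detected.jpg', result)
    print(f"Detections kept after NMS: {len(indices)}")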
def process_video(source=0):  # 0 for the webcam, or a path to a video file
cap = cv2.VideoCapture(source)
# Video properties (webcams may report an FPS of 0, so fall back to 30)
fps = int(cap.get(cv2.CAP_PROP_FPS)) or 30
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
# Video writer for the output file (optional)
fourcc = cv2.VideoWriter_fourcc(*'XVID')
out = cv2.VideoWriter('output.avi', fourcc, fps, (width, height))
while True:
ret, frame = cap.read()
if not ret:
break
# Run object detection
boxes, confidences, class_ids, indices = detector.detect_objects(frame)
# Draw the detections
result_frame = detector.draw_detections(
frame.copy(), boxes, confidences, class_ids, indices
)
# Show the FPS
cv2.putText(
result_frame, f'FPS: {fps}', (10, 30),
cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2
)
# Show the frame
cv2.imshow('YOLO Object Detection', result_frame)
# Write the frame to the output video (optional)
out.write(result_frame)
# Quit with 'q'
if cv2.waitKey(1) & 0xFF == ord('q'):
break
# Clean up
cap.release()
out.release()
cv2.destroyAllWindows()
# Start the video processing
process_video(0)  # use the webcam
Project 3: Face Recognition System
Build a complete face recognition pipeline covering face detection, encoding, and verification.
🔒 Applications
Security systems, attendance tracking, personalized user experiences.
Face Recognition with face_recognition
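The loader below derives each person's name from the image file name, so the reference directory is expected to hold one clearly lit photo per person. A sketch of the assumed known_faces/ layout (names are illustrative):
# known_faces/
# ├── alice.jpg      -> recognized as "alice"
# ├── bob.png        -> recognized as "bob"
# └── charlie.jpeg   -> recognized as "charlie"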
import face_recognition
import cv2
import numpy as np
import os
import pickle
from datetime import datetime
class FaceRecognitionSystem:
def __init__(self):
self.known_face_encodings = []
self.known_face_names = []
self.face_locations = []
self.face_encodings = []
self.face_names = []
def load_known_faces(self, faces_dir):
"""Load known faces from a directory"""
print("Loading known faces...")
for filename in os.listdir(faces_dir):
if filename.endswith(('.jpg', '.jpeg', '.png')):
# Load the image
image_path = os.path.join(faces_dir, filename)
image = face_recognition.load_image_file(image_path)
# Extract the face encoding
face_encodings = face_recognition.face_encodings(image)
if face_encodings:
# Derive the name from the file name
name = os.path.splitext(filename)[0]
self.known_face_encodings.append(face_encodings[0])
self.known_face_names.append(name)
print(f"Loaded face: {name}")
else:
print(f"No face found in: {filename}")
def save_encodings(self, filepath):
"""Save the encodings to disk"""
data = {
'encodings': self.known_face_encodings,
'names': self.known_face_names
}
with open(filepath, 'wb') as f:
pickle.dump(data, f)
print(f"Encodings saved to: {filepath}")
def load_encodings(self, filepath):
"""Load encodings from disk"""
try:
with open(filepath, 'rb') as f:
data = pickle.load(f)
self.known_face_encodings = data['encodings']
self.known_face_names = data['names']
print(f"Encodings loaded from: {filepath}")
return True
except FileNotFoundError:
print(f"Encodings file not found: {filepath}")
return False
def recognize_faces_in_image(self, image):
"""Recognize faces in a single image"""
# Convert the image from BGR to RGB
rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
# Locate and encode the faces
face_locations = face_recognition.face_locations(rgb_image)
face_encodings = face_recognition.face_encodings(rgb_image, face_locations)
face_names = []
for face_encoding in face_encodings:
# Compare against the known faces
matches = face_recognition.compare_faces(
self.known_face_encodings, face_encoding, tolerance=0.6
)
name = "Unknown"
# Find the best match
face_distances = face_recognition.face_distance(
self.known_face_encodings, face_encoding
)
if matches and len(face_distances) > 0:
best_match_index = np.argmin(face_distances)
if matches[best_match_index]:
name = self.known_face_names[best_match_index]
face_names.append(name)
return face_locations, face_names
Real-Time Face Recognition
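Before wiring the system into a live webcam loop, it can be sanity-checked on a single still image. A minimal sketch, assuming the class above and illustrative file paths:
fr_system = FaceRecognitionSystem()
fr_system.load_known_faces('known_faces/')
test_image = cv2.imread('group_photo.jpg')
locations, names = fr_system.recognize_faces_in_image(test_image)
for (top, right, bottom, left), name in zip(locations, names):
    print(f"{name}: top={top}, right={right}, bottom={bottom}, left={left}")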
def run_face_recognition():
# Initialize the system
fr_system = FaceRecognitionSystem()
# Load cached encodings, or build them from the known faces
if not fr_system.load_encodings('face_encodings.pkl'):
fr_system.load_known_faces('known_faces/')
fr_system.save_encodings('face_encodings.pkl')
# Initialize the webcam
video_capture = cv2.VideoCapture(0)
# Optimization: only process every other frame
process_this_frame = True
frame_count = 0
# Attendance log
attendance_log = set()
while True:
ret, frame = video_capture.read()
if not ret:
break
# Downscale the frame for better performance
small_frame = cv2.resize(frame, (0, 0), fx=0.25, fy=0.25)
# Only process every other frame
if process_this_frame:
# Run the face recognition
face_locations, face_names = fr_system.recognize_faces_in_image(small_frame)
# Scale the coordinates back up to the full-size frame
face_locations = [(top*4, right*4, bottom*4, left*4)
for (top, right, bottom, left) in face_locations]
process_this_frame = not process_this_frame
# Draw the results
for (top, right, bottom, left), name in zip(face_locations, face_names):
# Bounding box
color = (0, 255, 0) if name != "Unknown" else (0, 0, 255)
cv2.rectangle(frame, (left, top), (right, bottom), color, 2)
# Label
cv2.rectangle(frame, (left, bottom - 35), (right, bottom), color, cv2.FILLED)
cv2.putText(
frame, name, (left + 6, bottom - 6),
cv2.FONT_HERSHEY_DUPLEX, 0.6, (255, 255, 255), 1
)
# Log attendance
if name != "Unknown" and name not in attendance_log:
attendance_log.add(name)
timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
print(f"Present: {name} at {timestamp}")
# Show frame info
frame_count += 1
cv2.putText(
frame, f'Frame: {frame_count}', (10, 30),
cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2
)
cv2.putText(
frame, f'Present: {len(attendance_log)}', (10, 60),
cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2
)
# Show the frame
cv2.imshow('Face Recognition', frame)
# Quit with 'q'
if cv2.waitKey(1) & 0xFF == ord('q'):
break
# Clean up
video_capture.release()
cv2.destroyAllWindows()
# Print the final attendance list
print("\nFinal attendance list:")
for person in attendance_log:
print(f"- {person}")
# Start the system
if __name__ == "__main__":
run_face_recognition()
Advanced Computer Vision Techniques
Style Transfer
Transfer the style of an artwork onto your own images using neural networks.
import tensorflow as tf
import tensorflow_hub as hub
import matplotlib.pyplot as plt
# Load the pretrained style transfer model
model = hub.load('https://tfhub.dev/google/magenta/arbitrary-image-stylization-v1-256/2')
def load_and_preprocess_image(path, max_dim=512):
image = tf.io.read_file(path)
image = tf.image.decode_image(image, channels=3)
image = tf.image.convert_image_dtype(image, tf.float32)
# Resize while keeping the aspect ratio
shape = tf.cast(tf.shape(image)[:-1], tf.float32)
long_dim = max(shape)
scale = max_dim / long_dim
new_shape = tf.cast(shape * scale, tf.int32)
image = tf.image.resize(image, new_shape)
image = image[tf.newaxis, :]
return image
def apply_style_transfer(content_path, style_path):
# Load the images
content_image = load_and_preprocess_image(content_path)
style_image = load_and_preprocess_image(style_path)
# Apply the style transfer
stylized_image = model(tf.constant(content_image), tf.constant(style_image))[0]
return stylized_image
# Example usage
content_path = 'content_image.jpg'
style_path = 'style_image.jpg'
stylized = apply_style_transfer(content_path, style_path)
# Show the results
plt.figure(figsize=(15, 5))
plt.subplot(1, 3, 1)
plt.imshow(load_and_preprocess_image(content_path)[0])
plt.title('Content')
plt.axis('off')
plt.subplot(1, 3, 2)
plt.imshow(load_and_preprocess_image(style_path)[0])
plt.title('Style')
plt.axis('off')
plt.subplot(1, 3, 3)
plt.imshow(stylized[0])
plt.title('Stylized')
plt.axis('off')
plt.show()
Image Segmentation
Segment images at the pixel level for precise object detection.
import cv2
import numpy as np
from sklearn.cluster import KMeans
def semantic_segmentation_kmeans(image, k=3):
"""Simple segmentation with k-means clustering"""
# Reshape the image into a 2D array of pixels
data = image.reshape((-1, 3))
data = np.float32(data)
# K-means clustering
criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 20, 1.0)
_, labels, centers = cv2.kmeans(data, k, None, criteria, 10, cv2.KMEANS_RANDOM_CENTERS)
# Convert the cluster centers to uint8
centers = np.uint8(centers)
# Build the segmented image
segmented_data = centers[labels.flatten()]
segmented_image = segmented_data.reshape(image.shape)
return segmented_image, labels.reshape(image.shape[:2])
def watershed_segmentation(image):
"""Watershed algorithm for object segmentation"""
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
# Binarize with Otsu's threshold
ret, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
# Morphological opening to remove noise
kernel = np.ones((3, 3), np.uint8)
opening = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel, iterations=2)
# Determine the sure background
sure_bg = cv2.dilate(opening, kernel, iterations=3)
# Determine the sure foreground
dist_transform = cv2.distanceTransform(opening, cv2.DIST_L2, 5)
ret, sure_fg = cv2.threshold(dist_transform, 0.7 * dist_transform.max(), 255, 0)
# Unknown region
sure_fg = np.uint8(sure_fg)
unknown = cv2.subtract(sure_bg, sure_fg)
# Create the markers
ret, markers = cv2.connectedComponents(sure_fg)
markers = markers + 1
markers[unknown == 255] = 0
# Apply the watershed
markers = cv2.watershed(image, markers)
image[markers == -1] = [255, 0, 0]  # mark boundaries (blue in BGR)
return image, markers
# Example usage
image = cv2.imread('example.jpg')
# K-means segmentation
segmented_kmeans, labels = semantic_segmentation_kmeans(image, k=4)
# Watershed segmentation
segmented_watershed, markers = watershed_segmentation(image.copy())
# Show the results
cv2.imshow('Original', image)
cv2.imshow('K-Means Segmentation', segmented_kmeans)
cv2.imshow('Watershed Segmentation', segmented_watershed)
cv2.waitKey(0)
cv2.destroyAllWindows()
Performance Optimization
- GPU acceleration: use CUDA or OpenCL builds for compute-intensive steps
- Reduce image size: smaller resolutions for real-time processing
- Model optimization: TensorRT or ONNX for deployment
- Batch processing: run several images through the model at once (see the sketch after this list)
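As an illustration of the last point, a minimal batch-prediction sketch for the Project 1 classifier (file names and batch size are illustrative; preprocessing matches the 150x150 input used above):
import numpy as np
import tensorflow as tf

# Load the classifier saved in Project 1
model = tf.keras.models.load_model('dogs_vs_cats_model.h5')

# Hypothetical list of images to classify in a single forward pass
paths = ['img_001.jpg', 'img_002.jpg', 'img_003.jpg']
batch = np.stack([
    tf.keras.preprocessing.image.img_to_array(
        tf.keras.preprocessing.image.load_img(p, target_size=(150, 150))
    ) / 255.0
    for p in paths
])

# One predict() call on the whole batch is faster than one call per image
predictions = model.predict(batch)
for path, score in zip(paths, predictions[:, 0]):
    print(path, 'dog' if score > 0.5 else 'cat', float(score))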
Further Projects
Advanced Projects
- 3D object detection and tracking
- Augmented reality applications
- Medical image analysis
- Autonomous vehicle vision
Recommended Tools
- OpenCV for classical computer vision
- TensorFlow/PyTorch for deep learning
- Detectron2 for object detection
- MediaPipe for real-time CV