From 2b38c04a572e553a7789e67427a834cc264704b7 Mon Sep 17 00:00:00 2001 From: Craig Date: Sat, 12 Apr 2025 11:13:22 +0100 Subject: [PATCH] Refactor: Update Penn-Fudan Mask R-CNN configuration and data transformation logic for memory optimization --- configs/pennfudan_maskrcnn_config.py | 28 +++++++++++++++++++--------- utils/data_utils.py | 25 ++++++++++++------------- 2 files changed, 31 insertions(+), 22 deletions(-) diff --git a/configs/pennfudan_maskrcnn_config.py b/configs/pennfudan_maskrcnn_config.py index a3fc51d..d0b2617 100644 --- a/configs/pennfudan_maskrcnn_config.py +++ b/configs/pennfudan_maskrcnn_config.py @@ -2,20 +2,30 @@ Configuration for training Mask R-CNN on the Penn-Fudan dataset. """ -from .base_config import base_config +from configs.base_config import base_config +# Create a copy of the base configuration config = base_config.copy() -# Override necessary settings from base_config +# Update specific values for this experiment config.update( { - "config_name": "pennfudan_maskrcnn_v1", # Unique name for this experiment run - "data_root": "data/PennFudanPed", # Explicitly set dataset root - "num_classes": 2, # Penn-Fudan has 1 class (pedestrian) + background - # Adjust other parameters as needed for this specific experiment, e.g.: - # 'batch_size': 4, - # 'num_epochs': 15, - # 'lr': 0.001, + # Core configuration + "config_name": "pennfudan_maskrcnn_v1", + "data_root": "data/PennFudanPed", + "num_classes": 2, # background + pedestrian + # Training parameters - modified for memory constraints + "batch_size": 1, # Reduced from 2 to 1 to save memory + "num_epochs": 10, + # Optimizer settings + "lr": 0.002, # Slightly reduced learning rate for smaller batch size + "momentum": 0.9, + "weight_decay": 0.0005, + # Memory optimization settings + "pin_memory": False, # Set to False to reduce memory pressure + "num_workers": 2, # Reduced from 4 to 2 + # Device settings + "device": "cuda", } ) diff --git a/utils/data_utils.py b/utils/data_utils.py index 
322c54d..dc33147 100644 --- a/utils/data_utils.py +++ b/utils/data_utils.py @@ -104,29 +104,28 @@ class PennFudanDataset(torch.utils.data.Dataset): def get_transform(train): - """Gets the appropriate set of transforms. + """Get the transformations for the dataset. Args: - train (bool): Whether to apply training augmentations. + train (bool): Whether to get transforms for training or evaluation. Returns: - torchvision.transforms.Compose: A composed Torchvision transform. + torchvision.transforms.Compose: The composed transforms. """ transforms = [] - # Always convert image to PyTorch tensor and scale to [0, 1] + + # Convert to PyTorch tensor (scaled to [0, 1] by ToDtype below) transforms.append(T.ToImage()) + + # Add resize transform to reduce memory usage (smaller edge resized to 800px) + transforms.append(T.Resize(800)) + transforms.append(T.ToDtype(torch.float32, scale=True)) + # Data augmentation for training if train: - # Add simple data augmentation for training - transforms.append(T.RandomHorizontalFlip(p=0.5)) - # Add other augmentations here if needed - # e.g., T.ColorJitter(...), T.RandomResizedCrop(...) ensuring - # bounding boxes/masks are handled correctly by v2 transforms. - - # Note: Normalization (e.g., T.Normalize) is often applied, - # but pre-trained models in torchvision usually handle this internally - # or expect [0, 1] range inputs. + transforms.append(T.RandomHorizontalFlip(0.5)) + # Could add more augmentations here if desired return T.Compose(transforms)