Refactor: Update Penn-Fudan Mask R-CNN configuration and data transformation logic for memory optimization

This commit is contained in:
Craig
2025-04-12 11:13:22 +01:00
parent 217cfba9ba
commit 2b38c04a57
2 changed files with 31 additions and 22 deletions

View File

@@ -2,20 +2,30 @@
Configuration for training Mask R-CNN on the Penn-Fudan dataset.
"""
from configs.base_config import base_config

# Create a copy of the base configuration so edits here never mutate the
# shared base dict used by other experiment configs.
config = base_config.copy()

# Update specific values for this experiment
config.update(
    {
        # Core configuration
        "config_name": "pennfudan_maskrcnn_v1",  # Unique name for this experiment run
        "data_root": "data/PennFudanPed",  # Explicitly set dataset root
        "num_classes": 2,  # background + pedestrian
        # Training parameters - modified for memory constraints
        "batch_size": 1,  # Reduced from 2 to 1 to save memory
        "num_epochs": 10,
        # Optimizer settings
        "lr": 0.002,  # Slightly reduced learning rate for smaller batch size
        "momentum": 0.9,
        "weight_decay": 0.0005,
        # Memory optimization settings
        "pin_memory": False,  # Set to False to reduce memory pressure
        "num_workers": 2,  # Reduced from 4 to 2
        # Device settings
        "device": "cuda",  # NOTE(review): assumes a CUDA device is present — confirm fallback handling
    }
)

View File

@@ -104,29 +104,28 @@ class PennFudanDataset(torch.utils.data.Dataset):
def get_transform(train):
    """Get the transformations for the dataset.

    Args:
        train (bool): Whether to get transforms for training or evaluation.

    Returns:
        torchvision.transforms.Compose: The composed transforms, which
        convert the input to a float tensor in [0, 1] and resize it, with
        random horizontal flipping added only in training mode.
    """
    transforms = []
    # Convert to a tensor image first; ToDtype(scale=True) below rescales
    # pixel values to the [0, 1] float range expected by the model.
    transforms.append(T.ToImage())
    # Resize to reduce memory usage (shorter side scaled toward 800px)
    transforms.append(T.Resize(800))
    transforms.append(T.ToDtype(torch.float32, scale=True))
    # Data augmentation for training only — evaluation must be deterministic.
    if train:
        transforms.append(T.RandomHorizontalFlip(0.5))
        # Could add more augmentations here if desired; v2 transforms keep
        # bounding boxes and masks consistent with the image.
    return T.Compose(transforms)