chore: Update .gitignore to include custom ignores and remove outdated hint from prompt_plan.md

2025-04-12 10:14:46 +01:00
parent 996c071665
commit 392e402c2e
3 changed files with 41 additions and 2 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -171,4 +171,10 @@ cython_debug/
 .ruff_cache/

 # PyPI configuration file
-.pypirc
+.pypirc
+
+# Custom Ignores
+data/
+outputs/
+logs/
+*.pth
--- a/prompt_plan.md
+++ b/prompt_plan.md
@@ -101,7 +101,6 @@ Implement the core dataset loading logic in `utils/data_utils.py`:
    *   In `__len__(self)`:
        *   Return the total number of images.

-*(Hint: Refer to the Torchvision Object Detection Finetuning Tutorial for guidance on parsing masks and structuring the target dictionary: https://pytorch.org/tutorials/intermediate/torchvision_tutorial.html)*
 ```

 ## Prompt 5: Data Utilities (Transforms and Collate)
--- a/scripts/download_data.sh
+++ b/scripts/download_data.sh
@@ -0,0 +1,34 @@
+#!/bin/bash
+
+set -e # Exit immediately if a command exits with a non-zero status.
+
+DATA_DIR="data"
+TARGET_DIR="$DATA_DIR/PennFudanPed"
+ZIP_FILE="$DATA_DIR/PennFudanPed.zip"
+URL="https://www.cis.upenn.edu/~jshi/ped_html/PennFudanPed.zip"
+
+# 1. Check if the target directory already exists
+if [ -d "$TARGET_DIR" ]; then
+    echo "Dataset already exists at $TARGET_DIR. Skipping download."
+    exit 0
+fi
+
+# 2. Create the data directory if it doesn't exist
+mkdir -p "$DATA_DIR"
+echo "Created directory $DATA_DIR (if it didn't exist)."
+
+# 3. Download the dataset
+echo "Downloading dataset from $URL..."
+wget -O "$ZIP_FILE" "$URL"
+echo "Download complete."
+
+# 4. Extract the dataset
+echo "Extracting $ZIP_FILE to $DATA_DIR..."
+unzip -q "$ZIP_FILE" -d "$DATA_DIR" # -q for quiet mode
+echo "Extraction complete."
+
+# 5. Remove the zip file
+rm "$ZIP_FILE"
+echo "Removed $ZIP_FILE."
+
+echo "Dataset setup complete in $TARGET_DIR."