{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"provenance":[],"machine_shape":"hm","gpuType":"A100","authorship_tag":"ABX9TyNuJPxbmgQCZNkFituuedHE"},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"},"accelerator":"GPU"},"cells":[{"cell_type":"markdown","source":["In this notebook, we train a custom **RF-DETR** (Region-Focused DEtection TRansformer) model for object detection using a labeled dataset in COCO format. RF-DETR improves detection accuracy by focusing attention on spatial regions of interest, making it well-suited for complex scenes with clutter or small objects. To ensure efficient training and avoid overfitting, we incorporate key training callbacks—such as early stopping, model checkpointing, and learning rate scheduling. By the end of this notebook, you’ll have a fully trained RF-DETR model ready for evaluation and deployment."],"metadata":{"id":"KP594zrsI99p"}},{"cell_type":"code","source":["!pip install -q rfdetr supervision roboflow"],"metadata":{"id":"qx0ZDiyFoXEj"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["# Connect to google drive for data access.\n","from google.colab import drive\n","drive.mount('/content/gdrive')\n","\n","try:\n"," !ln -s /content/gdrive/My\\ Drive/ /mydrive\n"," print('Successful')\n","except Exception as e:\n"," print(e)\n"," print('Not successful')"],"metadata":{"id":"PiYCdv6Ba_5X"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["# Import required libraries.\n","import io\n","import requests\n","import supervision as sv\n","from PIL import Image\n","from rfdetr import RFDETRLarge\n","from rfdetr.util.coco_classes import COCO_CLASSES\n","from typing import Dict, List, Optional, Tuple, Any\n","import json\n","import glob\n","import os\n","import natsort\n","import gc\n","import torch\n","import weakref\n","import pandas as pd\n","import matplotlib.pyplot as plt\n","import warnings\n","\n","warnings.filterwarnings(\"ignore\")\n","\n","!export CUDA_LAUNCH_BLOCKING=1\n","!export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True"],"metadata":{"id":"v3kqeud0T8cr"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["#@title Utils\n","def read_coco_json(json_path):\n"," \"\"\"\n"," Reads a COCO-format JSON annotation file.\n","\n"," Args:\n"," json_path: Path to the COCO JSON file.\n","\n"," Returns:\n"," Dictionary with keys 'images', 'annotations', and 'categories'.\n"," \"\"\"\n"," with open(json_path, 'r', encoding='utf-8') as f:\n"," coco_data = json.load(f)\n"," return coco_data\n","\n","\n","def cleanup_gpu_memory(obj=None, verbose: bool = False):\n","\n"," if not torch.cuda.is_available():\n"," if verbose:\n"," print(\"[INFO] CUDA is not available. 
## Load pre-trained model.

```python
model = RFDETRLarge()
```

## Start TensorBoard to visualize training metrics.

```python
model_output_path = "/mydrive/LLM/rf-detr/data/output/" # @param {type: "string", placeholder: "[path to the model]", isTemplate: true}
```

```python
%load_ext tensorboard
%tensorboard --logdir $model_output_path
```

We also register a callback that records the metrics reported at the end of each epoch, so we can plot them once training finishes:

```python
history = []

def callback2(data):
    history.append(data)

model.callbacks["on_fit_epoch_end"].append(callback2)
```

## Dataset

RF-DETR expects the dataset to be in COCO format. Divide your dataset into three subdirectories: `train`, `valid`, and `test`. Each subdirectory should contain its own `_annotations.coco.json` file that holds the annotations for that particular split, along with the corresponding image files. Below is an example of the directory structure, followed by a small sanity-check sketch:

```
dataset/
├── train/
│   ├── _annotations.coco.json
│   ├── image1.jpg
│   ├── image2.jpg
│   └── ... (other image files)
├── valid/
│   ├── _annotations.coco.json
│   ├── image1.jpg
│   ├── image2.jpg
│   └── ... (other image files)
└── test/
    ├── _annotations.coco.json
    ├── image1.jpg
    ├── image2.jpg
    └── ... (other image files)
```
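Before training, it is worth checking that each split parses and that every annotation points at a real image and category. A minimal sketch using the `read_coco_json` helper from the Utils cell (the split path is illustrative; point it at your own folder):

```python
# Illustrative path: adjust to your own dataset location.
split_dir = "/mydrive/LLM/rf-detr/data/dataset/train"

coco = read_coco_json(os.path.join(split_dir, "_annotations.coco.json"))

print(f"images:      {len(coco['images'])}")
print(f"annotations: {len(coco['annotations'])}")
print(f"categories:  {[c['name'] for c in coco['categories']]}")

# Every annotation should reference an existing image and category.
image_ids = {img['id'] for img in coco['images']}
category_ids = {cat['id'] for cat in coco['categories']}
for ann in coco['annotations']:
    assert ann['image_id'] in image_ids, f"orphan annotation {ann['id']}"
    assert ann['category_id'] in category_ids, f"bad category in annotation {ann['id']}"
```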
The annotation files must follow the COCO JSON format described at this link: [COCO JSON format](https://roboflow.com/formats/coco-json?ref=blog.roboflow.com)

```python
dataset = "/mydrive/LLM/rf-detr/data/dataset/" # @param {type: "string", placeholder: "[path to the dataset]", isTemplate: true}
```
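For reference, this is the shape of a minimal `_annotations.coco.json`, written out from Python. The file name, image size, and the single box are made-up placeholders; COCO boxes are `[x, y, width, height]` in pixels:

```python
# A made-up, minimal COCO annotation file: one image, one box, one class.
minimal_coco = {
    "images": [
        {"id": 0, "file_name": "image1.jpg", "width": 640, "height": 480},
    ],
    "annotations": [
        {
            "id": 0,
            "image_id": 0,
            "category_id": 1,
            "bbox": [100, 150, 80, 60],  # [x, y, width, height] in pixels
            "area": 80 * 60,
            "iscrowd": 0,
        },
    ],
    "categories": [
        {"id": 1, "name": "example-class", "supercategory": "none"},
    ],
}

with open("_annotations.coco.json", "w", encoding="utf-8") as f:
    json.dump(minimal_coco, f, indent=2)
```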
"model.train(\n",
293
+
" dataset_dir=dataset,\n",
294
+
" epochs=200,\n",
295
+
" batch_size=8,\n",
296
+
" grad_accum_steps=1,\n",
297
+
" output_dir=model_output_path,\n",
298
+
" early_stopping=True,\n",
299
+
" early_stopping_patience=10,\n",
300
+
" early_stopping_min_delta=0.001,\n",
301
+
" early_stopping_use_ema=False,\n",
302
+
" lr=1e-4, # Higher LR for the new decoder\n",
303
+
" lr_encoder=1e-5, # Lower LR for the pre-trained encoder\n",
304
+
" weight_decay=1e-4, # Adds regularization to prevent overfitting\n",
305
+
" lr_scheduler='cosine', # Use the smoother cosine scheduler\n",
306
+
" warmup_epochs=2, # Stabilizes the beginning of training\n",
307
+
")"
## Plot training metric curves.

The `history` list filled by our `on_fit_epoch_end` callback can now be turned into loss curves:
"df = pd.DataFrame(history)\n",
328
+
"\n",
329
+
"plt.figure(figsize=(12, 8))\n",
330
+
"\n",
331
+
"plt.plot(\n",
332
+
"\tdf['epoch'],\n",
333
+
"\tdf['train_loss'],\n",
334
+
"\tlabel='Training Loss',\n",
335
+
"\tmarker='o',\n",
336
+
"\tlinestyle='-'\n",
337
+
")\n",
338
+
"\n",
339
+
"plt.plot(\n",
340
+
"\tdf['epoch'],\n",
341
+
"\tdf['test_loss'],\n",
342
+
"\tlabel='Validation Loss',\n",
343
+
"\tmarker='o',\n",
344
+
"\tlinestyle='--'\n",
345
+
")\n",
346
+
"\n",
347
+
"plt.title('Train/Validation Loss over Epochs')\n",