sam3_local/examples/saco_gold_silver_vis_example.ipynb

{
  "cells": [
    {
      "cell_type": "code",
      "execution_count": null,
      "id": "37048f21",
      "metadata": {},
      "outputs": [],
      "source": [
        "# Copyright (c) Meta Platforms, Inc. and affiliates."
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "id": "154d8663",
      "metadata": {},
      "outputs": [],
      "source": [
        "# Set to True to make the next cell print the torch/torchvision/CUDA setup and pip-install the notebook dependencies\n",
        "using_colab = False"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "id": "b85d99d9",
      "metadata": {},
      "outputs": [],
      "source": [
        "# Colab-only setup: report the runtime, then install what this notebook imports\n",
        "if using_colab:\n",
        "    import sys\n",
        "\n",
        "    import torch\n",
        "    import torchvision\n",
        "\n",
        "    print(\"PyTorch version:\", torch.__version__)\n",
        "    print(\"Torchvision version:\", torchvision.__version__)\n",
        "    print(\"CUDA is available:\", torch.cuda.is_available())\n",
        "\n",
        "    # Calling pip through sys.executable installs into the interpreter that runs this kernel.\n",
        "    # pycocotools is listed because the visualization cell below imports it.\n",
        "    !{sys.executable} -m pip install opencv-python matplotlib scikit-learn pycocotools\n",
        "    !{sys.executable} -m pip install 'git+https://github.com/facebookresearch/sam3.git'"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "id": "da21a3bc",
      "metadata": {},
      "outputs": [],
      "source": [
        "import os\n",
        "from glob import glob\n",
        "\n",
        "import numpy as np\n",
        "import sam3.visualization_utils as utils\n",
        "\n",
        "from matplotlib import pyplot as plt\n",
        "\n",
        "# Color palette passed to utils.draw_masks_to_frame in the visualization cell.\n",
        "# The first entry of the Pascal color map is dropped (presumably the background color - TODO confirm).\n",
        "COLORS = utils.pascal_color_map()[1:]"
      ]
    },
    {
      "cell_type": "markdown",
      "id": "57e85e7e",
      "metadata": {},
      "source": [
        "1. Load the data"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "id": "a796734e",
      "metadata": {},
      "outputs": [],
      "source": [
        "# Prepare the data paths (both must be set before running the rest of the notebook)\n",
        "ANNOT_DIR = None  # PUT YOUR ANNOTATION PATH HERE\n",
        "IMG_DIR = None  # PUT YOUR IMAGE PATH HERE\n",
        "\n",
        "# Fail early with a readable message: os.path.join(None, ...) would otherwise raise an opaque TypeError\n",
        "if ANNOT_DIR is None or IMG_DIR is None:\n",
        "    raise ValueError(\"Set ANNOT_DIR and IMG_DIR to your annotation and image directories before running this cell\")\n",
        "\n",
        "# Load the SA-CO/Gold annotation files (sorted so the loading order does not depend on the filesystem)\n",
        "annot_file_list = sorted(glob(os.path.join(ANNOT_DIR, \"*gold*.json\")))\n",
        "if not annot_file_list:\n",
        "    raise FileNotFoundError(f\"No '*gold*.json' annotation files found in {ANNOT_DIR!r}\")\n",
        "annot_dfs = utils.get_annot_dfs(file_list=annot_file_list)"
      ]
    },
    {
      "cell_type": "markdown",
      "id": "74bf92b1",
      "metadata": {},
      "source": [
        "Show the annotation files being loaded"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "id": "a95620ec",
      "metadata": {},
      "outputs": [],
      "source": [
        "# Keys of the loaded annotations; the cells below index annot_dfs with one of these dataset names\n",
        "annot_dfs.keys()"
      ]
    },
    {
      "cell_type": "markdown",
      "id": "5ce211d3",
      "metadata": {},
      "source": [
        "2. Examples of the data format"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "id": "6ba749db",
      "metadata": {},
      "outputs": [],
      "source": [
        "# Dataset used for the format examples below; it must be one of the keys of annot_dfs\n",
        "example_dataset_name = \"gold_fg_sports_equipment_merged_a_release_test\"\n",
        "\n",
        "# Top-level entries of one dataset (\"info\", \"images\" and \"annotations\" are shown next)\n",
        "annot_dfs[example_dataset_name].keys()"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "id": "4b6dc186",
      "metadata": {},
      "outputs": [],
      "source": [
        "# The \"info\" entry of the example dataset\n",
        "annot_dfs[example_dataset_name][\"info\"]"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "id": "c41091b3",
      "metadata": {},
      "outputs": [],
      "source": [
        "# First three rows of the \"images\" table\n",
        "annot_dfs[example_dataset_name][\"images\"].head(3)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "id": "a7df5771",
      "metadata": {},
      "outputs": [],
      "source": [
        "# First three rows of the \"annotations\" table\n",
        "annot_dfs[example_dataset_name][\"annotations\"].head(3)"
      ]
    },
    {
      "cell_type": "markdown",
      "id": "5673a63f",
      "metadata": {},
      "source": [
        "3. Visualize the data"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "id": "b1fc2a24",
      "metadata": {},
      "outputs": [],
      "source": [
        "from collections import defaultdict\n",
        "\n",
        "import cv2\n",
        "from pycocotools import mask as mask_util\n",
        "\n",
        "# Select a target dataset; it must be one of the keys of annot_dfs\n",
        "target_dataset_name = \"gold_fg_food_merged_a_release_test\"\n",
        "# Upper bound on the number of image / noun-phrase pairs to plot (one figure row each)\n",
        "num_image_nps_to_show = 10\n",
        "\n",
        "\n",
        "def display_image_in_subplot(img, axes, row, col, title=\"\"):\n",
        "    \"\"\"Show `img` on `axes[row, col]` with `title` and the axis decorations hidden.\"\"\"\n",
        "    axes[row, col].imshow(img)\n",
        "    axes[row, col].set_title(title)\n",
        "    axes[row, col].axis('off')\n",
        "\n",
        "\n",
        "# Index the rows of the \"images\" table by their id\n",
        "gt_image_np_pairs = annot_dfs[target_dataset_name][\"images\"]\n",
        "gt_annotations = annot_dfs[target_dataset_name][\"annotations\"]\n",
        "gt_image_np_map = {img[\"id\"]: img for _, img in gt_image_np_pairs.iterrows()}\n",
        "\n",
        "# Group GT annotations by image_id (defaultdict creates the list on first access)\n",
        "gt_image_np_ann_map = defaultdict(list)\n",
        "for _, ann in gt_annotations.iterrows():\n",
        "    gt_image_np_ann_map[ann[\"image_id\"]].append(ann)\n",
        "\n",
        "# A key only exists in gt_image_np_ann_map once an annotation was appended,\n",
        "# so membership alone separates positives (annotated) from negatives (no annotation).\n",
        "positiveNPs = [img_id for img_id in gt_image_np_map if img_id in gt_image_np_ann_map]\n",
        "negativeNPs = [img_id for img_id in gt_image_np_map if img_id not in gt_image_np_ann_map]\n",
        "if not positiveNPs:\n",
        "    raise ValueError(f\"No annotated image / noun-phrase pairs found in {target_dataset_name!r}\")\n",
        "\n",
        "# Sample without replacement so no pair is plotted twice, and never request more rows than exist\n",
        "num_rows = min(num_image_nps_to_show, len(positiveNPs))\n",
        "sampled_indices = np.random.choice(len(positiveNPs), size=num_rows, replace=False)\n",
        "\n",
        "# squeeze=False keeps `axes` two-dimensional even when a single row is plotted\n",
        "fig, axes = plt.subplots(num_rows, 3, figsize=(15, 5 * num_rows), squeeze=False)\n",
        "for idx, rand_idx in enumerate(sampled_indices):\n",
        "    image_id = positiveNPs[rand_idx]\n",
        "    noun_phrase = gt_image_np_map[image_id][\"text_input\"]\n",
        "    img_rel_path = gt_image_np_map[image_id][\"file_name\"]\n",
        "    full_path = os.path.join(IMG_DIR, img_rel_path)\n",
        "\n",
        "    img = cv2.imread(full_path)\n",
        "    # cv2.imread returns None instead of raising when the file cannot be read\n",
        "    if img is None:\n",
        "        raise FileNotFoundError(f\"Could not read image: {full_path}\")\n",
        "    # OpenCV loads images as BGR; matplotlib expects RGB\n",
        "    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)\n",
        "\n",
        "    gt_annotation = gt_image_np_ann_map[image_id]\n",
        "    annot_masks = [mask_util.decode(ann[\"segmentation\"]) for ann in gt_annotation]\n",
        "    mask_colors = COLORS[: len(annot_masks)]\n",
        "\n",
        "    # Show the image\n",
        "    display_image_in_subplot(img, axes, idx, 0, f\"{noun_phrase}\")\n",
        "\n",
        "    # Show all masks over a white background\n",
        "    white_frame = np.full_like(img, 255)\n",
        "    all_masks = utils.draw_masks_to_frame(\n",
        "        frame=white_frame, masks=annot_masks, colors=mask_colors\n",
        "    )\n",
        "    display_image_in_subplot(all_masks, axes, idx, 1, f\"{noun_phrase} - Masks only\")\n",
        "\n",
        "    # Show masks overlaid on the image\n",
        "    masked_frame = utils.draw_masks_to_frame(\n",
        "        frame=img, masks=annot_masks, colors=mask_colors\n",
        "    )\n",
        "    display_image_in_subplot(masked_frame, axes, idx, 2, f\"{noun_phrase} - Masks overlaid\")"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "id": "84a20e0e",
      "metadata": {},
      "outputs": [],
      "source": []
    }
  ],
  "metadata": {
    "fileHeader": "",
    "fileUid": "a2cedcd3-26e1-430d-b718-764d51077f86",
    "isAdHoc": false,
    "kernelspec": {
      "display_name": "Python 3 (ipykernel)",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "version": "3.10.13"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 2
}