m2cai16-tool-locations/ annotations/ video01.json # or .xml / .txt video02.json frames/ video01/ frame_000001.jpg ... Here’s a robust parser using and torchvision :

def __init__(self, root_dir, transform=None): self.root_dir = root_dir self.transform = transform self.samples = [] # Collect all (frame_path, annotation_path) pairs ann_dir = os.path.join(root_dir, 'annotations') for ann_file in os.listdir(ann_dir): if not ann_file.endswith('.json'): continue ann_path = os.path.join(ann_dir, ann_file) video_id = ann_file.replace('.json', '') frame_dir = os.path.join(root_dir, 'frames', video_id) with open(ann_path, 'r') as f: annotations = json.load(f) for frame_name, boxes_info in annotations.items(): frame_path = os.path.join(frame_dir, frame_name) if os.path.exists(frame_path): self.samples.append((frame_path, boxes_info))

# 16 tool classes (example; adjust to your annotation file) CLASSES = [ 'background', 'grasper', 'scissors', 'hook', 'clipper', 'irrigator', 'specimen_bag', 'bipolar', 'hook_electrode', 'trocars', 'stapler', 'suction', 'clip_applier', 'vessel_sealer', 'ligasure', 'ultrasonic', 'other' ]

# Draw boxes img_with_boxes = draw_bounding_boxes(img, boxes, labels=[class_names[l] for l in labels], colors='red', width=2) plt.figure(figsize=(10, 8)) plt.imshow(img_with_boxes.permute(1,2,0)) plt.axis('off') plt.title(f"Frame {idx} — {len(boxes)} tools detected") plt.show() dataset = M2CAI16ToolLocations('./m2cai16-tool-locations') show_annotations(dataset, idx=0) 4. Useful Preprocessing for Training Convert to COCO format (for Detectron2, MMDetection, etc.):

import matplotlib.pyplot as plt from torchvision.utils import draw_bounding_boxes from torchvision.transforms import ToTensor def show_annotations(dataset, idx=0): img, target = dataset[idx] if isinstance(img, torch.Tensor): img = (img * 255).byte() if img.max() <= 1 else img else: img = ToTensor()(img).byte()

This dataset is designed for (bounding boxes) in laparoscopic cholecystectomy videos. It contains annotations for 16 tools, including their positions in video frames. 1. Dataset Overview & Utility Purpose : Train object detection models (e.g., YOLO, Faster R-CNN, DETR) to locate surgical instruments in real-time.

M2cai16-tool-locations Official

m2cai16-tool-locations/ annotations/ video01.json # or .xml / .txt video02.json frames/ video01/ frame_000001.jpg ... Here’s a robust parser using and torchvision :

def __init__(self, root_dir, transform=None): self.root_dir = root_dir self.transform = transform self.samples = [] # Collect all (frame_path, annotation_path) pairs ann_dir = os.path.join(root_dir, 'annotations') for ann_file in os.listdir(ann_dir): if not ann_file.endswith('.json'): continue ann_path = os.path.join(ann_dir, ann_file) video_id = ann_file.replace('.json', '') frame_dir = os.path.join(root_dir, 'frames', video_id) with open(ann_path, 'r') as f: annotations = json.load(f) for frame_name, boxes_info in annotations.items(): frame_path = os.path.join(frame_dir, frame_name) if os.path.exists(frame_path): self.samples.append((frame_path, boxes_info)) m2cai16-tool-locations

# 16 tool classes (example; adjust to your annotation file) CLASSES = [ 'background', 'grasper', 'scissors', 'hook', 'clipper', 'irrigator', 'specimen_bag', 'bipolar', 'hook_electrode', 'trocars', 'stapler', 'suction', 'clip_applier', 'vessel_sealer', 'ligasure', 'ultrasonic', 'other' ] m2cai16-tool-locations/ annotations/ video01

# Draw boxes img_with_boxes = draw_bounding_boxes(img, boxes, labels=[class_names[l] for l in labels], colors='red', width=2) plt.figure(figsize=(10, 8)) plt.imshow(img_with_boxes.permute(1,2,0)) plt.axis('off') plt.title(f"Frame {idx} — {len(boxes)} tools detected") plt.show() dataset = M2CAI16ToolLocations('./m2cai16-tool-locations') show_annotations(dataset, idx=0) 4. Useful Preprocessing for Training Convert to COCO format (for Detectron2, MMDetection, etc.): It contains annotations for 16 tools, including their

import matplotlib.pyplot as plt from torchvision.utils import draw_bounding_boxes from torchvision.transforms import ToTensor def show_annotations(dataset, idx=0): img, target = dataset[idx] if isinstance(img, torch.Tensor): img = (img * 255).byte() if img.max() <= 1 else img else: img = ToTensor()(img).byte()

This dataset is designed for (bounding boxes) in laparoscopic cholecystectomy videos. It contains annotations for 16 tools, including their positions in video frames. 1. Dataset Overview & Utility Purpose : Train object detection models (e.g., YOLO, Faster R-CNN, DETR) to locate surgical instruments in real-time.