How to cite item

Benchmarking YOLOs in breast ultrasound lesion segmentation

  
@comment{
	Journal article record for citation key QIMS154956.
	NOTE(review): the abstract ends mid-sentence at "(training time"; it looks
	truncated in the export. Restore the remainder from the article page.
	NOTE(review): no pages or doi field is present; add them if the published
	record provides them (prefer doi over url when both exist).
	Percent signs in the abstract are escaped so that styles which print the
	abstract do not treat them as LaTeX comment characters.
}
@article{QIMS154956,
	author = {Yu, Shaode and Huang, Ming and Chen, Enqi and Zhu, Bing and Liang, Xiaokun and Xie, Yaoqin},
	title = {Benchmarking {YOLOs} in breast ultrasound lesion segmentation},
	journal = {Quantitative Imaging in Medicine and Surgery},
	volume = {16},
	number = {7},
	year = {2026},
	issn = {2223-4306},
	url = {https://qims.amegroups.org/article/view/154956},
	abstract = {Background: Breast ultrasound (BUS) is widely used for breast cancer (BC) screening and diagnosis, yet accurate breast lesion segmentation remains challenging. Although You Only Look Once (YOLO) and its variants have shown strong performance in object segmentation, their effectiveness on BUS lesion segmentation has not been systematically explored. This study aims to benchmark twelve YOLO variants from four families (YOLOv5, YOLOv8, YOLOv9, and YOLO11) for BUS lesion segmentation under same-database and cross-database settings. Methods: Twelve YOLO variants spanning nano to extra-large scales were fine-tuned and evaluated on two public BUS datasets, the Breast Ultrasound Images (BUSI) dataset (n=647) and the breast ultrasound lesion segmentation dataset from the University of Castilla-La Mancha (BUS-UCLM) dataset (n=264). Each dataset was split into training (80\%), validation (10\%), and testing (10\%) subsets with stratified random partitioning, and experiments were repeated across eight random seeds. Six evaluation metrics, including Dice coefficient, intersection over union (IoU), precision, recall, F1 score (F1S), and mean average precision at IoU threshold 0.5 (mAP@0.5), were used. In addition, U-Net and DeepLabV3+ were compared under the same protocol. Results: Under same-database evaluation, all variants achieved strong performance, with mean Dice scores ≥0.81 on BUSI and ≥0.87 on UCLM. On BUSI, yolov5s achieved the highest mean Dice (0.93±0.012) and IoU (0.88±0.014). On UCLM, yolov8m attained the highest mean Dice (0.98±0.006) and IoU (0.96±0.007). In cross-database evaluation, however, performance degraded substantially, with Dice scores decreasing by approximately 0.20 or more. For BUSI→UCLM, yolov5s achieved the highest mean Dice (0.71±0.032); and for UCLM→BUSI, yolov9c achieved the highest mean Dice (0.60±0.038). Among all variants, yolo11n demonstrated competitive performance across both same- and cross-database evaluations (BUSI Dice 0.91±0.014, and BUSI→UCLM Dice 0.69±0.034; UCLM Dice 0.96±0.009, and UCLM→BUSI 0.59±0.039) while maintaining low computational cost (training time },
}