% Systematic review (PRISMA) of LLMs for exercise prescription; Biology of Sport 52(?), 2026.
% Cleaned: brace-delimited fields, LaTeX quotes instead of Unicode curly quotes,
% {AI} brace-protected against style recasing, redundant DOI-resolver URL dropped.
@article{He2026,
  author   = {He, Tianyuan
              and Lu, Di
              and Ma, Yongye
              and He, Jiaxin
              and Li, Duanying
              and Li, Guoxing
              and Sun, Jian},
  title    = {The {AI} Recommendation Paradox: A Systematic Review Evaluating the Promise, Peril, and Path Forward for Large Language Models in Exercise Recommendation},
  journal  = {Biology of Sport},
  year     = {2026},
  pages    = {949--970},
  issn     = {0860-021X},
  doi      = {10.5114/biolsport.2026.158676},
  abstract = {Large Language Models (LLMs) are rapidly emerging as tools for generating personalized exercise advice, creating an ``AI Prescription Paradox'' of promising potential but significant risks. This study systematically reviews the empirical evidence to evaluate the efficacy, quality, and safety of LLMs in exercise prescription. Following PRISMA guidelines, we conducted a systematic review of 24 empirical studies (N = 2,512 participants) published up to June 19, 2025. Data were extracted from human intervention trials, in silico expert evaluations, and human-computer interaction studies, and a comprehensive narrative synthesis was performed. Our synthesis reveals significant deficits. In head-to-head trials comparing AI to human experts, LLM-generated plans were inferior in 5 out of 6 (83%) cases for driving physiological adaptations. Most critically, systemic safety flaws were identified in 14 of 24 studies (58%), with models recommending contraindicated exercises for clinical populations. While the quality of AI advice was highly variable, novel conversational and context-aware models showed promise for user engagement. LLMs in their current state are powerful assistive tools but cannot safely replace the core decision-making and supervisory roles of human experts. We advocate for a shift towards a human-AI synergistic paradigm. To guide this, we propose a novel, evidence-based risk stratification framework to help practitioners harness these tools safely and effectively, ensuring that professional oversight remains paramount.},
}