@EdWorkingPaper{ai25-1276,
  title       = "Comparing Machine Learning Methods for Estimating Heterogeneous Treatment Effects in Randomized Trials: A Comprehensive Simulation Study",
  author      = "Luke Miratrix and Polina Polskaia and Richard Dorsett and Pei Zhu and Nicholas Commins and J. David Selby",
  institution = "Annenberg Institute at Brown University",
  number      = "1276",
  year        = "2025",
  month       = "September",
  URL         = "http://www.edworkingpapers.com/ai25-1276",
  abstract    = {This study compares 18 machine learning methods for estimating heterogeneous treatment effects in randomized controlled trials, using simulations calibrated to two large-scale educational experiments. We evaluate performance across continuous and binary outcomes with diverse and realistic treatment effect heterogeneity patterns, varying sample sizes, covariate complexities, and effect magnitudes. Bayesian Additive Regression Trees with the S-learner (BART S) outperforms the alternatives on average. While no method predicts individual effects with high accuracy, some show promise in identifying who benefits most or least. An empirical application illustrates how ML methods can reveal heterogeneity patterns beyond conventional subgroup analysis. These findings highlight both the potential and the limitations of ML, offering evidence-based practical guidance for analyzing treatment effect variation in experimental evaluations.}
}