publications
Publications by category, in reverse chronological order. Generated by jekyll-scholar.
2025
- Asymptotics of SGD in Sequence-Single Index Models and Single-Layer Attention Networks. Luca Arnaboldi, Bruno Loureiro, Ludovic Stephan, and 2 more authors. arXiv preprint, 2025.
@misc{arnaboldi2025sequence, title = {Asymptotics of SGD in Sequence-Single Index Models and Single-Layer Attention Networks}, author = {Arnaboldi, Luca and Loureiro, Bruno and Stephan, Ludovic and Krzakala, Florent and Zdeborova, Lenka}, year = {2025}, eprint = {2506.02651}, archiveprefix = {arXiv}, primaryclass = {stat.ML} }
- Dynamical mean-field analysis of adaptive Langevin diffusions: Replica-symmetric fixed point and empirical Bayes. Zhou Fan, Justin Ko, Bruno Loureiro, and 2 more authors. arXiv preprint, 2025.
@misc{fan2025replica, title = {Dynamical mean-field analysis of adaptive Langevin diffusions: Replica-symmetric fixed point and empirical Bayes}, author = {Fan, Zhou and Ko, Justin and Loureiro, Bruno and Lu, Yue M. and Shen, Yandi}, year = {2025}, eprint = {2504.15558}, archiveprefix = {arXiv}, primaryclass = {math.ST} }
- Dynamical mean-field analysis of adaptive Langevin diffusions: Propagation-of-chaos and convergence of the linear response. Zhou Fan, Justin Ko, Bruno Loureiro, and 2 more authors. arXiv preprint, 2025.
@misc{fan2025propagation, title = {Dynamical mean-field analysis of adaptive Langevin diffusions: Propagation-of-chaos and convergence of the linear response}, author = {Fan, Zhou and Ko, Justin and Loureiro, Bruno and Lu, Yue M. and Shen, Yandi}, year = {2025}, eprint = {2504.15556}, archiveprefix = {arXiv}, primaryclass = {math.ST} }
- Optimal Spectral Transitions in High-Dimensional Multi-Index Models. Leonardo Defilippis, Yatin Dandi, Pierre Mergny, and 2 more authors. arXiv preprint, 2025.
@misc{defilippis2025optimal, title = {Optimal Spectral Transitions in High-Dimensional Multi-Index Models}, author = {Defilippis, Leonardo and Dandi, Yatin and Mergny, Pierre and Krzakala, Florent and Loureiro, Bruno}, year = {2025}, eprint = {2502.02545}, archiveprefix = {arXiv}, primaryclass = {cs.LG} }
- [AISTATS] A Random Matrix Theory Perspective on the Spectrum of Learned Features and Asymptotic Generalization Capabilities. Yatin Dandi, Luca Pesce, Hugo Cui, and 3 more authors. In Proceedings of The 28th International Conference on Artificial Intelligence and Statistics, 2025.
A key property of neural networks is their capacity of adapting to data during training. Yet, our current mathematical understanding of feature learning and its relationship to generalization remain limited. In this work, we provide a random matrix analysis of how fully-connected two-layer neural networks adapt to the target function after a single, but aggressive, gradient descent step. We rigorously establish the equivalence between the updated features and an isotropic spiked random feature model, in the limit of large batch size. For the latter model, we derive a deterministic equivalent description of the feature empirical covariance matrix in terms of certain low-dimensional operators. This allows us to sharply characterize the impact of training in the asymptotic feature spectrum, and in particular, provides a theoretical grounding for how the tails of the feature spectrum modify with training. The deterministic equivalent further yields the exact asymptotic generalization error, shedding light on the mechanisms behind its improvement in the presence of feature learning. Our result goes beyond standard random matrix ensembles, and therefore we believe it is of independent technical interest. Different from previous work, our result holds in the challenging maximal learning rate regime, is fully rigorous and allows for finitely supported second layer initialization, which turns out to be crucial for studying the functional expressivity of the learned features. This provides a sharp description of the impact of feature learning in the generalization of two-layer neural networks, beyond the random features and lazy training regimes.
@inproceedings{dandi2024random, title = {A Random Matrix Theory Perspective on the Spectrum of Learned Features and Asymptotic Generalization Capabilities}, author = {Dandi, Yatin and Pesce, Luca and Cui, Hugo and Krzakala, Florent and Lu, Yue and Loureiro, Bruno}, booktitle = {Proceedings of The 28th International Conference on Artificial Intelligence and Statistics}, pages = {2224--2232}, year = {2025}, editor = {Li, Yingzhen and Mandt, Stephan and Agrawal, Shipra and Khan, Emtiyaz}, volume = {258}, series = {Proceedings of Machine Learning Research}, month = {03--05 May}, publisher = {PMLR}, url = {https://proceedings.mlr.press/v258/dandi25a.html} }
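As a rough numerical companion to the abstract above, the sketch below takes one aggressive full-batch gradient step on the first layer of a two-layer ReLU network and prints the top eigenvalues of the empirical feature covariance before and after the step. The dimensions, the learning-rate scaling and the ReLU single-index target are illustrative assumptions, not the exact setting of the paper.

```python
import numpy as np

rng = np.random.default_rng(0)
d, p, n = 400, 400, 2000          # input dimension, hidden width, batch size (illustrative)
eta = np.sqrt(p)                  # an aggressive learning rate growing with the width

W = rng.standard_normal((p, d))                   # first-layer weights
a = rng.choice([-1.0, 1.0], size=p)               # finitely supported second layer
w_star = rng.standard_normal(d)                   # teacher direction

X = rng.standard_normal((n, d))
y = np.maximum(X @ w_star / np.sqrt(d), 0.0)      # illustrative relu single-index target

def features(Wmat, inputs):
    return np.maximum(inputs @ Wmat.T / np.sqrt(d), 0.0)

# one full-batch gradient step on the first layer, square loss, readout frozen
Z = features(W, X)
resid = Z @ a / np.sqrt(p) - y
grad_W = ((resid[:, None] * (Z > 0) * a[None, :] / np.sqrt(p)).T @ X) / (n * np.sqrt(d))
W_after = W - eta * grad_W

# spectrum of the empirical feature covariance before and after the step, on fresh inputs
X_fresh = rng.standard_normal((n, d))
for name, Wt in [("before", W), ("after ", W_after)]:
    F = features(Wt, X_fresh)
    eigs = np.linalg.eigvalsh(F.T @ F / n)
    print(name, "largest eigenvalues:", np.round(eigs[-4:][::-1], 2))
# the paper characterises how such a step modifies the tails and outliers of this spectrum
```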
- [MLST] A Theoretical Perspective on Mode Collapse in Variational Inference. Roman Soletskyi, Marylou Gabrié, and Bruno Loureiro. Machine Learning: Science and Technology, 2025.
While deep learning has expanded the possibilities for highly expressive variational families, the practical benefits of these tools for variational inference (VI) are often limited by the minimization of the traditional Kullback-Leibler objective, which can yield suboptimal solutions. A major challenge in this context is mode collapse: the phenomenon where a model concentrates on a few modes of the target distribution during training, despite being statistically capable of expressing them all. In this work, we carry out a theoretical investigation of mode collapse for the gradient flow on Gaussian mixture models. We identify the key low-dimensional statistics characterizing the flow, and derive a closed set of low-dimensional equations governing their evolution. Leveraging this compact description, we show that mode collapse is present even in statistically favorable scenarios, and identify two key mechanisms driving it: mean alignment and vanishing weight. Our theoretical findings are consistent with the implementation of VI using normalizing flows, a class of popular generative models, thereby offering practical insights.
@article{soletskyi2024theoretical, author = {Soletskyi, Roman and Gabrié, Marylou and Loureiro, Bruno}, title = {A Theoretical Perspective on Mode Collapse in Variational Inference}, journal = {Machine Learning: Science and Technology}, url = {http://iopscience.iop.org/article/10.1088/2632-2153/adde2a}, year = {2025} }
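A minimal sketch of the mode-collapse phenomenon discussed above, assuming PyTorch is available: a two-component Gaussian mixture variational family is fitted to a bimodal target by gradient descent on the reverse KL divergence, computed here by quadrature on a 1D grid. The target, initialisation and optimiser settings are illustrative assumptions rather than the paper's exact gradient-flow setup.

```python
import math
import torch

torch.manual_seed(0)
grid = torch.linspace(-10.0, 10.0, 2001)          # quadrature grid for 1D densities
dx = (grid[1] - grid[0]).item()
a = 3.0                                           # target modes at +a and -a

def log_normal(x, mu):                            # unit-variance Gaussian log-density
    return -0.5 * (x - mu) ** 2 - 0.5 * math.log(2 * math.pi)

log_p = torch.logsumexp(torch.stack([log_normal(grid, a), log_normal(grid, -a)]), 0) + math.log(0.5)

# variational family: two-component Gaussian mixture with learnable means and weight,
# initialised with both means on the same side of the origin
mu = torch.tensor([0.5, 1.5], requires_grad=True)
logit_w = torch.tensor(0.0, requires_grad=True)
opt = torch.optim.SGD([mu, logit_w], lr=0.05)

for step in range(3000):
    w = torch.sigmoid(logit_w)
    log_q = torch.logsumexp(torch.stack([log_normal(grid, mu[0]) + torch.log(w),
                                         log_normal(grid, mu[1]) + torch.log(1 - w)]), 0)
    kl = torch.sum(torch.exp(log_q) * (log_q - log_p)) * dx     # reverse KL(q || p) by quadrature
    opt.zero_grad()
    kl.backward()
    opt.step()

print("fitted means:", mu.detach().tolist(), " weight:", torch.sigmoid(logit_w).item())
# with this initialisation the flow typically drags both means towards the +a mode
# (mean alignment), leaving the -a mode uncovered, even though the family is expressive enough
```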
- [AISTATS] Fundamental computational limits of weak learnability in high-dimensional multi-index models. Emanuele Troiani, Yatin Dandi, Leonardo Defilippis, and 3 more authors. In Proceedings of The 28th International Conference on Artificial Intelligence and Statistics, 2025.
Multi-index models, functions which only depend on the covariates through a non-linear transformation of their projection on a subspace, are a useful benchmark for investigating feature learning with neural networks. This paper examines the theoretical boundaries of efficient learnability in this hypothesis class, focusing particularly on the minimum sample complexity required for weakly recovering their low-dimensional structure with first-order iterative algorithms, in the high-dimensional regime where the number of samples n = αd is proportional to the covariate dimension d. Our findings unfold in three parts: (i) first, we identify under which conditions a trivial subspace can be learned with a single step of a first-order algorithm for any α > 0; (ii) second, in the case where the trivial subspace is empty, we provide necessary and sufficient conditions for the existence of an easy subspace consisting of directions that can be learned only above a certain sample complexity α > α_c. The critical threshold α_c marks the presence of a computational phase transition, in the sense that it is conjectured that no efficient iterative algorithm can succeed for α < α_c. In a limited but interesting set of really hard directions, akin to the parity problem, α_c is found to diverge. Finally, (iii) we demonstrate that interactions between different directions can result in an intricate hierarchical learning phenomenon, where some directions can be learned sequentially when coupled to easier ones. Our analytical approach is built on the optimality of approximate message-passing algorithms among first-order iterative methods, delineating the fundamental learnability limit across a broad spectrum of algorithms, including neural networks trained with gradient descent.
@inproceedings{troiani2024fundamental, title = {Fundamental computational limits of weak learnability in high-dimensional multi-index models}, author = {Troiani, Emanuele and Dandi, Yatin and Defilippis, Leonardo and Zdeborova, Lenka and Loureiro, Bruno and Krzakala, Florent}, booktitle = {Proceedings of The 28th International Conference on Artificial Intelligence and Statistics}, pages = {2467--2475}, year = {2025}, editor = {Li, Yingzhen and Mandt, Stephan and Agrawal, Shipra and Khan, Emtiyaz}, volume = {258}, series = {Proceedings of Machine Learning Research}, month = {03--05 May}, publisher = {PMLR}, url = {https://proceedings.mlr.press/v258/troiani25a.html} }
- [AISTATS] A High Dimensional Statistical Model for Adversarial Training: Geometry and Trade-Offs. Kasimir Tanner, Matteo Vilucchio, Bruno Loureiro, and 1 more author. In Proceedings of The 28th International Conference on Artificial Intelligence and Statistics, 2025.
This work investigates adversarial training in the context of margin-based linear classifiers in the high-dimensional regime where the dimension d and the number of data points n diverge with a fixed ratio α= n / d. We introduce a tractable mathematical model where the interplay between the data and adversarial attacker geometries can be studied, while capturing the core phenomenology observed in the adversarial robustness literature. Our main theoretical contribution is an exact asymptotic description of the sufficient statistics for the adversarial empirical risk minimiser, under generic convex and non-increasing losses for a Block Feature Model. Our results allow us to precisely characterise which directions in the data are associated with a higher generalisation/robustness trade-off, as defined by a robustness and a usefulness metric. This goes beyond previous models in the literature, which fail to capture a difference in performance between adversarially trained models in the high sample complexity regime. In particular, we unveil the existence of directions which can be defended without penalising accuracy. Finally, we show the advantage of defending non-robust features during training, identifying a uniform protection as an inherently effective defence mechanism.
@inproceedings{tanner2024high, title = {A High Dimensional Statistical Model for Adversarial Training: Geometry and Trade-Offs}, author = {Tanner, Kasimir and Vilucchio, Matteo and Loureiro, Bruno and Krzakala, Florent}, booktitle = {Proceedings of The 28th International Conference on Artificial Intelligence and Statistics}, pages = {2530--2538}, year = {2025}, editor = {Li, Yingzhen and Mandt, Stephan and Agrawal, Shipra and Khan, Emtiyaz}, volume = {258}, series = {Proceedings of Machine Learning Research}, month = {03--05 May}, publisher = {PMLR}, url = {https://proceedings.mlr.press/v258/tanner25a.html} }
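For a linear classifier, the worst-case l2 perturbation of the inputs has a closed form, which makes adversarial empirical risk minimisation easy to sketch numerically. The snippet below is such a sketch on isotropic Gaussian data with a linear teacher; it does not implement the Block Feature Model or the exact robustness/usefulness metrics of the paper, and all parameters are illustrative.

```python
import numpy as np

rng = np.random.default_rng(0)
n, d, eps, lam = 400, 200, 0.5, 1e-2       # samples, dimension, attack radius, ridge strength

w_star = rng.standard_normal(d) / np.sqrt(d)
X = rng.standard_normal((n, d))
y = np.sign(X @ w_star)

def adv_grad(w, X, y, eps):
    # for a linear classifier the worst-case l2 perturbation of radius eps simply
    # reduces the margin y <w, x> by eps ||w||, so adversarial training has a closed form
    norm = np.linalg.norm(w) + 1e-12
    margins = y * (X @ w) - eps * norm
    s = -1.0 / (1.0 + np.exp(margins))                      # derivative of the logistic loss
    grad_margin = y[:, None] * X - eps * (w / norm)[None, :]
    return (s[:, None] * grad_margin).mean(axis=0) + lam * w

w = np.zeros(d)
for _ in range(2000):                                        # gradient descent on the adversarial risk
    w -= 0.5 * adv_grad(w, X, y, eps)

X_test = rng.standard_normal((5000, d))
y_test = np.sign(X_test @ w_star)
clean = np.mean(np.sign(X_test @ w) == y_test)
robust = np.mean(y_test * (X_test @ w) - eps * np.linalg.norm(w) > 0)
print(f"clean accuracy {clean:.3f}   robust accuracy at eps={eps}: {robust:.3f}")
```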
2024
- On the Geometry of Regularization in Adversarial Training: High-Dimensional Asymptotics and Generalization Bounds. Matteo Vilucchio, Nikolaos Tsilivis, Bruno Loureiro, and 1 more author. arXiv preprint, 2024.
@misc{vilucchio2024geometry, title = {On the Geometry of Regularization in Adversarial Training: High-Dimensional Asymptotics and Generalization Bounds}, author = {Vilucchio, Matteo and Tsilivis, Nikolaos and Loureiro, Bruno and Kempe, Julia}, year = {2024}, eprint = {2410.16073}, archiveprefix = {arXiv}, primaryclass = {stat.ML} }
- [ICML] Online Learning and Information Exponents: The Importance of Batch size & Time/Complexity Tradeoffs. Luca Arnaboldi, Yatin Dandi, Florent Krzakala, and 3 more authors. In Proceedings of the 41st International Conference on Machine Learning, 2024.
We study the impact of the batch size n_b on the iteration time T of training two-layer neural networks with one-pass stochastic gradient descent (SGD) on multi-index target functions of isotropic covariates. We characterize the optimal batch size minimizing the iteration time as a function of the hardness of the target, as characterized by the information exponents. We show that performing gradient updates with large batches n_b ≲ d^{ℓ/2} minimizes the training time without changing the total sample complexity, where ℓ is the information exponent of the target to be learned and d is the input dimension. However, larger batch sizes, n_b ≫ d^{ℓ/2}, are detrimental for improving the time complexity of SGD. We provably overcome this fundamental limitation via a different training protocol, Correlation loss SGD, which suppresses the auto-correlation terms in the loss function. We show that one can track the training progress by a system of low-dimensional ordinary differential equations (ODEs). Finally, we validate our theoretical results with numerical experiments.
@inproceedings{arnaboldi2024online, title = {Online Learning and Information Exponents: The Importance of Batch size & {T}ime/{C}omplexity Tradeoffs}, author = {Arnaboldi, Luca and Dandi, Yatin and Krzakala, Florent and Loureiro, Bruno and Pesce, Luca and Stephan, Ludovic}, booktitle = {Proceedings of the 41st International Conference on Machine Learning}, pages = {1730--1762}, year = {2024}, editor = {Salakhutdinov, Ruslan and Kolter, Zico and Heller, Katherine and Weller, Adrian and Oliver, Nuria and Scarlett, Jonathan and Berkenkamp, Felix}, volume = {235}, series = {Proceedings of Machine Learning Research}, month = {21--27 Jul}, publisher = {PMLR}, url = {https://proceedings.mlr.press/v235/arnaboldi24a.html} }
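A small illustration of the one-pass SGD setting described above, assuming a single-index target with information exponent 2 and the correlation loss mentioned in the abstract; the dimension, batch size and learning rate are arbitrary choices. The overlap m = w·w* printed along the way is the low-dimensional summary statistic whose dynamics the paper analyses.

```python
import numpy as np

rng = np.random.default_rng(0)
d, n_b, lr, steps = 500, 128, 0.05, 4000       # dimension, batch size, learning rate, SGD steps

w_star = rng.standard_normal(d); w_star /= np.linalg.norm(w_star)
w = rng.standard_normal(d); w /= np.linalg.norm(w)

def link(z):                                    # He_2 link: information exponent 2 (illustrative)
    return z ** 2 - 1.0

for t in range(steps + 1):
    X = rng.standard_normal((n_b, d))           # one-pass SGD: a fresh batch at every step
    y = link(X @ w_star)
    z = X @ w
    # correlation loss -y * link(z): the auto-correlation term of the square loss is dropped
    grad = (-(2.0 * y * z)[:, None] * X).mean(axis=0)
    w -= lr * grad
    w /= np.linalg.norm(w)                      # keep the weights on the sphere
    if t % 1000 == 0:
        print(f"step {t:5d}   overlap m = {w @ w_star:+.3f}")
# the number of samples needed to escape the initial overlap of order d**-0.5 depends on the
# batch size and the information exponent, which is the trade-off quantified in the paper
```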
- [NeurIPS] Dimension-free deterministic equivalents and scaling laws for random feature regression. Leonardo Defilippis, Bruno Loureiro, and Theodor Misiakiewicz. In Advances in Neural Information Processing Systems, 2024.
@inproceedings{defilippis2024dimensionfree, author = {Defilippis, Leonardo and Loureiro, Bruno and Misiakiewicz, Theodor}, booktitle = {Advances in Neural Information Processing Systems}, editor = {Globerson, A. and Mackey, L. and Belgrave, D. and Fan, A. and Paquet, U. and Tomczak, J. and Zhang, C.}, pages = {104630--104693}, publisher = {Curran Associates, Inc.}, title = {Dimension-free deterministic equivalents and scaling laws for random feature regression}, url = {https://proceedings.neurips.cc/paper_files/paper/2024/file/bd18189308a4c45c7d71ca83acf3deaa-Paper-Conference.pdf}, volume = {37}, year = {2024} }
- [UAI] Analysis of Bootstrap and Subsampling in High-dimensional Regularized Regression. Lucas Clarté, Adrien Vandenbroucque, Guillaume Dalle, and 3 more authors. In Proceedings of the Fortieth Conference on Uncertainty in Artificial Intelligence, 2024.
We investigate popular resampling methods for estimating the uncertainty of statistical models, such as subsampling, bootstrap and the jackknife, and their performance in high-dimensional supervised regression tasks. We provide a tight asymptotic description of the biases and variances estimated by these methods in the context of generalized linear models, such as ridge and logistic regression, taking the limit where the number of samples n and dimension d of the covariates grow at a comparable rate: α=n/d fixed. Our findings are three-fold: i) resampling methods are fraught with problems in high dimensions and exhibit the double-descent-like behavior typical of these situations; ii) only when α is large enough do they provide consistent and reliable error estimations (we give convergence rates); iii) in the over-parametrized regime α<1 relevant to modern machine learning practice, their predictions are not consistent, even with optimal regularization.
@inproceedings{clarté2024analysis, title = {Analysis of Bootstrap and Subsampling in High-dimensional Regularized Regression}, author = {Clart\'e, Lucas and Vandenbroucque, Adrien and Dalle, Guillaume and Loureiro, Bruno and Krzakala, Florent and Zdeborov\'a, Lenka}, booktitle = {Proceedings of the Fortieth Conference on Uncertainty in Artificial Intelligence}, pages = {787--819}, year = {2024}, editor = {Kiyavash, Negar and Mooij, Joris M.}, volume = {244}, series = {Proceedings of Machine Learning Research}, month = {15--19 Jul}, publisher = {PMLR}, url = {https://proceedings.mlr.press/v244/clarte24a.html} }
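A minimal pairs-bootstrap sketch for ridge regression at a fixed ratio α = n/d, in the spirit of the resampling procedures analysed above; the dimensions, regularisation and number of bootstrap resamples are illustrative choices, not the paper's setting.

```python
import numpy as np

rng = np.random.default_rng(0)
n, d, lam, B = 300, 150, 1.0, 200          # alpha = n/d = 2

theta_star = rng.standard_normal(d) / np.sqrt(d)
X = rng.standard_normal((n, d))
y = X @ theta_star + 0.5 * rng.standard_normal(n)

def ridge(X, y, lam):
    return np.linalg.solve(X.T @ X + lam * np.eye(X.shape[1]), X.T @ y)

theta_hat = ridge(X, y, lam)

# pairs bootstrap: refit on n rows resampled with replacement, B times
boot = np.empty((B, d))
for b in range(B):
    idx = rng.integers(0, n, size=n)
    boot[b] = ridge(X[idx], y[idx], lam)

# bootstrap estimate of the uncertainty of a new prediction x_new . theta_hat
x_new = rng.standard_normal(d)
preds = boot @ x_new
print(f"prediction {x_new @ theta_hat:.3f}, bootstrap std {preds.std():.3f}")
# the paper's point: at alpha = n/d of order one such resampling estimates can be biased,
# and they only become reliable when alpha is large enough
```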
- [ICML] Asymptotics of Learning with Deep Structured (Random) Features. Dominik Schröder, Daniil Dmitriev, Hugo Cui, and 1 more author. In Proceedings of the 41st International Conference on Machine Learning, 2024.
For a large class of feature maps we provide a tight asymptotic characterisation of the test error associated with learning the readout layer, in the high-dimensional limit where the input dimension, hidden layer widths, and number of training samples are proportionally large. This characterization is formulated in terms of the population covariance of the features. Our work is partially motivated by the problem of learning with Gaussian rainbow neural networks, namely deep non-linear fully-connected networks with random but structured weights, whose row-wise covariances are further allowed to depend on the weights of previous layers. For such networks we also derive a closed-form formula for the feature covariance in terms of the weight matrices. We further find that in some cases our results can capture feature maps learned by deep, finite-width neural networks trained under gradient descent.
@inproceedings{schroder2024asymptotics, title = {Asymptotics of Learning with Deep Structured ({R}andom) Features}, author = {Schr\"{o}der, Dominik and Dmitriev, Daniil and Cui, Hugo and Loureiro, Bruno}, booktitle = {Proceedings of the 41st International Conference on Machine Learning}, pages = {43862--43894}, year = {2024}, editor = {Salakhutdinov, Ruslan and Kolter, Zico and Heller, Katherine and Weller, Adrian and Oliver, Nuria and Scarlett, Jonathan and Berkenkamp, Felix}, volume = {235}, series = {Proceedings of Machine Learning Research}, month = {21--27 Jul}, publisher = {PMLR}, url = {https://proceedings.mlr.press/v235/schroder24a.html} }
- [ICML] Asymptotics of feature learning in two-layer networks after one gradient-step. Hugo Cui, Luca Pesce, Yatin Dandi, and 4 more authors. In Proceedings of the 41st International Conference on Machine Learning, 2024.
In this manuscript, we investigate the problem of how two-layer neural networks learn features from data, and improve over the kernel regime, after being trained with a single gradient descent step. Leveraging the insight from (Ba et al., 2022), we model the trained network by a spiked Random Features (sRF) model. Further building on recent progress on Gaussian universality (Dandi et al., 2023), we provide an exact asymptotic description of the generalization error of the sRF in the high-dimensional limit where the number of samples, the width, and the input dimension grow at a proportional rate. The resulting characterization for sRFs also captures closely the learning curves of the original network model. This enables us to understand how adapting to the data is crucial for the network to efficiently learn non-linear functions in the direction of the gradient - where at initialization it can only express linear functions in this regime.
@inproceedings{cui2024asymptotics, title = {Asymptotics of feature learning in two-layer networks after one gradient-step}, author = {Cui, Hugo and Pesce, Luca and Dandi, Yatin and Krzakala, Florent and Lu, Yue and Zdeborova, Lenka and Loureiro, Bruno}, booktitle = {Proceedings of the 41st International Conference on Machine Learning}, pages = {9662--9695}, year = {2024}, editor = {Salakhutdinov, Ruslan and Kolter, Zico and Heller, Katherine and Weller, Adrian and Oliver, Nuria and Scarlett, Jonathan and Berkenkamp, Felix}, volume = {235}, series = {Proceedings of Machine Learning Research}, month = {21--27 Jul}, publisher = {PMLR}, url = {https://proceedings.mlr.press/v235/cui24d.html} }
- High-dimensional robust regression under heavy-tailed data: asymptotics and universality. Urte Adomaityte, Leonardo Defilippis, Bruno Loureiro, and 1 more author. Journal of Statistical Mechanics: Theory and Experiment, Nov 2024.
We investigate the high-dimensional properties of robust regression estimators in the presence of heavy-tailed contamination of both the covariates and response functions. In particular, we provide a sharp asymptotic characterisation of M-estimators trained on a family of elliptical covariate and noise data distributions including cases where second and higher moments do not exist. We show that, despite being consistent, the Huber loss with optimally tuned location parameter δ is suboptimal in the high-dimensional regime in the presence of heavy-tailed noise, highlighting the necessity of further regularisation to achieve optimal performance. This result also uncovers the existence of a transition in δ as a function of the sample complexity and contamination. Moreover, we derive the decay rates for the excess risk of ridge regression. We show that, while it is both optimal and universal for covariate distributions with finite second moment, its decay rate can be considerably faster when the covariates’ second moment does not exist. Finally, we show that our formulas readily generalise to a richer family of models and data distributions, such as generalised linear estimation with arbitrary convex regularisation trained on mixture models.
@article{adomaityte2024, doi = {10.1088/1742-5468/ad65e6}, url = {https://dx.doi.org/10.1088/1742-5468/ad65e6}, year = {2024}, month = nov, publisher = {IOP Publishing}, volume = {2024}, number = {11}, pages = {114002}, author = {Adomaityte, Urte and Defilippis, Leonardo and Loureiro, Bruno and Sicuro, Gabriele}, title = {High-dimensional robust regression under heavy-tailed data: asymptotics and universality}, journal = {Journal of Statistical Mechanics: Theory and Experiment} }
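A small numerical sketch contrasting a ridge-regularised Huber M-estimator with plain ridge regression under heavy-tailed noise with infinite variance, assuming gradient descent on the Huber risk; all parameters, including the threshold δ, are illustrative, and the sketch does not reproduce the elliptical data model of the paper.

```python
import numpy as np

rng = np.random.default_rng(0)
n, d, lam, delta = 600, 300, 0.5, 1.345

theta_star = rng.standard_normal(d) / np.sqrt(d)
X = rng.standard_normal((n, d))
noise = rng.standard_t(df=2, size=n)           # heavy-tailed noise: second moment does not exist
y = X @ theta_star + noise

def huber_psi(r, delta):                       # derivative of the Huber loss
    return np.clip(r, -delta, delta)

theta = np.zeros(d)
lr = 0.2
for _ in range(5000):                          # gradient descent on the regularised Huber risk
    r = y - X @ theta
    grad = -X.T @ huber_psi(r, delta) / n + lam * theta
    theta -= lr * grad

ridge = np.linalg.solve(X.T @ X / n + lam * np.eye(d), X.T @ y / n)   # square-loss baseline
print("estimation error, Huber:", np.sum((theta - theta_star) ** 2))
print("estimation error, ridge:", np.sum((ridge - theta_star) ** 2))
# with such heavy-tailed noise the Huber estimator is typically far more accurate than ridge
```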
- [JMLR] How Two-Layer Neural Networks Learn, One (Giant) Step at a Time. Yatin Dandi, Florent Krzakala, Bruno Loureiro, and 2 more authors. Journal of Machine Learning Research, 2024.
@article{dandi2023learning, author = {Dandi, Yatin and Krzakala, Florent and Loureiro, Bruno and Pesce, Luca and Stephan, Ludovic}, title = {How Two-Layer Neural Networks Learn, One (Giant) Step at a Time}, journal = {Journal of Machine Learning Research}, year = {2024}, volume = {25}, number = {349}, pages = {1--65}, url = {http://jmlr.org/papers/v25/23-1543.html} }
- [PRE] Gaussian universality of perceptrons with random labels. Federica Gerace, Florent Krzakala, Bruno Loureiro, and 2 more authors. Phys. Rev. E, Mar 2024.
@article{gerace2022gaussian, title = {Gaussian universality of perceptrons with random labels}, author = {Gerace, Federica and Krzakala, Florent and Loureiro, Bruno and Stephan, Ludovic and Zdeborov\'a, Lenka}, journal = {Phys. Rev. E}, volume = {109}, issue = {3}, pages = {034305}, numpages = {18}, year = {2024}, month = mar, publisher = {American Physical Society}, doi = {10.1103/PhysRevE.109.034305}, url = {https://link.aps.org/doi/10.1103/PhysRevE.109.034305}, }
2023
- Escaping mediocrity: how two-layer networks learn hard single-index models with SGD. Luca Arnaboldi, Florent Krzakala, Bruno Loureiro, and 1 more author. 2023.
- [UAI] Expectation consistency for calibration of neural networks. Lucas Clarté, Bruno Loureiro, Florent Krzakala, and 1 more author. In Proceedings of the Thirty-Ninth Conference on Uncertainty in Artificial Intelligence, 2023.
Despite their incredible performance, it is well reported that deep neural networks tend to be overoptimistic about their prediction confidence. Finding effective and efficient calibration methods for neural networks is therefore an important endeavour towards better uncertainty quantification in deep learning. In this manuscript, we introduce a novel calibration technique named expectation consistency (EC), consisting of a post-training rescaling of the last layer weights by enforcing that the average validation confidence coincides with the average proportion of correct labels. First, we show that the EC method achieves similar calibration performance to temperature scaling (TS) across different neural network architectures and data sets, all while requiring similar validation samples and computational resources. However, we argue that EC provides a principled method grounded on a Bayesian optimality principle known as the Nishimori identity. Next, we provide an asymptotic characterization of both TS and EC in a synthetic setting and show that their performance crucially depends on the target function. In particular, we discuss examples where EC significantly outperforms TS.
@inproceedings{clarte2023ec, title = {Expectation consistency for calibration of neural networks}, author = {Clart\'e, Lucas and Loureiro, Bruno and Krzakala, Florent and Zdeborov\'a, Lenka}, booktitle = {Proceedings of the Thirty-Ninth Conference on Uncertainty in Artificial Intelligence}, pages = {443--453}, year = {2023}, editor = {Evans, Robin J. and Shpitser, Ilya}, volume = {216}, series = {Proceedings of Machine Learning Research}, month = {31 Jul--04 Aug}, publisher = {PMLR}, }
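A minimal sketch of the expectation-consistency idea for a binary classifier: choose the rescaling of the validation logits for which the average confidence matches the validation accuracy. In the paper the rescaling acts on the last-layer weights of a trained network; the toy logits and the grid search below are illustrative assumptions.

```python
import numpy as np

def expectation_consistency_scale(logits, labels, s_grid=np.linspace(0.01, 10.0, 2000)):
    """Find a rescaling s of the logits such that the average confidence of the
    rescaled classifier matches its validation accuracy (the EC condition)."""
    acc = np.mean((logits > 0) == labels)          # rescaling by s > 0 leaves predictions unchanged
    def mean_conf(s):
        p = 1.0 / (1.0 + np.exp(-s * logits))
        return np.mean(np.maximum(p, 1.0 - p))
    confs = np.array([mean_conf(s) for s in s_grid])
    return s_grid[np.argmin(np.abs(confs - acc))]

# toy validation set: overconfident logistic scores
rng = np.random.default_rng(0)
z = rng.standard_normal(1000)
labels = rng.random(1000) < 1.0 / (1.0 + np.exp(-z))   # labels drawn from the true probabilities
logits = 3.0 * z                                        # the model inflates its logits by a factor 3
s = expectation_consistency_scale(logits, labels)
print(f"EC rescaling factor: {s:.2f}  (values below 1 shrink overconfident logits)")
```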
- [COLT] From high-dimensional & mean-field dynamics to dimensionless ODEs: A unifying approach to SGD in two-layers networks. Luca Arnaboldi, Ludovic Stephan, Florent Krzakala, and 1 more author. In Proceedings of Thirty Sixth Conference on Learning Theory, 2023.
This manuscript investigates the one-pass stochastic gradient descent (SGD) dynamics of a two-layer neural network trained on Gaussian data and labels generated by a similar, though not necessarily identical, target function. We rigorously analyse the limiting dynamics via a deterministic and low-dimensional description in terms of the sufficient statistics for the population risk. Our unifying analysis bridges different regimes of interest, such as the classical gradient-flow regime of vanishing learning rate, the high-dimensional regime of large input dimension, and the overparameterised “mean-field” regime of large network width, covering as well the intermediate regimes where the limiting dynamics is determined by the interplay between these behaviours. In particular, in the high-dimensional limit, the infinite-width dynamics is found to remain close to a low-dimensional subspace spanned by the target principal directions. Our results therefore provide a unifying picture of the limiting SGD dynamics with synthetic data.
@inproceedings{arnaboldi2023high, title = {From high-dimensional & mean-field dynamics to dimensionless ODEs: A unifying approach to SGD in two-layers networks}, author = {Arnaboldi, Luca and Stephan, Ludovic and Krzakala, Florent and Loureiro, Bruno}, booktitle = {Proceedings of Thirty Sixth Conference on Learning Theory}, pages = {1199--1227}, year = {2023}, editor = {Neu, Gergely and Rosasco, Lorenzo}, volume = {195}, series = {Proceedings of Machine Learning Research}, month = {12--15 Jul}, publisher = {PMLR}, }
- [NeurIPS] Universality laws for Gaussian mixtures in generalized linear models. Yatin Dandi, Ludovic Stephan, Florent Krzakala, and 2 more authors. In Advances in Neural Information Processing Systems, 2023.
@inproceedings{Dandi2023, author = {Dandi, Yatin and Stephan, Ludovic and Krzakala, Florent and Loureiro, Bruno and Zdeborov\'{a}, Lenka}, booktitle = {Advances in Neural Information Processing Systems}, editor = {Oh, A. and Neumann, T. and Globerson, A. and Saenko, K. and Hardt, M. and Levine, S.}, pages = {54754--54768}, publisher = {Curran Associates, Inc.}, title = {Universality laws for Gaussian mixtures in generalized linear models}, url = {https://proceedings.neurips.cc/paper_files/paper/2023/file/abccb8a90b30d45b948360ba41f5a20f-Paper-Conference.pdf}, volume = {36}, year = {2023} }
- [ICML] Are Gaussian Data All You Need? The Extents and Limits of Universality in High-Dimensional Generalized Linear Estimation. Luca Pesce, Florent Krzakala, Bruno Loureiro, and 1 more author. In Proceedings of the 40th International Conference on Machine Learning, 2023.
In this manuscript we consider the problem of generalized linear estimation on Gaussian mixture data with labels given by a single-index model. Our first result is a sharp asymptotic expression for the test and training errors in the high-dimensional regime. Motivated by the recent stream of results on the Gaussian universality of the test and training errors in generalized linear estimation, we ask ourselves the question: "when is a single Gaussian enough to characterize the error?". Our formula allows us to give sharp answers to this question, both in the positive and negative directions. More precisely, we show that the sufficient conditions for Gaussian universality (or lack thereof) crucially depend on the alignment between the target weights and the means and covariances of the mixture clusters, which we precisely quantify. In the particular case of least-squares interpolation, we prove a strong universality property of the training error and show it follows a simple, closed-form expression. Finally, we apply our results to real datasets, clarifying some recent discussions in the literature about Gaussian universality of the errors in this context.
@inproceedings{pesce2023gaussian, title = {Are {G}aussian Data All You Need? {T}he Extents and Limits of Universality in High-Dimensional Generalized Linear Estimation}, author = {Pesce, Luca and Krzakala, Florent and Loureiro, Bruno and Stephan, Ludovic}, booktitle = {Proceedings of the 40th International Conference on Machine Learning}, pages = {27680--27708}, year = {2023}, editor = {Krause, Andreas and Brunskill, Emma and Cho, Kyunghyun and Engelhardt, Barbara and Sabato, Sivan and Scarlett, Jonathan}, volume = {202}, series = {Proceedings of Machine Learning Research}, month = {23--29 Jul}, publisher = {PMLR}, }
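A quick way to probe the question in the abstract numerically: train the same ridge estimator on Gaussian-mixture covariates and on a single Gaussian with matched first and second moments, and compare test errors. The mixture geometry, target and regularisation below are illustrative choices, not the paper's exact model.

```python
import numpy as np

rng = np.random.default_rng(0)
n, d = 800, 400

mu = rng.standard_normal(d) / np.sqrt(d)             # cluster mean direction
theta_star = rng.standard_normal(d) / np.sqrt(d)     # single-index target direction

def sample_mixture(m):
    s = rng.choice([-1.0, 1.0], size=m)               # two symmetric clusters at +/- mu
    return s[:, None] * mu[None, :] + rng.standard_normal((m, d))

def sample_matched_gaussian(m):
    # single Gaussian with the same mean (zero) and covariance  I + mu mu^T
    return rng.standard_normal((m, d)) + rng.standard_normal(m)[:, None] * mu[None, :]

def labels(X):
    return np.tanh(X @ theta_star)                     # single-index labels

def ridge_test_error(Xtr, ytr, Xte, yte, lam=1.0):
    theta = np.linalg.solve(Xtr.T @ Xtr + lam * np.eye(d), Xtr.T @ ytr)
    return np.mean((Xte @ theta - yte) ** 2)

for name, sampler in [("mixture", sample_mixture), ("matched Gaussian", sample_matched_gaussian)]:
    Xtr, Xte = sampler(n), sampler(4000)
    print(name, "test mse:", round(ridge_test_error(Xtr, labels(Xtr), Xte, labels(Xte)), 4))
# the paper characterises when these two errors coincide (Gaussian universality) and how the
# alignment between theta_star and the cluster mean mu can make them differ
```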
- [ICML] Deterministic equivalent and error universality of deep random features learning. Dominik Schröder, Hugo Cui, Daniil Dmitriev, and 1 more author. In Proceedings of the 40th International Conference on Machine Learning, 2023.
This manuscript considers the problem of learning a random Gaussian network function using a fully connected network with frozen intermediate layers and trainable readout layer. This problem can be seen as a natural generalization of the widely studied random features model to deeper architectures. First, we prove Gaussian universality of the test error in a ridge regression setting where the learner and target networks share the same intermediate layers, and provide a sharp asymptotic formula for it. Establishing this result requires proving a deterministic equivalent for traces of the deep random features sample covariance matrices which can be of independent interest. Second, we conjecture the asymptotic Gaussian universality of the test error in the more general setting of arbitrary convex losses and generic learner/target architectures. We provide extensive numerical evidence for this conjecture, which requires the derivation of closed-form expressions for the layer-wise post-activation population covariances. In light of our results, we investigate the interplay between architecture design and implicit regularization.
@inproceedings{schroder2023deterministic, title = {Deterministic equivalent and error universality of deep random features learning}, author = {Schr\"{o}der, Dominik and Cui, Hugo and Dmitriev, Daniil and Loureiro, Bruno}, booktitle = {Proceedings of the 40th International Conference on Machine Learning}, pages = {30285--30320}, year = {2023}, editor = {Krause, Andreas and Brunskill, Emma and Cho, Kyunghyun and Engelhardt, Barbara and Sabato, Sivan and Scarlett, Jonathan}, volume = {202}, series = {Proceedings of Machine Learning Research}, month = {23--29 Jul}, publisher = {PMLR}, url = {https://proceedings.mlr.press/v202/schroder23a.html}, }
- [AISTATS] On double-descent in uncertainty quantification in overparametrized models. Lucas Clarte, Bruno Loureiro, Florent Krzakala, and 1 more author. In Proceedings of The 26th International Conference on Artificial Intelligence and Statistics, 2023.
Uncertainty quantification is a central challenge in reliable and trustworthy machine learning. Naive measures such as last-layer scores are well-known to yield overconfident estimates in the context of overparametrized neural networks. Several methods, ranging from temperature scaling to different Bayesian treatments of neural networks, have been proposed to mitigate overconfidence, most often supported by the numerical observation that they yield better calibrated uncertainty measures. In this work, we provide a sharp comparison between popular uncertainty measures for binary classification in a mathematically tractable model for overparametrized neural networks: the random features model. We discuss a trade-off between classification accuracy and calibration, unveiling a double descent behavior in the calibration curve of optimally regularised estimators as a function of overparametrization. This is in contrast with the empirical Bayes method, which we show to be well calibrated in our setting despite the higher generalization error and overparametrization.
@inproceedings{clarte2022overparametrized, title = {On double-descent in uncertainty quantification in overparametrized models}, author = {Clarte, Lucas and Loureiro, Bruno and Krzakala, Florent and Zdeborova, Lenka}, booktitle = {Proceedings of The 26th International Conference on Artificial Intelligence and Statistics}, pages = {7089--7125}, year = {2023}, editor = {Ruiz, Francisco and Dy, Jennifer and van de Meent, Jan-Willem}, volume = {206}, series = {Proceedings of Machine Learning Research}, month = {25--27 Apr}, publisher = {PMLR}, }
- [MLST] Learning curves for the multi-class teacher–student perceptron. Elisabetta Cornacchia, Francesca Mignacco, Rodrigo Veiga, and 3 more authors. Machine Learning: Science and Technology, Feb 2023.
One of the most classical results in high-dimensional learning theory provides a closed-form expression for the generalisation error of binary classification with a single-layer teacher–student perceptron on i.i.d. Gaussian inputs. Both Bayes-optimal (BO) estimation and empirical risk minimisation (ERM) were extensively analysed in this setting. At the same time, a considerable part of modern machine learning practice concerns multi-class classification. Yet, an analogous analysis for the multi-class teacher–student perceptron was missing. In this manuscript we fill this gap by deriving and evaluating asymptotic expressions for the BO and ERM generalisation errors in the high-dimensional regime. For Gaussian teacher, we investigate the performance of ERM with both cross-entropy and square losses, and explore the role of ridge regularisation in approaching Bayes-optimality. In particular, we observe that regularised cross-entropy minimisation yields close-to-optimal accuracy. Instead, for Rademacher teacher we show that a first-order phase transition arises in the BO performance.
@article{Cornacchia_2023, doi = {10.1088/2632-2153/acb428}, year = {2023}, month = feb, publisher = {IOP Publishing}, volume = {4}, number = {1}, pages = {015019}, author = {Cornacchia, Elisabetta and Mignacco, Francesca and Veiga, Rodrigo and Gerbelot, Cédric and Loureiro, Bruno and Zdeborová, Lenka}, title = {Learning curves for the multi-class teacher–student perceptron}, journal = {Machine Learning: Science and Technology}, }
- [MLST] Theoretical characterization of uncertainty in high-dimensional linear classification. Lucas Clarté, Bruno Loureiro, Florent Krzakala, and 1 more author. Machine Learning: Science and Technology, Jun 2023.
Being able to reliably assess not only the accuracy but also the uncertainty of models’ predictions is an important endeavor in modern machine learning. Even if the model generating the data and labels is known, computing the intrinsic uncertainty after learning the model from a limited number of samples amounts to sampling the corresponding posterior probability measure. Such sampling is computationally challenging in high-dimensional problems and theoretical results on heuristic uncertainty estimators in high-dimensions are thus scarce. In this manuscript, we characterize uncertainty for learning from a limited number of samples of high-dimensional Gaussian input data and labels generated by the probit model. In this setting, the Bayesian uncertainty (i.e. the posterior marginals) can be asymptotically obtained by the approximate message passing algorithm, bypassing the canonical but costly Monte Carlo sampling of the posterior. We then provide a closed-form formula for the joint statistics between the logistic classifier, the uncertainty of the statistically optimal Bayesian classifier and the ground-truth probit uncertainty. The formula allows us to investigate the calibration of the logistic classifier learning from a limited amount of samples. We discuss how over-confidence can be mitigated by appropriately regularizing.
@article{Clarté_2023, doi = {10.1088/2632-2153/acd749}, year = {2023}, month = jun, publisher = {IOP Publishing}, volume = {4}, number = {2}, pages = {025029}, author = {Clarté, Lucas and Loureiro, Bruno and Krzakala, Florent and Zdeborová, Lenka}, title = {Theoretical characterization of uncertainty in high-dimensional linear classification}, journal = {Machine Learning: Science and Technology}, }
- [MLST] Error scaling laws for kernel classification under source and capacity conditions. Hugo Cui, Bruno Loureiro, Florent Krzakala, and 1 more author. Machine Learning: Science and Technology, Aug 2023.
In this manuscript we consider the problem of kernel classification. While worst-case bounds on the decay rate of the prediction error with the number of samples are known for some classifiers, they often fail to accurately describe the learning curves of real data sets. In this work, we consider the important class of data sets satisfying the standard source and capacity conditions, comprising a number of real data sets as we show numerically. Under the Gaussian design, we derive the decay rates for the misclassification (prediction) error as a function of the source and capacity coefficients. We do so for two standard kernel classification settings, namely margin-maximizing support vector machines and ridge classification, and contrast the two methods. We find that our rates tightly describe the learning curves for this class of data sets, and are also observed on real data. Our results can also be seen as an explicit prediction of the exponents of a scaling law for kernel classification that is accurate on some real datasets.
@article{cui2022error, doi = {10.1088/2632-2153/acf041}, year = {2023}, month = aug, publisher = {IOP Publishing}, volume = {4}, number = {3}, pages = {035033}, author = {Cui, Hugo and Loureiro, Bruno and Krzakala, Florent and Zdeborová, Lenka}, title = {Error scaling laws for kernel classification under source and capacity conditions}, journal = {Machine Learning: Science and Technology}, }
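The capacity condition referred to above concerns the power-law decay of the kernel eigenvalues. A rough way to estimate such an exponent numerically is to fit a power law to the spectrum of an empirical Gram matrix, as in the sketch below; the kernel, the synthetic data and the fitting range are arbitrary choices, and the estimate is only indicative.

```python
import numpy as np

rng = np.random.default_rng(0)
n, gamma = 600, 0.1
X = rng.standard_normal((n, 10))                 # placeholder data; a real dataset could be used

# empirical RBF Gram matrix; the capacity exponent is the power law lambda_k ~ k**(-alpha)
# of the kernel eigenvalue decay
d2 = ((X[:, None, :] - X[None, :, :]) ** 2).sum(-1)
K = np.exp(-gamma * d2) / n
eigs = np.sort(np.linalg.eigvalsh(K))[::-1]

ks = np.arange(5, 150)                           # fit in an intermediate range of the spectrum
slope, _ = np.polyfit(np.log(ks), np.log(eigs[ks]), 1)
print(f"estimated capacity exponent alpha ~ {-slope:.2f}")
```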
- [IEEE TIT] Bayesian Inference With Nonlinear Generative Models: Comments on Secure Learning. Ali Bereyhi, Bruno Loureiro, Florent Krzakala, and 2 more authors. IEEE Transactions on Information Theory, Dec 2023.
Unlike the classical linear model, nonlinear generative models have been addressed sparsely in the literature of statistical learning. This work aims to shed light on these models and their secrecy potential. To this end, we invoke the replica method to derive the asymptotic normalized cross entropy in an inverse probability problem whose generative model is described by a Gaussian random field with a generic covariance function. Our derivations further demonstrate the asymptotic statistical decoupling of the Bayesian estimator and specify the decoupled setting for a given nonlinear model. The replica solution depicts that strictly nonlinear models establish an all-or-nothing phase transition: there exists a critical load at which the optimal Bayesian inference changes from perfect to an uncorrelated learning. Based on this finding, we design a new secure coding scheme which achieves the secrecy capacity of the wiretap channel. This interesting result implies that strictly nonlinear generative models are perfectly secured without any secure coding. We justify this latter statement through the analysis of an illustrative model for perfectly secure and reliable inference.
@article{bereyhi2022bayesian, author = {Bereyhi, Ali and Loureiro, Bruno and Krzakala, Florent and Müller, Ralf R. and Schulz-Baldes, Hermann}, journal = {IEEE Transactions on Information Theory}, title = {Bayesian Inference With Nonlinear Generative Models: Comments on Secure Learning}, year = {2023}, volume = {69}, number = {12}, pages = {7998-8028}, keywords = {}, doi = {10.1109/TIT.2023.3325187}, issn = {1557-9654}, month = dec, }
2022
- [NeurIPS] Subspace clustering in high-dimensions: Phase transitions & Statistical-to-Computational gap. Luca Pesce, Bruno Loureiro, Florent Krzakala, and 1 more author. In Advances in Neural Information Processing Systems, 2022.
@inproceedings{pesce2022subspace, author = {Pesce, Luca and Loureiro, Bruno and Krzakala, Florent and Zdeborov\'{a}, Lenka}, booktitle = {Advances in Neural Information Processing Systems}, editor = {Koyejo, S. and Mohamed, S. and Agarwal, A. and Belgrave, D. and Cho, K. and Oh, A.}, pages = {27087--27099}, publisher = {Curran Associates, Inc.}, title = {Subspace clustering in high-dimensions: Phase transitions \& Statistical-to-Computational gap}, volume = {35}, year = {2022}, }
- [NeurIPS] Phase diagram of Stochastic Gradient Descent in high-dimensional two-layer neural networks. Rodrigo Veiga, Ludovic Stephan, Bruno Loureiro, and 2 more authors. In Advances in Neural Information Processing Systems, 2022.
@inproceedings{veiga2022phase, author = {Veiga, Rodrigo and Stephan, Ludovic and Loureiro, Bruno and Krzakala, Florent and Zdeborov\'{a}, Lenka}, booktitle = {Advances in Neural Information Processing Systems}, editor = {Koyejo, S. and Mohamed, S. and Agarwal, A. and Belgrave, D. and Cho, K. and Oh, A.}, pages = {23244--23255}, publisher = {Curran Associates, Inc.}, title = {Phase diagram of Stochastic Gradient Descent in high-dimensional two-layer neural networks}, volume = {35}, year = {2022}, }
- [ICML] Fluctuations, Bias, Variance & Ensemble of Learners: Exact Asymptotics for Convex Losses in High-Dimension. Bruno Loureiro, Cedric Gerbelot, Maria Refinetti, and 2 more authors. In Proceedings of the 39th International Conference on Machine Learning, 2022.
From the sampling of data to the initialisation of parameters, randomness is ubiquitous in modern Machine Learning practice. Understanding the statistical fluctuations engendered by the different sources of randomness in prediction is therefore key to understanding robust generalisation. In this manuscript we develop a quantitative and rigorous theory for the study of fluctuations in an ensemble of generalised linear models trained on different, but correlated, features in high-dimensions. In particular, we provide a complete description of the asymptotic joint distribution of the empirical risk minimiser for generic convex loss and regularisation in the high-dimensional limit. Our result encompasses a rich set of classification and regression tasks, such as the lazy regime of overparametrised neural networks, or equivalently the random features approximation of kernels. While allowing to study directly the mitigating effect of ensembling (or bagging) on the bias-variance decomposition of the test error, our analysis also helps disentangle the contribution of statistical fluctuations, and the singular role played by the interpolation threshold that are at the roots of the “double-descent” phenomenon.
@inproceedings{pmlr-v162-loureiro22a, title = {Fluctuations, Bias, Variance & Ensemble of Learners: Exact Asymptotics for Convex Losses in High-Dimension}, author = {Loureiro, Bruno and Gerbelot, Cedric and Refinetti, Maria and Sicuro, Gabriele and Krzakala, Florent}, booktitle = {Proceedings of the 39th International Conference on Machine Learning}, pages = {14283--14314}, year = {2022}, editor = {Chaudhuri, Kamalika and Jegelka, Stefanie and Song, Le and Szepesvari, Csaba and Niu, Gang and Sabato, Sivan}, volume = {162}, series = {Proceedings of Machine Learning Research}, month = {17--23 Jul}, publisher = {PMLR}, url = {https://proceedings.mlr.press/v162/loureiro22a.html}, }
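A small sketch of the ensembling setup studied above: several random-feature ridge regressors are trained on the same data but with independent feature maps, and their predictions averaged. Dimensions, the tanh feature map and the regularisation are illustrative; the paper computes the corresponding bias-variance decomposition exactly.

```python
import numpy as np

rng = np.random.default_rng(0)
d, p, n, K, lam = 100, 300, 200, 10, 1e-2

w_star = rng.standard_normal(d) / np.sqrt(d)
Xtr = rng.standard_normal((n, d)); ytr = Xtr @ w_star + 0.3 * rng.standard_normal(n)
Xte = rng.standard_normal((2000, d)); yte = Xte @ w_star

def rf_predict(seed):
    F = np.random.default_rng(seed).standard_normal((d, p)) / np.sqrt(d)   # independent feature map
    Ztr, Zte = np.tanh(Xtr @ F), np.tanh(Xte @ F)
    a = np.linalg.solve(Ztr.T @ Ztr + lam * np.eye(p), Ztr.T @ ytr)        # ridge readout
    return Zte @ a

preds = np.stack([rf_predict(s) for s in range(K)])      # K learners, same data, different features
single = np.mean((preds[0] - yte) ** 2)
ensemble = np.mean((preds.mean(axis=0) - yte) ** 2)
print(f"single learner mse {single:.3f}, ensemble of {K} mse {ensemble:.3f}")
# averaging removes the variance due to the random projections, one of the sources of
# fluctuations whose joint statistics the paper characterises
```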
- [MSML] The Gaussian equivalence of generative models for learning with shallow neural networks. Sebastian Goldt, Bruno Loureiro, Galen Reeves, and 3 more authors. In Proceedings of the 2nd Mathematical and Scientific Machine Learning Conference, 2022.
Understanding the impact of data structure on the computational tractability of learning is a key challenge for the theory of neural networks. Many theoretical works do not explicitly model training data, or assume that inputs are drawn component-wise independently from some simple probability distribution. Here, we go beyond this simple paradigm by studying the performance of neural networks trained on data drawn from pre-trained generative models. This is possible due to a Gaussian equivalence stating that the key metrics of interest, such as the training and test errors, can be fully captured by an appropriately chosen Gaussian model. We provide three strands of rigorous, analytical and numerical evidence corroborating this equivalence. First, we establish rigorous conditions for the Gaussian equivalence to hold in the case of single-layer generative models, as well as deterministic rates for convergence in distribution. Second, we leverage this equivalence to derive a closed set of equations describing the generalisation performance of two widely studied machine learning problems: two-layer neural networks trained using one-pass stochastic gradient descent, and full-batch pre-learned features or kernel methods. Finally, we perform experiments demonstrating how our theory applies to deep, pre-trained generative models. These results open a viable path to the theoretical study of machine learning models with realistic data.
@inproceedings{goldt2020phase, title = {The Gaussian equivalence of generative models for learning with shallow neural networks}, author = {Goldt, Sebastian and Loureiro, Bruno and Reeves, Galen and Krzakala, Florent and Mezard, Marc and Zdeborova, Lenka}, booktitle = {Proceedings of the 2nd Mathematical and Scientific Machine Learning Conference}, pages = {426--471}, year = {2022}, editor = {Bruna, Joan and Hesthaven, Jan and Zdeborova, Lenka}, volume = {145}, series = {Proceedings of Machine Learning Research}, month = {16--19 Aug}, publisher = {PMLR}, }
2021
- [NeurIPS] Learning Gaussian Mixtures with Generalized Linear Models: Precise Asymptotics in High-dimensions. Bruno Loureiro, Gabriele Sicuro, Cedric Gerbelot, and 3 more authors. In Advances in Neural Information Processing Systems, 2021.
@inproceedings{loureiro2021learning, author = {Loureiro, Bruno and Sicuro, Gabriele and Gerbelot, Cedric and Pacco, Alessandro and Krzakala, Florent and Zdeborov\'{a}, Lenka}, booktitle = {Advances in Neural Information Processing Systems}, editor = {Ranzato, M. and Beygelzimer, A. and Dauphin, Y. and Liang, P.S. and Vaughan, J. Wortman}, pages = {10144--10157}, publisher = {Curran Associates, Inc.}, title = {Learning Gaussian Mixtures with Generalized Linear Models: Precise Asymptotics in High-dimensions}, volume = {34}, year = {2021}, }
- [NeurIPS] Generalization Error Rates in Kernel Regression: The Crossover from the Noiseless to Noisy Regime. Hugo Cui, Bruno Loureiro, Florent Krzakala, and 1 more author. In Advances in Neural Information Processing Systems, 2021.
@inproceedings{cui2021generalization, author = {Cui, Hugo and Loureiro, Bruno and Krzakala, Florent and Zdeborov\'{a}, Lenka}, booktitle = {Advances in Neural Information Processing Systems}, editor = {Ranzato, M. and Beygelzimer, A. and Dauphin, Y. and Liang, P.S. and Vaughan, J. Wortman}, pages = {10131--10143}, publisher = {Curran Associates, Inc.}, title = {Generalization Error Rates in Kernel Regression: The Crossover from the Noiseless to Noisy Regime}, volume = {34}, year = {2021}, }
- [NeurIPS] Learning curves of generic features maps for realistic datasets with a teacher-student model. Bruno Loureiro, Cedric Gerbelot, Hugo Cui, and 4 more authors. In Advances in Neural Information Processing Systems, 2021.
@inproceedings{loureiro2021capturing, author = {Loureiro, Bruno and Gerbelot, Cedric and Cui, Hugo and Goldt, Sebastian and Krzakala, Florent and Mezard, Marc and Zdeborov\'{a}, Lenka}, booktitle = {Advances in Neural Information Processing Systems}, editor = {Ranzato, M. and Beygelzimer, A. and Dauphin, Y. and Liang, P.S. and Vaughan, J. Wortman}, pages = {18137--18151}, publisher = {Curran Associates, Inc.}, title = {Learning curves of generic features maps for realistic datasets with a teacher-student model}, volume = {34}, year = {2021}, }
- [IEEE-TIT] The Spiked Matrix Model With Generative Priors. Benjamin Aubin, Bruno Loureiro, Antoine Maillard, and 2 more authors. IEEE Transactions on Information Theory, Feb 2021.
We investigate the statistical and algorithmic properties of random neural-network generative priors in a simple inference problem: spiked-matrix estimation. We establish a rigorous expression for the performance of the Bayes-optimal estimator in the high-dimensional regime, and identify the statistical threshold for weak-recovery of the spike. Next, we derive a message-passing algorithm taking into account the latent structure of the spike, and show that its performance is asymptotically optimal for natural choices of the generative network architecture. The absence of an algorithmic gap in this case is in stark contrast to known results for sparse spikes, another popular prior for modelling low-dimensional signals, and for which no algorithm is known to achieve the optimal statistical threshold. Finally, we show that linearising our message passing algorithm yields a simple spectral method also achieving the optimal threshold for reconstruction. We conclude with an experiment on a real data set showing that our bespoke spectral method outperforms vanilla PCA.
@article{aubin2019spiked, author = {Aubin, Benjamin and Loureiro, Bruno and Maillard, Antoine and Krzakala, Florent and Zdeborová, Lenka}, journal = {IEEE Transactions on Information Theory}, title = {The Spiked Matrix Model With Generative Priors}, year = {2021}, volume = {67}, number = {2}, pages = {1156-1181}, keywords = {}, doi = {10.1109/TIT.2020.3033985}, issn = {1557-9654}, month = feb, }
2020
- [NeurIPS] Phase retrieval in high dimensions: Statistical and computational phase transitions. Antoine Maillard, Bruno Loureiro, Florent Krzakala, and 1 more author. In Advances in Neural Information Processing Systems, 2020.
@inproceedings{maillard2020phase, author = {Maillard, Antoine and Loureiro, Bruno and Krzakala, Florent and Zdeborov\'{a}, Lenka}, booktitle = {Advances in Neural Information Processing Systems}, editor = {Larochelle, H. and Ranzato, M. and Hadsell, R. and Balcan, M.F. and Lin, H.}, pages = {11071--11082}, publisher = {Curran Associates, Inc.}, title = {Phase retrieval in high dimensions: Statistical and computational phase transitions}, volume = {33}, year = {2020}, }
- [ICML] Generalisation error in learning with random features and the hidden manifold model. Federica Gerace, Bruno Loureiro, Florent Krzakala, and 2 more authors. In Proceedings of the 37th International Conference on Machine Learning, 2020.
We study generalised linear regression and classification for a synthetically generated dataset encompassing different problems of interest, such as learning with random features, neural networks in the lazy training regime, and the hidden manifold model. We consider the high-dimensional regime and using the replica method from statistical physics, we provide a closed-form expression for the asymptotic generalisation performance in these problems, valid in both the under- and over-parametrised regimes and for a broad choice of generalised linear model loss functions. In particular, we show how to obtain analytically the so-called double descent behaviour for logistic regression with a peak at the interpolation threshold, we illustrate the superiority of orthogonal against random Gaussian projections in learning with random features, and discuss the role played by correlations in the data generated by the hidden manifold model. Beyond the interest in these particular problems, the theoretical formalism introduced in this manuscript provides a path to further extensions to more complex tasks.
@inproceedings{Gerace2020, title = {Generalisation error in learning with random features and the hidden manifold model}, author = {Gerace, Federica and Loureiro, Bruno and Krzakala, Florent and Mezard, Marc and Zdeborova, Lenka}, booktitle = {Proceedings of the 37th International Conference on Machine Learning}, pages = {3452--3462}, year = {2020}, editor = {III, Hal Daumé and Singh, Aarti}, volume = {119}, series = {Proceedings of Machine Learning Research}, month = {13--18 Jul}, publisher = {PMLR}, }
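A quick numerical illustration of the double descent behaviour mentioned in the abstract, using minimum-norm random-features regression and sweeping the number of features p across the interpolation threshold p = n. All sizes and the tanh feature map are illustrative choices; the paper derives the corresponding curves in closed form.

```python
import numpy as np

rng = np.random.default_rng(0)
d, n, n_test = 100, 200, 2000

w_star = rng.standard_normal(d) / np.sqrt(d)
def sample(m):
    X = rng.standard_normal((m, d))
    return X, X @ w_star + 0.2 * rng.standard_normal(m)     # noisy linear target

Xtr, ytr = sample(n)
Xte, yte = sample(n_test)

print("  p/n   test mse")
for p in [25, 50, 100, 150, 190, 200, 210, 250, 400, 800, 1600]:
    F = rng.standard_normal((d, p)) / np.sqrt(d)             # random first-layer projection
    Ztr, Zte = np.tanh(Xtr @ F), np.tanh(Xte @ F)             # random features
    a = np.linalg.pinv(Ztr) @ ytr                             # minimum-norm least-squares readout
    print(f"{p / n:5.2f}   {np.mean((Zte @ a - yte) ** 2):8.3f}")
# the test error typically peaks near the interpolation threshold p = n and decreases again
# in the overparametrised regime: the double descent curve
```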
- [MSML] Exact asymptotics for phase retrieval and compressed sensing with random generative priors. Benjamin Aubin, Bruno Loureiro, Antoine Baker, and 2 more authors. In Proceedings of The First Mathematical and Scientific Machine Learning Conference, 2020.
We consider the problem of compressed sensing and of (real-valued) phase retrieval with random measurement matrix. We derive sharp asymptotics for the information-theoretically optimal performance and for the best known polynomial algorithm for an ensemble of generative priors consisting of fully connected deep neural networks with random weight matrices and arbitrary activations. We compare the performance to sparse separable priors and conclude that in all cases analysed generative priors have a smaller statistical-to-algorithmic gap than sparse priors, giving theoretical support to previous experimental observations that generative priors might be advantageous in terms of algorithmic performance. In particular, while sparsity does not allow to perform compressive phase retrieval efficiently close to its information-theoretic limit, it is found that under the random generative prior compressed phase retrieval becomes tractable.
@inproceedings{aubin2019phase, title = {Exact asymptotics for phase retrieval and compressed sensing with random generative priors}, author = {Aubin, Benjamin and Loureiro, Bruno and Baker, Antoine and Krzakala, Florent and Zdeborov\'a, Lenka}, booktitle = {Proceedings of The First Mathematical and Scientific Machine Learning Conference}, pages = {55--73}, year = {2020}, editor = {Lu, Jianfeng and Ward, Rachel}, volume = {107}, series = {Proceedings of Machine Learning Research}, month = {20--24 Jul}, publisher = {PMLR}, }
2018
- [JHEP] Coherence effects in disordered geometries with a field-theory dual. Tomás Andrade, Antonio M. García-García, and Bruno Loureiro. Journal of High Energy Physics, Mar 2018.
@article{Andrade2018, author = {Andrade, Tom{\'a}s and Garc{\'i}a-Garc{\'i}a, Antonio M. and Loureiro, Bruno}, title = {Coherence effects in disordered geometries with a field-theory dual}, journal = {Journal of High Energy Physics}, year = {2018}, month = mar, day = {29}, volume = {2018}, number = {3}, pages = {187}, issn = {1029-8479}, doi = {10.1007/JHEP03(2018)187}, }
- [PRL] Chaotic-Integrable Transition in the Sachdev-Ye-Kitaev Model. Antonio M. García-García, Bruno Loureiro, Aurelio Romero-Bermúdez, and 1 more author. Phys. Rev. Lett., Jun 2018.
@article{PhysRevLett.120.241603, title = {Chaotic-Integrable Transition in the Sachdev-Ye-Kitaev Model}, author = {Garc\'{\i}a-Garc\'{\i}a, Antonio M. and Loureiro, Bruno and Romero-Berm\'udez, Aurelio and Tezuka, Masaki}, journal = {Phys. Rev. Lett.}, volume = {120}, issue = {24}, pages = {241603}, numpages = {6}, year = {2018}, month = jun, publisher = {American Physical Society}, doi = {10.1103/PhysRevLett.120.241603}, }
2016
- [PRD] Transport in a gravity dual with a varying gravitational coupling constant. Antonio M. García-García, Bruno Loureiro, and Aurelio Romero-Bermúdez. Phys. Rev. D, Oct 2016.
@article{PhysRevD.94.086007, title = {Transport in a gravity dual with a varying gravitational coupling constant}, author = {Garc\'{\i}a-Garc\'{\i}a, Antonio M. and Loureiro, Bruno and Romero-Berm\'udez, Aurelio}, journal = {Phys. Rev. D}, volume = {94}, issue = {8}, pages = {086007}, numpages = {21}, year = {2016}, month = oct, publisher = {American Physical Society}, doi = {10.1103/PhysRevD.94.086007}, }
- [PRD] Marginal and irrelevant disorder in Einstein-Maxwell backgrounds. Antonio M. García-García and Bruno Loureiro. Phys. Rev. D, Mar 2016.
@article{PhysRevD.93.065025, title = {Marginal and irrelevant disorder in Einstein-Maxwell backgrounds}, author = {Garc\'{\i}a-Garc\'{\i}a, Antonio M. and Loureiro, Bruno}, journal = {Phys. Rev. D}, volume = {93}, issue = {6}, pages = {065025}, numpages = {13}, year = {2016}, month = mar, publisher = {American Physical Society}, doi = {10.1103/PhysRevD.93.065025}, }