publications
Publications by category, in reverse chronological order. Generated by jekyll-scholar.
2025
- Asymptotics of SGD in Sequence-Single Index Models and Single-Layer Attention Networks. Luca Arnaboldi, Bruno Loureiro, Ludovic Stephan, and 2 more authors. arXiv preprint, 2025.
@misc{arnaboldi2025sequence, title = {Asymptotics of SGD in Sequence-Single Index Models and Single-Layer Attention Networks}, author = {Arnaboldi, Luca and Loureiro, Bruno and Stephan, Ludovic and Krzakala, Florent and Zdeborova, Lenka}, year = {2025}, eprint = {2506.02651}, archiveprefix = {arXiv}, primaryclass = {stat.ML} }
- Dynamical mean-field analysis of adaptive Langevin diffusions: Replica-symmetric fixed point and empirical Bayes. Zhou Fan, Justin Ko, Bruno Loureiro, and 2 more authors. arXiv preprint, 2025.
@misc{fan2025replica, title = {Dynamical mean-field analysis of adaptive Langevin diffusions: Replica-symmetric fixed point and empirical Bayes}, author = {Fan, Zhou and Ko, Justin and Loureiro, Bruno and Lu, Yue M. and Shen, Yandi}, year = {2025}, eprint = {2504.15558}, archiveprefix = {arXiv}, primaryclass = {math.ST} }
- Dynamical mean-field analysis of adaptive Langevin diffusions: Propagation-of-chaos and convergence of the linear response. Zhou Fan, Justin Ko, Bruno Loureiro, and 2 more authors. arXiv preprint, 2025.
@misc{fan2025propagation, title = {Dynamical mean-field analysis of adaptive Langevin diffusions: Propagation-of-chaos and convergence of the linear response}, author = {Fan, Zhou and Ko, Justin and Loureiro, Bruno and Lu, Yue M. and Shen, Yandi}, year = {2025}, eprint = {2504.15556}, archiveprefix = {arXiv}, primaryclass = {math.ST} }
- Optimal Spectral Transitions in High-Dimensional Multi-Index Models. Leonardo Defilippis, Yatin Dandi, Pierre Mergny, and 2 more authors. arXiv preprint, 2025.
@misc{defilippis2025optimal, title = {Optimal Spectral Transitions in High-Dimensional Multi-Index Models}, author = {Defilippis, Leonardo and Dandi, Yatin and Mergny, Pierre and Krzakala, Florent and Loureiro, Bruno}, year = {2025}, eprint = {2502.02545}, archiveprefix = {arXiv}, primaryclass = {cs.LG} }
- [AISTATS] A Random Matrix Theory Perspective on the Spectrum of Learned Features and Asymptotic Generalization Capabilities. Yatin Dandi, Luca Pesce, Hugo Cui, and 3 more authors. In Proceedings of The 28th International Conference on Artificial Intelligence and Statistics, 2025.
A key property of neural networks is their capacity of adapting to data during training. Yet, our current mathematical understanding of feature learning and its relationship to generalization remain limited. In this work, we provide a random matrix analysis of how fully-connected two-layer neural networks adapt to the target function after a single, but aggressive, gradient descent step. We rigorously establish the equivalence between the updated features and an isotropic spiked random feature model, in the limit of large batch size. For the latter model, we derive a deterministic equivalent description of the feature empirical covariance matrix in terms of certain low-dimensional operators. This allows us to sharply characterize the impact of training in the asymptotic feature spectrum, and in particular, provides a theoretical grounding for how the tails of the feature spectrum modify with training. The deterministic equivalent further yields the exact asymptotic generalization error, shedding light on the mechanisms behind its improvement in the presence of feature learning. Our result goes beyond standard random matrix ensembles, and therefore we believe it is of independent technical interest. Different from previous work, our result holds in the challenging maximal learning rate regime, is fully rigorous and allows for finitely supported second layer initialization, which turns out to be crucial for studying the functional expressivity of the learned features. This provides a sharp description of the impact of feature learning in the generalization of two-layer neural networks, beyond the random features and lazy training regimes.
@inproceedings{dandi2024random, title = {A Random Matrix Theory Perspective on the Spectrum of Learned Features and Asymptotic Generalization Capabilities}, author = {Dandi, Yatin and Pesce, Luca and Cui, Hugo and Krzakala, Florent and Lu, Yue and Loureiro, Bruno}, booktitle = {Proceedings of The 28th International Conference on Artificial Intelligence and Statistics}, pages = {2224--2232}, year = {2025}, editor = {Li, Yingzhen and Mandt, Stephan and Agrawal, Shipra and Khan, Emtiyaz}, volume = {258}, series = {Proceedings of Machine Learning Research}, month = {03--05 May}, publisher = {PMLR}, url = {https://proceedings.mlr.press/v258/dandi25a.html} }
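As a rough numerical companion to the abstract above, the sketch below takes one aggressive full-batch gradient step on the first layer of a two-layer ReLU network and prints the top eigenvalues of the empirical feature covariance before and after the step. The dimensions, the learning-rate scaling and the ReLU single-index target are illustrative assumptions, not the exact setting of the paper.

```python
import numpy as np

rng = np.random.default_rng(0)
d, p, n = 400, 400, 2000          # input dimension, hidden width, batch size (illustrative)
eta = np.sqrt(p)                  # an aggressive learning rate growing with the width

W = rng.standard_normal((p, d))                   # first-layer weights
a = rng.choice([-1.0, 1.0], size=p)               # finitely supported second layer
w_star = rng.standard_normal(d)                   # teacher direction

X = rng.standard_normal((n, d))
y = np.maximum(X @ w_star / np.sqrt(d), 0.0)      # illustrative relu single-index target

def features(Wmat, inputs):
    return np.maximum(inputs @ Wmat.T / np.sqrt(d), 0.0)

# one full-batch gradient step on the first layer, square loss, readout frozen
Z = features(W, X)
resid = Z @ a / np.sqrt(p) - y
grad_W = ((resid[:, None] * (Z > 0) * a[None, :] / np.sqrt(p)).T @ X) / (n * np.sqrt(d))
W_after = W - eta * grad_W

# spectrum of the empirical feature covariance before and after the step, on fresh inputs
X_fresh = rng.standard_normal((n, d))
for name, Wt in [("before", W), ("after ", W_after)]:
    F = features(Wt, X_fresh)
    eigs = np.linalg.eigvalsh(F.T @ F / n)
    print(name, "largest eigenvalues:", np.round(eigs[-4:][::-1], 2))
# the paper characterises how such a step modifies the tails and outliers of this spectrum
```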
- [MLST] A Theoretical Perspective on Mode Collapse in Variational Inference. Roman Soletskyi, Marylou Gabrié, and Bruno Loureiro. Machine Learning: Science and Technology, 2025.
While deep learning has expanded the possibilities for highly expressive variational families, the practical benefits of these tools for variational inference (VI) are often limited by the minimization of the traditional Kullback-Leibler objective, which can yield suboptimal solutions. A major challenge in this context is mode collapse: the phenomenon where a model concentrates on a few modes of the target distribution during training, despite being statistically capable of expressing them all. In this work, we carry out a theoretical investigation of mode collapse for the gradient flow on Gaussian mixture models. We identify the key low-dimensional statistics characterizing the flow, and derive a closed set of low-dimensional equations governing their evolution. Leveraging this compact description, we show that mode collapse is present even in statistically favorable scenarios, and identify two key mechanisms driving it: mean alignment and vanishing weight. Our theoretical findings are consistent with the implementation of VI using normalizing flows, a class of popular generative models, thereby offering practical insights.
@article{soletskyi2024theoretical, author = {Soletskyi, Roman and Gabrié, Marylou and Loureiro, Bruno}, title = {A Theoretical Perspective on Mode Collapse in Variational Inference}, journal = {Machine Learning: Science and Technology}, url = {http://iopscience.iop.org/article/10.1088/2632-2153/adde2a}, year = {2025} }
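A minimal sketch of the mode-collapse phenomenon discussed above, assuming PyTorch is available: a two-component Gaussian mixture variational family is fitted to a bimodal target by gradient descent on the reverse KL divergence, computed here by quadrature on a 1D grid. The target, initialisation and optimiser settings are illustrative assumptions rather than the paper's exact gradient-flow setup.

```python
import math
import torch

torch.manual_seed(0)
grid = torch.linspace(-10.0, 10.0, 2001)          # quadrature grid for 1D densities
dx = (grid[1] - grid[0]).item()
a = 3.0                                           # target modes at +a and -a

def log_normal(x, mu):                            # unit-variance Gaussian log-density
    return -0.5 * (x - mu) ** 2 - 0.5 * math.log(2 * math.pi)

log_p = torch.logsumexp(torch.stack([log_normal(grid, a), log_normal(grid, -a)]), 0) + math.log(0.5)

# variational family: two-component Gaussian mixture with learnable means and weight,
# initialised with both means on the same side of the origin
mu = torch.tensor([0.5, 1.5], requires_grad=True)
logit_w = torch.tensor(0.0, requires_grad=True)
opt = torch.optim.SGD([mu, logit_w], lr=0.05)

for step in range(3000):
    w = torch.sigmoid(logit_w)
    log_q = torch.logsumexp(torch.stack([log_normal(grid, mu[0]) + torch.log(w),
                                         log_normal(grid, mu[1]) + torch.log(1 - w)]), 0)
    kl = torch.sum(torch.exp(log_q) * (log_q - log_p)) * dx     # reverse KL(q || p) by quadrature
    opt.zero_grad()
    kl.backward()
    opt.step()

print("fitted means:", mu.detach().tolist(), " weight:", torch.sigmoid(logit_w).item())
# with this initialisation the flow typically drags both means towards the +a mode
# (mean alignment), leaving the -a mode uncovered, even though the family is expressive enough
```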
- [AISTATS] Fundamental computational limits of weak learnability in high-dimensional multi-index models. Emanuele Troiani, Yatin Dandi, Leonardo Defilippis, and 3 more authors. In Proceedings of The 28th International Conference on Artificial Intelligence and Statistics, 2025.
Multi-index models, functions which only depend on the covariates through a non-linear transformation of their projection on a subspace, are a useful benchmark for investigating feature learning with neural networks. This paper examines the theoretical boundaries of efficient learnability in this hypothesis class, focusing particularly on the minimum sample complexity required for weakly recovering their low-dimensional structure with first-order iterative algorithms, in the high-dimensional regime where the number of samples n = αd is proportional to the covariate dimension d. Our findings unfold in three parts: (i) first, we identify under which conditions a trivial subspace can be learned with a single step of a first-order algorithm for any α > 0; (ii) second, in the case where the trivial subspace is empty, we provide necessary and sufficient conditions for the existence of an easy subspace consisting of directions that can be learned only above a certain sample complexity α > α_c. The critical threshold α_c marks the presence of a computational phase transition, in the sense that it is conjectured that no efficient iterative algorithm can succeed for α < α_c. In a limited but interesting set of really hard directions, akin to the parity problem, α_c is found to diverge. Finally, (iii) we demonstrate that interactions between different directions can result in an intricate hierarchical learning phenomenon, where some directions can be learned sequentially when coupled to easier ones. Our analytical approach is built on the optimality of approximate message-passing algorithms among first-order iterative methods, delineating the fundamental learnability limit across a broad spectrum of algorithms, including neural networks trained with gradient descent.
@inproceedings{troiani2024fundamental, title = {Fundamental computational limits of weak learnability in high-dimensional multi-index models}, author = {Troiani, Emanuele and Dandi, Yatin and Defilippis, Leonardo and Zdeborova, Lenka and Loureiro, Bruno and Krzakala, Florent}, booktitle = {Proceedings of The 28th International Conference on Artificial Intelligence and Statistics}, pages = {2467--2475}, year = {2025}, editor = {Li, Yingzhen and Mandt, Stephan and Agrawal, Shipra and Khan, Emtiyaz}, volume = {258}, series = {Proceedings of Machine Learning Research}, month = {03--05 May}, publisher = {PMLR}, url = {https://proceedings.mlr.press/v258/troiani25a.html} }
- [AISTATS] A High Dimensional Statistical Model for Adversarial Training: Geometry and Trade-Offs. Kasimir Tanner, Matteo Vilucchio, Bruno Loureiro, and 1 more author. In Proceedings of The 28th International Conference on Artificial Intelligence and Statistics, 2025.
This work investigates adversarial training in the context of margin-based linear classifiers in the high-dimensional regime where the dimension d and the number of data points n diverge with a fixed ratio α= n / d. We introduce a tractable mathematical model where the interplay between the data and adversarial attacker geometries can be studied, while capturing the core phenomenology observed in the adversarial robustness literature. Our main theoretical contribution is an exact asymptotic description of the sufficient statistics for the adversarial empirical risk minimiser, under generic convex and non-increasing losses for a Block Feature Model. Our results allow us to precisely characterise which directions in the data are associated with a higher generalisation/robustness trade-off, as defined by a robustness and a usefulness metric. This goes beyond previous models in the literature, which fail to capture a difference in performance between adversarially trained models in the high sample complexity regime. In particular, we unveil the existence of directions which can be defended without penalising accuracy. Finally, we show the advantage of defending non-robust features during training, identifying a uniform protection as an inherently effective defence mechanism.
@inproceedings{tanner2024high, title = {A High Dimensional Statistical Model for Adversarial Training: Geometry and Trade-Offs}, author = {Tanner, Kasimir and Vilucchio, Matteo and Loureiro, Bruno and Krzakala, Florent}, booktitle = {Proceedings of The 28th International Conference on Artificial Intelligence and Statistics}, pages = {2530--2538}, year = {2025}, editor = {Li, Yingzhen and Mandt, Stephan and Agrawal, Shipra and Khan, Emtiyaz}, volume = {258}, series = {Proceedings of Machine Learning Research}, month = {03--05 May}, publisher = {PMLR}, url = {https://proceedings.mlr.press/v258/tanner25a.html} }
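For a linear classifier, the worst-case l2 perturbation of the inputs has a closed form, which makes adversarial empirical risk minimisation easy to sketch numerically. The snippet below is such a sketch on isotropic Gaussian data with a linear teacher; it does not implement the Block Feature Model or the exact robustness/usefulness metrics of the paper, and all parameters are illustrative.

```python
import numpy as np

rng = np.random.default_rng(0)
n, d, eps, lam = 400, 200, 0.5, 1e-2       # samples, dimension, attack radius, ridge strength

w_star = rng.standard_normal(d) / np.sqrt(d)
X = rng.standard_normal((n, d))
y = np.sign(X @ w_star)

def adv_grad(w, X, y, eps):
    # for a linear classifier the worst-case l2 perturbation of radius eps simply
    # reduces the margin y <w, x> by eps ||w||, so adversarial training has a closed form
    norm = np.linalg.norm(w) + 1e-12
    margins = y * (X @ w) - eps * norm
    s = -1.0 / (1.0 + np.exp(margins))                      # derivative of the logistic loss
    grad_margin = y[:, None] * X - eps * (w / norm)[None, :]
    return (s[:, None] * grad_margin).mean(axis=0) + lam * w

w = np.zeros(d)
for _ in range(2000):                                        # gradient descent on the adversarial risk
    w -= 0.5 * adv_grad(w, X, y, eps)

X_test = rng.standard_normal((5000, d))
y_test = np.sign(X_test @ w_star)
clean = np.mean(np.sign(X_test @ w) == y_test)
robust = np.mean(y_test * (X_test @ w) - eps * np.linalg.norm(w) > 0)
print(f"clean accuracy {clean:.3f}   robust accuracy at eps={eps}: {robust:.3f}")
```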
2024
- On the Geometry of Regularization in Adversarial Training: High-Dimensional Asymptotics and Generalization Bounds. Matteo Vilucchio, Nikolaos Tsilivis, Bruno Loureiro, and 1 more author. arXiv preprint, 2024.
@misc{vilucchio2024geometry, title = {On the Geometry of Regularization in Adversarial Training: High-Dimensional Asymptotics and Generalization Bounds}, author = {Vilucchio, Matteo and Tsilivis, Nikolaos and Loureiro, Bruno and Kempe, Julia}, year = {2024}, eprint = {2410.16073}, archiveprefix = {arXiv}, primaryclass = {stat.ML} }
- [ICML] Online Learning and Information Exponents: The Importance of Batch size & Time/Complexity Tradeoffs. Luca Arnaboldi, Yatin Dandi, Florent Krzakala, and 3 more authors. In Proceedings of the 41st International Conference on Machine Learning, 2024.
We study the impact of the batch size n_b on the iteration time T of training two-layer neural networks with one-pass stochastic gradient descent (SGD) on multi-index target functions of isotropic covariates. We characterize the optimal batch size minimizing the iteration time as a function of the hardness of the target, as characterized by the information exponents. We show that performing gradient updates with large batches n_b ≲ d^{ℓ/2} minimizes the training time without changing the total sample complexity, where ℓ is the information exponent of the target to be learned and d is the input dimension. However, larger batch sizes, n_b ≫ d^{ℓ/2}, are detrimental for improving the time complexity of SGD. We provably overcome this fundamental limitation via a different training protocol, Correlation loss SGD, which suppresses the auto-correlation terms in the loss function. We show that one can track the training progress by a system of low-dimensional ordinary differential equations (ODEs). Finally, we validate our theoretical results with numerical experiments.
@inproceedings{arnaboldi2024online, title = {Online Learning and Information Exponents: The Importance of Batch size & {T}ime/{C}omplexity Tradeoffs}, author = {Arnaboldi, Luca and Dandi, Yatin and Krzakala, Florent and Loureiro, Bruno and Pesce, Luca and Stephan, Ludovic}, booktitle = {Proceedings of the 41st International Conference on Machine Learning}, pages = {1730--1762}, year = {2024}, editor = {Salakhutdinov, Ruslan and Kolter, Zico and Heller, Katherine and Weller, Adrian and Oliver, Nuria and Scarlett, Jonathan and Berkenkamp, Felix}, volume = {235}, series = {Proceedings of Machine Learning Research}, month = {21--27 Jul}, publisher = {PMLR}, url = {https://proceedings.mlr.press/v235/arnaboldi24a.html} }
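A small illustration of the one-pass SGD setting described above, assuming a single-index target with information exponent 2 and the correlation loss mentioned in the abstract; the dimension, batch size and learning rate are arbitrary choices. The overlap m = w·w* printed along the way is the low-dimensional summary statistic whose dynamics the paper analyses.

```python
import numpy as np

rng = np.random.default_rng(0)
d, n_b, lr, steps = 500, 128, 0.05, 4000       # dimension, batch size, learning rate, SGD steps

w_star = rng.standard_normal(d); w_star /= np.linalg.norm(w_star)
w = rng.standard_normal(d); w /= np.linalg.norm(w)

def link(z):                                    # He_2 link: information exponent 2 (illustrative)
    return z ** 2 - 1.0

for t in range(steps + 1):
    X = rng.standard_normal((n_b, d))           # one-pass SGD: a fresh batch at every step
    y = link(X @ w_star)
    z = X @ w
    # correlation loss -y * link(z): the auto-correlation term of the square loss is dropped
    grad = (-(2.0 * y * z)[:, None] * X).mean(axis=0)
    w -= lr * grad
    w /= np.linalg.norm(w)                      # keep the weights on the sphere
    if t % 1000 == 0:
        print(f"step {t:5d}   overlap m = {w @ w_star:+.3f}")
# the number of samples needed to escape the initial overlap of order d**-0.5 depends on the
# batch size and the information exponent, which is the trade-off quantified in the paper
```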
- [NeurIPS] Dimension-free deterministic equivalents and scaling laws for random feature regression. Leonardo Defilippis, Bruno Loureiro, and Theodor Misiakiewicz. In Advances in Neural Information Processing Systems, 2024.
@inproceedings{defilippis2024dimensionfree, author = {Defilippis, Leonardo and Loureiro, Bruno and Misiakiewicz, Theodor}, booktitle = {Advances in Neural Information Processing Systems}, editor = {Globerson, A. and Mackey, L. and Belgrave, D. and Fan, A. and Paquet, U. and Tomczak, J. and Zhang, C.}, pages = {104630--104693}, publisher = {Curran Associates, Inc.}, title = {Dimension-free deterministic equivalents and scaling laws for random feature regression}, url = {https://proceedings.neurips.cc/paper_files/paper/2024/file/bd18189308a4c45c7d71ca83acf3deaa-Paper-Conference.pdf}, volume = {37}, year = {2024} }
- [UAI] Analysis of Bootstrap and Subsampling in High-dimensional Regularized Regression. Lucas Clarté, Adrien Vandenbroucque, Guillaume Dalle, and 3 more authors. In Proceedings of the Fortieth Conference on Uncertainty in Artificial Intelligence, 2024.
We investigate popular resampling methods for estimating the uncertainty of statistical models, such as subsampling, bootstrap and the jackknife, and their performance in high-dimensional supervised regression tasks. We provide a tight asymptotic description of the biases and variances estimated by these methods in the context of generalized linear models, such as ridge and logistic regression, taking the limit where the number of samples n and dimension d of the covariates grow at a comparable rate: α=n/d fixed. Our findings are three-fold: i) resampling methods are fraught with problems in high dimensions and exhibit the double-descent-like behavior typical of these situations; ii) only when α is large enough do they provide consistent and reliable error estimations (we give convergence rates); iii) in the over-parametrized regime α<1 relevant to modern machine learning practice, their predictions are not consistent, even with optimal regularization.
@inproceedings{clarté2024analysis, title = {Analysis of Bootstrap and Subsampling in High-dimensional Regularized Regression}, author = {Clart\'e, Lucas and Vandenbroucque, Adrien and Dalle, Guillaume and Loureiro, Bruno and Krzakala, Florent and Zdeborov\'a, Lenka}, booktitle = {Proceedings of the Fortieth Conference on Uncertainty in Artificial Intelligence}, pages = {787--819}, year = {2024}, editor = {Kiyavash, Negar and Mooij, Joris M.}, volume = {244}, series = {Proceedings of Machine Learning Research}, month = {15--19 Jul}, publisher = {PMLR}, url = {https://proceedings.mlr.press/v244/clarte24a.html} }
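A minimal pairs-bootstrap sketch for ridge regression at a fixed ratio α = n/d, in the spirit of the resampling procedures analysed above; the dimensions, regularisation and number of bootstrap resamples are illustrative choices, not the paper's setting.

```python
import numpy as np

rng = np.random.default_rng(0)
n, d, lam, B = 300, 150, 1.0, 200          # alpha = n/d = 2

theta_star = rng.standard_normal(d) / np.sqrt(d)
X = rng.standard_normal((n, d))
y = X @ theta_star + 0.5 * rng.standard_normal(n)

def ridge(X, y, lam):
    return np.linalg.solve(X.T @ X + lam * np.eye(X.shape[1]), X.T @ y)

theta_hat = ridge(X, y, lam)

# pairs bootstrap: refit on n rows resampled with replacement, B times
boot = np.empty((B, d))
for b in range(B):
    idx = rng.integers(0, n, size=n)
    boot[b] = ridge(X[idx], y[idx], lam)

# bootstrap estimate of the uncertainty of a new prediction x_new . theta_hat
x_new = rng.standard_normal(d)
preds = boot @ x_new
print(f"prediction {x_new @ theta_hat:.3f}, bootstrap std {preds.std():.3f}")
# the paper's point: at alpha = n/d of order one such resampling estimates can be biased,
# and they only become reliable when alpha is large enough
```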
- [ICML] Asymptotics of Learning with Deep Structured (Random) Features. Dominik Schröder, Daniil Dmitriev, Hugo Cui, and 1 more author. In Proceedings of the 41st International Conference on Machine Learning, 2024.
For a large class of feature maps we provide a tight asymptotic characterisation of the test error associated with learning the readout layer, in the high-dimensional limit where the input dimension, hidden layer widths, and number of training samples are proportionally large. This characterization is formulated in terms of the population covariance of the features. Our work is partially motivated by the problem of learning with Gaussian rainbow neural networks, namely deep non-linear fully-connected networks with random but structured weights, whose row-wise covariances are further allowed to depend on the weights of previous layers. For such networks we also derive a closed-form formula for the feature covariance in terms of the weight matrices. We further find that in some cases our results can capture feature maps learned by deep, finite-width neural networks trained under gradient descent.
@inproceedings{schroder2024asymptotics, title = {Asymptotics of Learning with Deep Structured ({R}andom) Features}, author = {Schr\"{o}der, Dominik and Dmitriev, Daniil and Cui, Hugo and Loureiro, Bruno}, booktitle = {Proceedings of the 41st International Conference on Machine Learning}, pages = {43862--43894}, year = {2024}, editor = {Salakhutdinov, Ruslan and Kolter, Zico and Heller, Katherine and Weller, Adrian and Oliver, Nuria and Scarlett, Jonathan and Berkenkamp, Felix}, volume = {235}, series = {Proceedings of Machine Learning Research}, month = {21--27 Jul}, publisher = {PMLR}, url = {https://proceedings.mlr.press/v235/schroder24a.html} }
- [ICML] Asymptotics of feature learning in two-layer networks after one gradient-step. Hugo Cui, Luca Pesce, Yatin Dandi, and 4 more authors. In Proceedings of the 41st International Conference on Machine Learning, 2024.
In this manuscript, we investigate the problem of how two-layer neural networks learn features from data, and improve over the kernel regime, after being trained with a single gradient descent step. Leveraging the insight from (Ba et al., 2022), we model the trained network by a spiked Random Features (sRF) model. Further building on recent progress on Gaussian universality (Dandi et al., 2023), we provide an exact asymptotic description of the generalization error of the sRF in the high-dimensional limit where the number of samples, the width, and the input dimension grow at a proportional rate. The resulting characterization for sRFs also captures closely the learning curves of the original network model. This enables us to understand how adapting to the data is crucial for the network to efficiently learn non-linear functions in the direction of the gradient - where at initialization it can only express linear functions in this regime.
@inproceedings{cui2024asymptotics, title = {Asymptotics of feature learning in two-layer networks after one gradient-step}, author = {Cui, Hugo and Pesce, Luca and Dandi, Yatin and Krzakala, Florent and Lu, Yue and Zdeborova, Lenka and Loureiro, Bruno}, booktitle = {Proceedings of the 41st International Conference on Machine Learning}, pages = {9662--9695}, year = {2024}, editor = {Salakhutdinov, Ruslan and Kolter, Zico and Heller, Katherine and Weller, Adrian and Oliver, Nuria and Scarlett, Jonathan and Berkenkamp, Felix}, volume = {235}, series = {Proceedings of Machine Learning Research}, month = {21--27 Jul}, publisher = {PMLR}, url = {https://proceedings.mlr.press/v235/cui24d.html} }
- High-dimensional robust regression under heavy-tailed data: asymptotics and universality. Urte Adomaityte, Leonardo Defilippis, Bruno Loureiro, and 1 more author. Journal of Statistical Mechanics: Theory and Experiment, Nov 2024.
We investigate the high-dimensional properties of robust regression estimators in the presence of heavy-tailed contamination of both the covariates and response functions. In particular, we provide a sharp asymptotic characterisation of M-estimators trained on a family of elliptical covariate and noise data distributions including cases where second and higher moments do not exist. We show that, despite being consistent, the Huber loss with optimally tuned location parameter δ is suboptimal in the high-dimensional regime in the presence of heavy-tailed noise, highlighting the necessity of further regularisation to achieve optimal performance. This result also uncovers the existence of a transition in δ as a function of the sample complexity and contamination. Moreover, we derive the decay rates for the excess risk of ridge regression. We show that, while it is both optimal and universal for covariate distributions with finite second moment, its decay rate can be considerably faster when the covariates’ second moment does not exist. Finally, we show that our formulas readily generalise to a richer family of models and data distributions, such as generalised linear estimation with arbitrary convex regularisation trained on mixture models.
@article{adomaityte2024, doi = {10.1088/1742-5468/ad65e6}, url = {https://dx.doi.org/10.1088/1742-5468/ad65e6}, year = {2024}, month = nov, publisher = {IOP Publishing}, volume = {2024}, number = {11}, pages = {114002}, author = {Adomaityte, Urte and Defilippis, Leonardo and Loureiro, Bruno and Sicuro, Gabriele}, title = {High-dimensional robust regression under heavy-tailed data: asymptotics and universality}, journal = {Journal of Statistical Mechanics: Theory and Experiment} }
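A small numerical sketch contrasting a ridge-regularised Huber M-estimator with plain ridge regression under heavy-tailed noise with infinite variance, assuming gradient descent on the Huber risk; all parameters, including the threshold δ, are illustrative, and the sketch does not reproduce the elliptical data model of the paper.

```python
import numpy as np

rng = np.random.default_rng(0)
n, d, lam, delta = 600, 300, 0.5, 1.345

theta_star = rng.standard_normal(d) / np.sqrt(d)
X = rng.standard_normal((n, d))
noise = rng.standard_t(df=2, size=n)           # heavy-tailed noise: second moment does not exist
y = X @ theta_star + noise

def huber_psi(r, delta):                       # derivative of the Huber loss
    return np.clip(r, -delta, delta)

theta = np.zeros(d)
lr = 0.2
for _ in range(5000):                          # gradient descent on the regularised Huber risk
    r = y - X @ theta
    grad = -X.T @ huber_psi(r, delta) / n + lam * theta
    theta -= lr * grad

ridge = np.linalg.solve(X.T @ X / n + lam * np.eye(d), X.T @ y / n)   # square-loss baseline
print("estimation error, Huber:", np.sum((theta - theta_star) ** 2))
print("estimation error, ridge:", np.sum((ridge - theta_star) ** 2))
# with such heavy-tailed noise the Huber estimator is typically far more accurate than ridge
```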
- [JMLR] How Two-Layer Neural Networks Learn, One (Giant) Step at a Time. Yatin Dandi, Florent Krzakala, Bruno Loureiro, and 2 more authors. Journal of Machine Learning Research, 2024.
@article{dandi2023learning, author = {Dandi, Yatin and Krzakala, Florent and Loureiro, Bruno and Pesce, Luca and Stephan, Ludovic}, title = {How Two-Layer Neural Networks Learn, One (Giant) Step at a Time}, journal = {Journal of Machine Learning Research}, year = {2024}, volume = {25}, number = {349}, pages = {1--65}, url = {http://jmlr.org/papers/v25/23-1543.html} }
- [PRE] Gaussian universality of perceptrons with random labels. Federica Gerace, Florent Krzakala, Bruno Loureiro, and 2 more authors. Phys. Rev. E, Mar 2024.
@article{gerace2022gaussian, title = {Gaussian universality of perceptrons with random labels}, author = {Gerace, Federica and Krzakala, Florent and Loureiro, Bruno and Stephan, Ludovic and Zdeborov\'a, Lenka}, journal = {Phys. Rev. E}, volume = {109}, issue = {3}, pages = {034305}, numpages = {18}, year = {2024}, month = mar, publisher = {American Physical Society}, doi = {10.1103/PhysRevE.109.034305}, url = {https://link.aps.org/doi/10.1103/PhysRevE.109.034305}, }
2023
- Escaping mediocrity: how two-layer networks learn hard single-index models with SGD. Luca Arnaboldi, Florent Krzakala, Bruno Loureiro, and 1 more author. 2023.
- [UAI] Expectation consistency for calibration of neural networks. Lucas Clarté, Bruno Loureiro, Florent Krzakala, and 1 more author. In Proceedings of the Thirty-Ninth Conference on Uncertainty in Artificial Intelligence, 2023.
Despite their incredible performance, it is well reported that deep neural networks tend to be overoptimistic about their prediction confidence. Finding effective and efficient calibration methods for neural networks is therefore an important endeavour towards better uncertainty quantification in deep learning. In this manuscript, we introduce a novel calibration technique named expectation consistency (EC), consisting of a post-training rescaling of the last layer weights by enforcing that the average validation confidence coincides with the average proportion of correct labels. First, we show that the EC method achieves similar calibration performance to temperature scaling (TS) across different neural network architectures and data sets, all while requiring similar validation samples and computational resources. However, we argue that EC provides a principled method grounded on a Bayesian optimality principle known as the Nishimori identity. Next, we provide an asymptotic characterization of both TS and EC in a synthetic setting and show that their performance crucially depends on the target function. In particular, we discuss examples where EC significantly outperforms TS.
@inproceedings{clarte2023ec, title = {Expectation consistency for calibration of neural networks}, author = {Clart\'e, Lucas and Loureiro, Bruno and Krzakala, Florent and Zdeborov\'a, Lenka}, booktitle = {Proceedings of the Thirty-Ninth Conference on Uncertainty in Artificial Intelligence}, pages = {443--453}, year = {2023}, editor = {Evans, Robin J. and Shpitser, Ilya}, volume = {216}, series = {Proceedings of Machine Learning Research}, month = {31 Jul--04 Aug}, publisher = {PMLR}, }
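A minimal sketch of the expectation-consistency idea for a binary classifier: choose the rescaling of the validation logits for which the average confidence matches the validation accuracy. In the paper the rescaling acts on the last-layer weights of a trained network; the toy logits and the grid search below are illustrative assumptions.

```python
import numpy as np

def expectation_consistency_scale(logits, labels, s_grid=np.linspace(0.01, 10.0, 2000)):
    """Find a rescaling s of the logits such that the average confidence of the
    rescaled classifier matches its validation accuracy (the EC condition)."""
    acc = np.mean((logits > 0) == labels)          # rescaling by s > 0 leaves predictions unchanged
    def mean_conf(s):
        p = 1.0 / (1.0 + np.exp(-s * logits))
        return np.mean(np.maximum(p, 1.0 - p))
    confs = np.array([mean_conf(s) for s in s_grid])
    return s_grid[np.argmin(np.abs(confs - acc))]

# toy validation set: overconfident logistic scores
rng = np.random.default_rng(0)
z = rng.standard_normal(1000)
labels = rng.random(1000) < 1.0 / (1.0 + np.exp(-z))   # labels drawn from the true probabilities
logits = 3.0 * z                                        # the model inflates its logits by a factor 3
s = expectation_consistency_scale(logits, labels)
print(f"EC rescaling factor: {s:.2f}  (values below 1 shrink overconfident logits)")
```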
- [COLT] From high-dimensional & mean-field dynamics to dimensionless ODEs: A unifying approach to SGD in two-layers networks. Luca Arnaboldi, Ludovic Stephan, Florent Krzakala, and 1 more author. In Proceedings of Thirty Sixth Conference on Learning Theory, 2023.
This manuscript investigates the one-pass stochastic gradient descent (SGD) dynamics of a two-layer neural network trained on Gaussian data and labels generated by a similar, though not necessarily identical, target function. We rigorously analyse the limiting dynamics via a deterministic and low-dimensional description in terms of the sufficient statistics for the population risk. Our unifying analysis bridges different regimes of interest, such as the classical gradient-flow regime of vanishing learning rate, the high-dimensional regime of large input dimension, and the overparameterised “mean-field” regime of large network width, covering as well the intermediate regimes where the limiting dynamics is determined by the interplay between these behaviours. In particular, in the high-dimensional limit, the infinite-width dynamics is found to remain close to a low-dimensional subspace spanned by the target principal directions. Our results therefore provide a unifying picture of the limiting SGD dynamics with synthetic data.
@inproceedings{arnaboldi2023high, title = {From high-dimensional & mean-field dynamics to dimensionless ODEs: A unifying approach to SGD in two-layers networks}, author = {Arnaboldi, Luca and Stephan, Ludovic and Krzakala, Florent and Loureiro, Bruno}, booktitle = {Proceedings of Thirty Sixth Conference on Learning Theory}, pages = {1199--1227}, year = {2023}, editor = {Neu, Gergely and Rosasco, Lorenzo}, volume = {195}, series = {Proceedings of Machine Learning Research}, month = {12--15 Jul}, publisher = {PMLR}, }
- [NeurIPS] Universality laws for Gaussian mixtures in generalized linear models. Yatin Dandi, Ludovic Stephan, Florent Krzakala, and 2 more authors. In Advances in Neural Information Processing Systems, 2023.
@inproceedings{Dandi2023, author = {Dandi, Yatin and Stephan, Ludovic and Krzakala, Florent and Loureiro, Bruno and Zdeborov\'{a}, Lenka}, booktitle = {Advances in Neural Information Processing Systems}, editor = {Oh, A. and Neumann, T. and Globerson, A. and Saenko, K. and Hardt, M. and Levine, S.}, pages = {54754--54768}, publisher = {Curran Associates, Inc.}, title = {Universality laws for Gaussian mixtures in generalized linear models}, url = {https://proceedings.neurips.cc/paper_files/paper/2023/file/abccb8a90b30d45b948360ba41f5a20f-Paper-Conference.pdf}, volume = {36}, year = {2023} }
- [ICML] Are Gaussian Data All You Need? The Extents and Limits of Universality in High-Dimensional Generalized Linear Estimation. Luca Pesce, Florent Krzakala, Bruno Loureiro, and 1 more author. In Proceedings of the 40th International Conference on Machine Learning, 2023.
In this manuscript we consider the problem of generalized linear estimation on Gaussian mixture data with labels given by a single-index model. Our first result is a sharp asymptotic expression for the test and training errors in the high-dimensional regime. Motivated by the recent stream of results on the Gaussian universality of the test and training errors in generalized linear estimation, we ask ourselves the question: "when is a single Gaussian enough to characterize the error?". Our formula allows us to give sharp answers to this question, both in the positive and negative directions. More precisely, we show that the sufficient conditions for Gaussian universality (or lack thereof) crucially depend on the alignment between the target weights and the means and covariances of the mixture clusters, which we precisely quantify. In the particular case of least-squares interpolation, we prove a strong universality property of the training error and show it follows a simple, closed-form expression. Finally, we apply our results to real datasets, clarifying some recent discussions in the literature about Gaussian universality of the errors in this context.
@inproceedings{pesce2023gaussian, title = {Are {G}aussian Data All You Need? {T}he Extents and Limits of Universality in High-Dimensional Generalized Linear Estimation}, author = {Pesce, Luca and Krzakala, Florent and Loureiro, Bruno and Stephan, Ludovic}, booktitle = {Proceedings of the 40th International Conference on Machine Learning}, pages = {27680--27708}, year = {2023}, editor = {Krause, Andreas and Brunskill, Emma and Cho, Kyunghyun and Engelhardt, Barbara and Sabato, Sivan and Scarlett, Jonathan}, volume = {202}, series = {Proceedings of Machine Learning Research}, month = {23--29 Jul}, publisher = {PMLR}, }
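A quick way to probe the question in the abstract numerically: train the same ridge estimator on Gaussian-mixture covariates and on a single Gaussian with matched first and second moments, and compare test errors. The mixture geometry, target and regularisation below are illustrative choices, not the paper's exact model.

```python
import numpy as np

rng = np.random.default_rng(0)
n, d = 800, 400

mu = rng.standard_normal(d) / np.sqrt(d)             # cluster mean direction
theta_star = rng.standard_normal(d) / np.sqrt(d)     # single-index target direction

def sample_mixture(m):
    s = rng.choice([-1.0, 1.0], size=m)               # two symmetric clusters at +/- mu
    return s[:, None] * mu[None, :] + rng.standard_normal((m, d))

def sample_matched_gaussian(m):
    # single Gaussian with the same mean (zero) and covariance  I + mu mu^T
    return rng.standard_normal((m, d)) + rng.standard_normal(m)[:, None] * mu[None, :]

def labels(X):
    return np.tanh(X @ theta_star)                     # single-index labels

def ridge_test_error(Xtr, ytr, Xte, yte, lam=1.0):
    theta = np.linalg.solve(Xtr.T @ Xtr + lam * np.eye(d), Xtr.T @ ytr)
    return np.mean((Xte @ theta - yte) ** 2)

for name, sampler in [("mixture", sample_mixture), ("matched Gaussian", sample_matched_gaussian)]:
    Xtr, Xte = sampler(n), sampler(4000)
    print(name, "test mse:", round(ridge_test_error(Xtr, labels(Xtr), Xte, labels(Xte)), 4))
# the paper characterises when these two errors coincide (Gaussian universality) and how the
# alignment between theta_star and the cluster mean mu can make them differ
```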
- [ICML] Deterministic equivalent and error universality of deep random features learning. Dominik Schröder, Hugo Cui, Daniil Dmitriev, and 1 more author. In Proceedings of the 40th International Conference on Machine Learning, 2023.
This manuscript considers the problem of learning a random Gaussian network function using a fully connected network with frozen intermediate layers and trainable readout layer. This problem can be seen as a natural generalization of the widely studied random features model to deeper architectures. First, we prove Gaussian universality of the test error in a ridge regression setting where the learner and target networks share the same intermediate layers, and provide a sharp asymptotic formula for it. Establishing this result requires proving a deterministic equivalent for traces of the deep random features sample covariance matrices which can be of independent interest. Second, we conjecture the asymptotic Gaussian universality of the test error in the more general setting of arbitrary convex losses and generic learner/target architectures. We provide extensive numerical evidence for this conjecture, which requires the derivation of closed-form expressions for the layer-wise post-activation population covariances. In light of our results, we investigate the interplay between architecture design and implicit regularization.
@inproceedings{schroder2023deterministic, title = {Deterministic equivalent and error universality of deep random features learning}, author = {Schr\"{o}der, Dominik and Cui, Hugo and Dmitriev, Daniil and Loureiro, Bruno}, booktitle = {Proceedings of the 40th International Conference on Machine Learning}, pages = {30285--30320}, year = {2023}, editor = {Krause, Andreas and Brunskill, Emma and Cho, Kyunghyun and Engelhardt, Barbara and Sabato, Sivan and Scarlett, Jonathan}, volume = {202}, series = {Proceedings of Machine Learning Research}, month = {23--29 Jul}, publisher = {PMLR}, url = {https://proceedings.mlr.press/v202/schroder23a.html}, }
- [AISTATS] On double-descent in uncertainty quantification in overparametrized models. Lucas Clarte, Bruno Loureiro, Florent Krzakala, and 1 more author. In Proceedings of The 26th International Conference on Artificial Intelligence and Statistics, 2023.
Uncertainty quantification is a central challenge in reliable and trustworthy machine learning. Naive measures such as last-layer scores are well-known to yield overconfident estimates in the context of overparametrized neural networks. Several methods, ranging from temperature scaling to different Bayesian treatments of neural networks, have been proposed to mitigate overconfidence, most often supported by the numerical observation that they yield better calibrated uncertainty measures. In this work, we provide a sharp comparison between popular uncertainty measures for binary classification in a mathematically tractable model for overparametrized neural networks: the random features model. We discuss a trade-off between classification accuracy and calibration, unveiling a double descent behavior in the calibration curve of optimally regularised estimators as a function of overparametrization. This is in contrast with the empirical Bayes method, which we show to be well calibrated in our setting despite the higher generalization error and overparametrization.
@inproceedings{clarte2022overparametrized, title = {On double-descent in uncertainty quantification in overparametrized models}, author = {Clarte, Lucas and Loureiro, Bruno and Krzakala, Florent and Zdeborova, Lenka}, booktitle = {Proceedings of The 26th International Conference on Artificial Intelligence and Statistics}, pages = {7089--7125}, year = {2023}, editor = {Ruiz, Francisco and Dy, Jennifer and van de Meent, Jan-Willem}, volume = {206}, series = {Proceedings of Machine Learning Research}, month = {25--27 Apr}, publisher = {PMLR}, }
- [MLST] Learning curves for the multi-class teacher–student perceptron. Elisabetta Cornacchia, Francesca Mignacco, Rodrigo Veiga, and 3 more authors. Machine Learning: Science and Technology, Feb 2023.
One of the most classical results in high-dimensional learning theory provides a closed-form expression for the generalisation error of binary classification with a single-layer teacher–student perceptron on i.i.d. Gaussian inputs. Both Bayes-optimal (BO) estimation and empirical risk minimisation (ERM) were extensively analysed in this setting. At the same time, a considerable part of modern machine learning practice concerns multi-class classification. Yet, an analogous analysis for the multi-class teacher–student perceptron was missing. In this manuscript we fill this gap by deriving and evaluating asymptotic expressions for the BO and ERM generalisation errors in the high-dimensional regime. For Gaussian teacher, we investigate the performance of ERM with both cross-entropy and square losses, and explore the role of ridge regularisation in approaching Bayes-optimality. In particular, we observe that regularised cross-entropy minimisation yields close-to-optimal accuracy. Instead, for Rademacher teacher we show that a first-order phase transition arises in the BO performance.
@article{Cornacchia_2023, doi = {10.1088/2632-2153/acb428}, year = {2023}, month = feb, publisher = {IOP Publishing}, volume = {4}, number = {1}, pages = {015019}, author = {Cornacchia, Elisabetta and Mignacco, Francesca and Veiga, Rodrigo and Gerbelot, Cédric and Loureiro, Bruno and Zdeborová, Lenka}, title = {Learning curves for the multi-class teacher–student perceptron}, journal = {Machine Learning: Science and Technology}, }
- [MLST] Theoretical characterization of uncertainty in high-dimensional linear classification. Lucas Clarté, Bruno Loureiro, Florent Krzakala, and 1 more author. Machine Learning: Science and Technology, Jun 2023.
Being able to reliably assess not only the accuracy but also the uncertainty of models’ predictions is an important endeavor in modern machine learning. Even if the model generating the data and labels is known, computing the intrinsic uncertainty after learning the model from a limited number of samples amounts to sampling the corresponding posterior probability measure. Such sampling is computationally challenging in high-dimensional problems and theoretical results on heuristic uncertainty estimators in high-dimensions are thus scarce. In this manuscript, we characterize uncertainty for learning from a limited number of samples of high-dimensional Gaussian input data and labels generated by the probit model. In this setting, the Bayesian uncertainty (i.e. the posterior marginals) can be asymptotically obtained by the approximate message passing algorithm, bypassing the canonical but costly Monte Carlo sampling of the posterior. We then provide a closed-form formula for the joint statistics between the logistic classifier, the uncertainty of the statistically optimal Bayesian classifier and the ground-truth probit uncertainty. The formula allows us to investigate the calibration of the logistic classifier learning from a limited amount of samples. We discuss how over-confidence can be mitigated by appropriately regularizing.
@article{Clarté_2023, doi = {10.1088/2632-2153/acd749}, year = {2023}, month = jun, publisher = {IOP Publishing}, volume = {4}, number = {2}, pages = {025029}, author = {Clarté, Lucas and Loureiro, Bruno and Krzakala, Florent and Zdeborová, Lenka}, title = {Theoretical characterization of uncertainty in high-dimensional linear classification}, journal = {Machine Learning: Science and Technology}, }
- [MLST] Error scaling laws for kernel classification under source and capacity conditions. Hugo Cui, Bruno Loureiro, Florent Krzakala, and 1 more author. Machine Learning: Science and Technology, Aug 2023.
In this manuscript we consider the problem of kernel classification. While worst-case bounds on the decay rate of the prediction error with the number of samples are known for some classifiers, they often fail to accurately describe the learning curves of real data sets. In this work, we consider the important class of data sets satisfying the standard source and capacity conditions, comprising a number of real data sets as we show numerically. Under the Gaussian design, we derive the decay rates for the misclassification (prediction) error as a function of the source and capacity coefficients. We do so for two standard kernel classification settings, namely margin-maximizing support vector machines and ridge classification, and contrast the two methods. We find that our rates tightly describe the learning curves for this class of data sets, and are also observed on real data. Our results can also be seen as an explicit prediction of the exponents of a scaling law for kernel classification that is accurate on some real datasets.
@article{cui2022error, doi = {10.1088/2632-2153/acf041}, year = {2023}, month = aug, publisher = {IOP Publishing}, volume = {4}, number = {3}, pages = {035033}, author = {Cui, Hugo and Loureiro, Bruno and Krzakala, Florent and Zdeborová, Lenka}, title = {Error scaling laws for kernel classification under source and capacity conditions}, journal = {Machine Learning: Science and Technology}, }
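The capacity condition referred to above concerns the power-law decay of the kernel eigenvalues. A rough way to estimate such an exponent numerically is to fit a power law to the spectrum of an empirical Gram matrix, as in the sketch below; the kernel, the synthetic data and the fitting range are arbitrary choices, and the estimate is only indicative.

```python
import numpy as np

rng = np.random.default_rng(0)
n, gamma = 600, 0.1
X = rng.standard_normal((n, 10))                 # placeholder data; a real dataset could be used

# empirical RBF Gram matrix; the capacity exponent is the power law lambda_k ~ k**(-alpha)
# of the kernel eigenvalue decay
d2 = ((X[:, None, :] - X[None, :, :]) ** 2).sum(-1)
K = np.exp(-gamma * d2) / n
eigs = np.sort(np.linalg.eigvalsh(K))[::-1]

ks = np.arange(5, 150)                           # fit in an intermediate range of the spectrum
slope, _ = np.polyfit(np.log(ks), np.log(eigs[ks]), 1)
print(f"estimated capacity exponent alpha ~ {-slope:.2f}")
```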
- [IEEE TIT] Bayesian Inference With Nonlinear Generative Models: Comments on Secure Learning. Ali Bereyhi, Bruno Loureiro, Florent Krzakala, and 2 more authors. IEEE Transactions on Information Theory, Dec 2023.
Unlike the classical linear model, nonlinear generative models have been addressed sparsely in the literature of statistical learning. This work aims to shed light on these models and their secrecy potential. To this end, we invoke the replica method to derive the asymptotic normalized cross entropy in an inverse probability problem whose generative model is described by a Gaussian random field with a generic covariance function. Our derivations further demonstrate the asymptotic statistical decoupling of the Bayesian estimator and specify the decoupled setting for a given nonlinear model. The replica solution depicts that strictly nonlinear models establish an all-or-nothing phase transition: there exists a critical load at which the optimal Bayesian inference changes from perfect to an uncorrelated learning. Based on this finding, we design a new secure coding scheme which achieves the secrecy capacity of the wiretap channel. This interesting result implies that strictly nonlinear generative models are perfectly secured without any secure coding. We justify this latter statement through the analysis of an illustrative model for perfectly secure and reliable inference.
@article{bereyhi2022bayesian, author = {Bereyhi, Ali and Loureiro, Bruno and Krzakala, Florent and Müller, Ralf R. and Schulz-Baldes, Hermann}, journal = {IEEE Transactions on Information Theory}, title = {Bayesian Inference With Nonlinear Generative Models: Comments on Secure Learning}, year = {2023}, volume = {69}, number = {12}, pages = {7998-8028}, keywords = {}, doi = {10.1109/TIT.2023.3325187}, issn = {1557-9654}, month = dec, }
2022
- [NeurIPS] Subspace clustering in high-dimensions: Phase transitions & Statistical-to-Computational gap. Luca Pesce, Bruno Loureiro, Florent Krzakala, and 1 more author. In Advances in Neural Information Processing Systems, 2022.
@inproceedings{pesce2022subspace, author = {Pesce, Luca and Loureiro, Bruno and Krzakala, Florent and Zdeborov\'{a}, Lenka}, booktitle = {Advances in Neural Information Processing Systems}, editor = {Koyejo, S. and Mohamed, S. and Agarwal, A. and Belgrave, D. and Cho, K. and Oh, A.}, pages = {27087--27099}, publisher = {Curran Associates, Inc.}, title = {Subspace clustering in high-dimensions: Phase transitions \& Statistical-to-Computational gap}, volume = {35}, year = {2022}, }
- [NeurIPS] Phase diagram of Stochastic Gradient Descent in high-dimensional two-layer neural networks. Rodrigo Veiga, Ludovic Stephan, Bruno Loureiro, and 2 more authors. In Advances in Neural Information Processing Systems, 2022.
@inproceedings{veiga2022phase, author = {Veiga, Rodrigo and Stephan, Ludovic and Loureiro, Bruno and Krzakala, Florent and Zdeborov\'{a}, Lenka}, booktitle = {Advances in Neural Information Processing Systems}, editor = {Koyejo, S. and Mohamed, S. and Agarwal, A. and Belgrave, D. and Cho, K. and Oh, A.}, pages = {23244--23255}, publisher = {Curran Associates, Inc.}, title = {Phase diagram of Stochastic Gradient Descent in high-dimensional two-layer neural networks}, volume = {35}, year = {2022}, }
- [ICML] Fluctuations, Bias, Variance & Ensemble of Learners: Exact Asymptotics for Convex Losses in High-Dimension. Bruno Loureiro, Cedric Gerbelot, Maria Refinetti, and 2 more authors. In Proceedings of the 39th International Conference on Machine Learning, 2022.
From the sampling of data to the initialisation of parameters, randomness is ubiquitous in modern Machine Learning practice. Understanding the statistical fluctuations engendered by the different sources of randomness in prediction is therefore key to understanding robust generalisation. In this manuscript we develop a quantitative and rigorous theory for the study of fluctuations in an ensemble of generalised linear models trained on different, but correlated, features in high-dimensions. In particular, we provide a complete description of the asymptotic joint distribution of the empirical risk minimiser for generic convex loss and regularisation in the high-dimensional limit. Our result encompasses a rich set of classification and regression tasks, such as the lazy regime of overparametrised neural networks, or equivalently the random features approximation of kernels. While allowing to study directly the mitigating effect of ensembling (or bagging) on the bias-variance decomposition of the test error, our analysis also helps disentangle the contribution of statistical fluctuations, and the singular role played by the interpolation threshold that are at the roots of the “double-descent” phenomenon.
@inproceedings{pmlr-v162-loureiro22a, title = {Fluctuations, Bias, Variance & Ensemble of Learners: Exact Asymptotics for Convex Losses in High-Dimension}, author = {Loureiro, Bruno and Gerbelot, Cedric and Refinetti, Maria and Sicuro, Gabriele and Krzakala, Florent}, booktitle = {Proceedings of the 39th International Conference on Machine Learning}, pages = {14283--14314}, year = {2022}, editor = {Chaudhuri, Kamalika and Jegelka, Stefanie and Song, Le and Szepesvari, Csaba and Niu, Gang and Sabato, Sivan}, volume = {162}, series = {Proceedings of Machine Learning Research}, month = {17--23 Jul}, publisher = {PMLR}, url = {https://proceedings.mlr.press/v162/loureiro22a.html}, }
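A small sketch of the ensembling setup studied above: several random-feature ridge regressors are trained on the same data but with independent feature maps, and their predictions averaged. Dimensions, the tanh feature map and the regularisation are illustrative; the paper computes the corresponding bias-variance decomposition exactly.

```python
import numpy as np

rng = np.random.default_rng(0)
d, p, n, K, lam = 100, 300, 200, 10, 1e-2

w_star = rng.standard_normal(d) / np.sqrt(d)
Xtr = rng.standard_normal((n, d)); ytr = Xtr @ w_star + 0.3 * rng.standard_normal(n)
Xte = rng.standard_normal((2000, d)); yte = Xte @ w_star

def rf_predict(seed):
    F = np.random.default_rng(seed).standard_normal((d, p)) / np.sqrt(d)   # independent feature map
    Ztr, Zte = np.tanh(Xtr @ F), np.tanh(Xte @ F)
    a = np.linalg.solve(Ztr.T @ Ztr + lam * np.eye(p), Ztr.T @ ytr)        # ridge readout
    return Zte @ a

preds = np.stack([rf_predict(s) for s in range(K)])      # K learners, same data, different features
single = np.mean((preds[0] - yte) ** 2)
ensemble = np.mean((preds.mean(axis=0) - yte) ** 2)
print(f"single learner mse {single:.3f}, ensemble of {K} mse {ensemble:.3f}")
# averaging removes the variance due to the random projections, one of the sources of
# fluctuations whose joint statistics the paper characterises
```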
- [MSML] The Gaussian equivalence of generative models for learning with shallow neural networks. Sebastian Goldt, Bruno Loureiro, Galen Reeves, and 3 more authors. In Proceedings of the 2nd Mathematical and Scientific Machine Learning Conference, 2022.
Understanding the impact of data structure on the computational tractability of learning is a key challenge for the theory of neural networks. Many theoretical works do not explicitly model training data, or assume that inputs are drawn component-wise independently from some simple probability distribution. Here, we go beyond this simple paradigm by studying the performance of neural networks trained on data drawn from pre-trained generative models. This is possible due to a Gaussian equivalence stating that the key metrics of interest, such as the training and test errors, can be fully captured by an appropriately chosen Gaussian model. We provide three strands of rigorous, analytical and numerical evidence corroborating this equivalence. First, we establish rigorous conditions for the Gaussian equivalence to hold in the case of single-layer generative models, as well as deterministic rates for convergence in distribution. Second, we leverage this equivalence to derive a closed set of equations describing the generalisation performance of two widely studied machine learning problems: two-layer neural networks trained using one-pass stochastic gradient descent, and full-batch pre-learned features or kernel methods. Finally, we perform experiments demonstrating how our theory applies to deep, pre-trained generative models. These results open a viable path to the theoretical study of machine learning models with realistic data.
@inproceedings{goldt2020phase, title = {The Gaussian equivalence of generative models for learning with shallow neural networks}, author = {Goldt, Sebastian and Loureiro, Bruno and Reeves, Galen and Krzakala, Florent and Mezard, Marc and Zdeborova, Lenka}, booktitle = {Proceedings of the 2nd Mathematical and Scientific Machine Learning Conference}, pages = {426--471}, year = {2022}, editor = {Bruna, Joan and Hesthaven, Jan and Zdeborova, Lenka}, volume = {145}, series = {Proceedings of Machine Learning Research}, month = {16--19 Aug}, publisher = {PMLR}, }
2021
- [NeurIPS] Learning Gaussian Mixtures with Generalized Linear Models: Precise Asymptotics in High-dimensions. Bruno Loureiro, Gabriele Sicuro, Cedric Gerbelot, and 3 more authors. In Advances in Neural Information Processing Systems, 2021.
@inproceedings{loureiro2021learning, author = {Loureiro, Bruno and Sicuro, Gabriele and Gerbelot, Cedric and Pacco, Alessandro and Krzakala, Florent and Zdeborov\'{a}, Lenka}, booktitle = {Advances in Neural Information Processing Systems}, editor = {Ranzato, M. and Beygelzimer, A. and Dauphin, Y. and Liang, P.S. and Vaughan, J. Wortman}, pages = {10144--10157}, publisher = {Curran Associates, Inc.}, title = {Learning Gaussian Mixtures with Generalized Linear Models: Precise Asymptotics in High-dimensions}, volume = {34}, year = {2021}, }
- [NeurIPS] Generalization Error Rates in Kernel Regression: The Crossover from the Noiseless to Noisy Regime. Hugo Cui, Bruno Loureiro, Florent Krzakala, and 1 more author. In Advances in Neural Information Processing Systems, 2021.
@inproceedings{cui2021generalization, author = {Cui, Hugo and Loureiro, Bruno and Krzakala, Florent and Zdeborov\'{a}, Lenka}, booktitle = {Advances in Neural Information Processing Systems}, editor = {Ranzato, M. and Beygelzimer, A. and Dauphin, Y. and Liang, P.S. and Vaughan, J. Wortman}, pages = {10131--10143}, publisher = {Curran Associates, Inc.}, title = {Generalization Error Rates in Kernel Regression: The Crossover from the Noiseless to Noisy Regime}, volume = {34}, year = {2021}, }
- [NeurIPS] Learning curves of generic features maps for realistic datasets with a teacher-student model. Bruno Loureiro, Cedric Gerbelot, Hugo Cui, and 4 more authors. In Advances in Neural Information Processing Systems, 2021.
@inproceedings{loureiro2021capturing, author = {Loureiro, Bruno and Gerbelot, Cedric and Cui, Hugo and Goldt, Sebastian and Krzakala, Florent and Mezard, Marc and Zdeborov\'{a}, Lenka}, booktitle = {Advances in Neural Information Processing Systems}, editor = {Ranzato, M. and Beygelzimer, A. and Dauphin, Y. and Liang, P.S. and Vaughan, J. Wortman}, pages = {18137--18151}, publisher = {Curran Associates, Inc.}, title = {Learning curves of generic features maps for realistic datasets with a teacher-student model}, volume = {34}, year = {2021}, }
- [IEEE-TIT] The Spiked Matrix Model With Generative Priors. Benjamin Aubin, Bruno Loureiro, Antoine Maillard, and 2 more authors. IEEE Transactions on Information Theory, Feb 2021.
We investigate the statistical and algorithmic properties of random neural-network generative priors in a simple inference problem: spiked-matrix estimation. We establish a rigorous expression for the performance of the Bayes-optimal estimator in the high-dimensional regime, and identify the statistical threshold for weak-recovery of the spike. Next, we derive a message-passing algorithm taking into account the latent structure of the spike, and show that its performance is asymptotically optimal for natural choices of the generative network architecture. The absence of an algorithmic gap in this case is in stark contrast to known results for sparse spikes, another popular prior for modelling low-dimensional signals, and for which no algorithm is known to achieve the optimal statistical threshold. Finally, we show that linearising our message passing algorithm yields a simple spectral method also achieving the optimal threshold for reconstruction. We conclude with an experiment on a real data set showing that our bespoke spectral method outperforms vanilla PCA.
@article{aubin2019spiked, author = {Aubin, Benjamin and Loureiro, Bruno and Maillard, Antoine and Krzakala, Florent and Zdeborová, Lenka}, journal = {IEEE Transactions on Information Theory}, title = {The Spiked Matrix Model With Generative Priors}, year = {2021}, volume = {67}, number = {2}, pages = {1156-1181}, keywords = {}, doi = {10.1109/TIT.2020.3033985}, issn = {1557-9654}, month = feb, }
2020
- [NeurIPS] Phase retrieval in high dimensions: Statistical and computational phase transitions. Antoine Maillard, Bruno Loureiro, Florent Krzakala, and 1 more author. In Advances in Neural Information Processing Systems, 2020.
@inproceedings{maillard2020phase, author = {Maillard, Antoine and Loureiro, Bruno and Krzakala, Florent and Zdeborov\'{a}, Lenka}, booktitle = {Advances in Neural Information Processing Systems}, editor = {Larochelle, H. and Ranzato, M. and Hadsell, R. and Balcan, M.F. and Lin, H.}, pages = {11071--11082}, publisher = {Curran Associates, Inc.}, title = {Phase retrieval in high dimensions: Statistical and computational phase transitions}, volume = {33}, year = {2020}, }
- [ICML] Generalisation error in learning with random features and the hidden manifold model. Federica Gerace, Bruno Loureiro, Florent Krzakala, and 2 more authors. In Proceedings of the 37th International Conference on Machine Learning, 2020.
We study generalised linear regression and classification for a synthetically generated dataset encompassing different problems of interest, such as learning with random features, neural networks in the lazy training regime, and the hidden manifold model. We consider the high-dimensional regime and using the replica method from statistical physics, we provide a closed-form expression for the asymptotic generalisation performance in these problems, valid in both the under- and over-parametrised regimes and for a broad choice of generalised linear model loss functions. In particular, we show how to obtain analytically the so-called double descent behaviour for logistic regression with a peak at the interpolation threshold, we illustrate the superiority of orthogonal against random Gaussian projections in learning with random features, and discuss the role played by correlations in the data generated by the hidden manifold model. Beyond the interest in these particular problems, the theoretical formalism introduced in this manuscript provides a path to further extensions to more complex tasks.
@inproceedings{Gerace2020, title = {Generalisation error in learning with random features and the hidden manifold model}, author = {Gerace, Federica and Loureiro, Bruno and Krzakala, Florent and Mezard, Marc and Zdeborova, Lenka}, booktitle = {Proceedings of the 37th International Conference on Machine Learning}, pages = {3452--3462}, year = {2020}, editor = {III, Hal Daumé and Singh, Aarti}, volume = {119}, series = {Proceedings of Machine Learning Research}, month = {13--18 Jul}, publisher = {PMLR}, }
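A quick numerical illustration of the double descent behaviour mentioned in the abstract, using minimum-norm random-features regression and sweeping the number of features p across the interpolation threshold p = n. All sizes and the tanh feature map are illustrative choices; the paper derives the corresponding curves in closed form.

```python
import numpy as np

rng = np.random.default_rng(0)
d, n, n_test = 100, 200, 2000

w_star = rng.standard_normal(d) / np.sqrt(d)
def sample(m):
    X = rng.standard_normal((m, d))
    return X, X @ w_star + 0.2 * rng.standard_normal(m)     # noisy linear target

Xtr, ytr = sample(n)
Xte, yte = sample(n_test)

print("  p/n   test mse")
for p in [25, 50, 100, 150, 190, 200, 210, 250, 400, 800, 1600]:
    F = rng.standard_normal((d, p)) / np.sqrt(d)             # random first-layer projection
    Ztr, Zte = np.tanh(Xtr @ F), np.tanh(Xte @ F)             # random features
    a = np.linalg.pinv(Ztr) @ ytr                             # minimum-norm least-squares readout
    print(f"{p / n:5.2f}   {np.mean((Zte @ a - yte) ** 2):8.3f}")
# the test error typically peaks near the interpolation threshold p = n and decreases again
# in the overparametrised regime: the double descent curve
```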
- [MSML] Exact asymptotics for phase retrieval and compressed sensing with random generative priors. Benjamin Aubin, Bruno Loureiro, Antoine Baker, and 2 more authors. In Proceedings of The First Mathematical and Scientific Machine Learning Conference, 2020.
We consider the problem of compressed sensing and of (real-valued) phase retrieval with random measurement matrix. We derive sharp asymptotics for the information-theoretically optimal performance and for the best known polynomial algorithm for an ensemble of generative priors consisting of fully connected deep neural networks with random weight matrices and arbitrary activations. We compare the performance to sparse separable priors and conclude that in all cases analysed generative priors have a smaller statistical-to-algorithmic gap than sparse priors, giving theoretical support to previous experimental observations that generative priors might be advantageous in terms of algorithmic performance. In particular, while sparsity does not allow to perform compressive phase retrieval efficiently close to its information-theoretic limit, it is found that under the random generative prior compressed phase retrieval becomes tractable.
@inproceedings{aubin2019phase, title = {Exact asymptotics for phase retrieval and compressed sensing with random generative priors}, author = {Aubin, Benjamin and Loureiro, Bruno and Baker, Antoine and Krzakala, Florent and Zdeborov\'a, Lenka}, booktitle = {Proceedings of The First Mathematical and Scientific Machine Learning Conference}, pages = {55--73}, year = {2020}, editor = {Lu, Jianfeng and Ward, Rachel}, volume = {107}, series = {Proceedings of Machine Learning Research}, month = {20--24 Jul}, publisher = {PMLR}, }
2018
- [JHEP] Coherence effects in disordered geometries with a field-theory dual. Tomás Andrade, Antonio M. García-García, and Bruno Loureiro. Journal of High Energy Physics, Mar 2018.
@article{Andrade2018, author = {Andrade, Tom{\'a}s and Garc{\'i}a-Garc{\'i}a, Antonio M. and Loureiro, Bruno}, title = {Coherence effects in disordered geometries with a field-theory dual}, journal = {Journal of High Energy Physics}, year = {2018}, month = mar, day = {29}, volume = {2018}, number = {3}, pages = {187}, issn = {1029-8479}, doi = {10.1007/JHEP03(2018)187}, }
- [PRL] Chaotic-Integrable Transition in the Sachdev-Ye-Kitaev Model. Antonio M. García-García, Bruno Loureiro, Aurelio Romero-Bermúdez, and 1 more author. Phys. Rev. Lett., Jun 2018.
@article{PhysRevLett.120.241603, title = {Chaotic-Integrable Transition in the Sachdev-Ye-Kitaev Model}, author = {Garc\'{\i}a-Garc\'{\i}a, Antonio M. and Loureiro, Bruno and Romero-Berm\'udez, Aurelio and Tezuka, Masaki}, journal = {Phys. Rev. Lett.}, volume = {120}, issue = {24}, pages = {241603}, numpages = {6}, year = {2018}, month = jun, publisher = {American Physical Society}, doi = {10.1103/PhysRevLett.120.241603}, }
2016
- [PRD] Transport in a gravity dual with a varying gravitational coupling constant. Antonio M. García-García, Bruno Loureiro, and Aurelio Romero-Bermúdez. Phys. Rev. D, Oct 2016.
@article{PhysRevD.94.086007, title = {Transport in a gravity dual with a varying gravitational coupling constant}, author = {Garc\'{\i}a-Garc\'{\i}a, Antonio M. and Loureiro, Bruno and Romero-Berm\'udez, Aurelio}, journal = {Phys. Rev. D}, volume = {94}, issue = {8}, pages = {086007}, numpages = {21}, year = {2016}, month = oct, publisher = {American Physical Society}, doi = {10.1103/PhysRevD.94.086007}, }
- [PRD] Marginal and irrelevant disorder in Einstein-Maxwell backgrounds. Antonio M. García-García and Bruno Loureiro. Phys. Rev. D, Mar 2016.
@article{PhysRevD.93.065025, title = {Marginal and irrelevant disorder in Einstein-Maxwell backgrounds}, author = {Garc\'{\i}a-Garc\'{\i}a, Antonio M. and Loureiro, Bruno}, journal = {Phys. Rev. D}, volume = {93}, issue = {6}, pages = {065025}, numpages = {13}, year = {2016}, month = mar, publisher = {American Physical Society}, doi = {10.1103/PhysRevD.93.065025}, }