common.py 1.2 KB

1234567891011121314151617181920212223242526272829303132
  1. import math
  2. import torch
  3. from torch import nn
  4. def trunc_normal_init_(tensor: torch.Tensor, std: float = 1.0, lower: float = -2.0, upper: float = 2.0):
  5. # NOTE: PyTorch nn.init.trunc_normal_ is not mathematically correct, the std dev is not actually the std dev of initialized tensor
  6. # This function is a PyTorch version of jax truncated normal init (default init method in flax)
  7. # https://github.com/jax-ml/jax/blob/main/jax/_src/random.py#L807-L848
  8. # https://github.com/jax-ml/jax/blob/main/jax/_src/nn/initializers.py#L162-L199
  9. with torch.no_grad():
  10. if std == 0:
  11. tensor.zero_()
  12. else:
  13. sqrt2 = math.sqrt(2)
  14. a = math.erf(lower / sqrt2)
  15. b = math.erf(upper / sqrt2)
  16. z = (b - a) / 2
  17. c = (2 * math.pi) ** -0.5
  18. pdf_u = c * math.exp(-0.5 * lower ** 2)
  19. pdf_l = c * math.exp(-0.5 * upper ** 2)
  20. comp_std = std / math.sqrt(1 - (upper * pdf_u - lower * pdf_l) / z - ((pdf_u - pdf_l) / z) ** 2)
  21. tensor.uniform_(a, b)
  22. tensor.erfinv_()
  23. tensor.mul_(sqrt2 * comp_std)
  24. tensor.clip_(lower * comp_std, upper * comp_std)
  25. return tensor