Merge pull request dotnet#859 from NiklasGustafsson/main
Adding a few 1.12 APIs.
NiklasGustafsson committed Dec 1, 2022
2 parents 03ceea3 + 749b706 commit 2230dec
Showing 11 changed files with 176 additions and 37 deletions.
8 changes: 8 additions & 0 deletions RELEASENOTES.md
@@ -2,6 +2,14 @@

Releases, starting with 9/2/2021, are listed with the most recent release at the top.

## NuGet Version 0.99.2

__API Changes__:

Adding 'maximize' argument to the Adadelta optimizer
Adding linalg.ldl_factor and linalg.ldl_solve
Adding SoftplusTransform

## NuGet Version 0.99.1

__Breaking Changes__:
20 changes: 10 additions & 10 deletions azure-pipelines.yml
@@ -51,7 +51,7 @@ jobs:
buildScript: dotnet build /p:SkipCuda=true /p:SkipNetFxBuild=true -c
testScript: dotnet test /p:SkipCuda=true /p:SkipNetFxBuild=true --blame -c
pool:
vmImage: 'windows-2022'
vmImage: 'windows-2019'

- template: /build/ci/job-template.yml
parameters:
@@ -60,7 +60,7 @@ jobs:
buildScript: dotnet build /p:SkipCuda=true /p:SkipNetCoreBuild=true -c
testScript: dotnet test /p:SkipCuda=true /p:SkipNetCoreBuild=true --blame -c
pool:
vmImage: 'windows-2022'
vmImage: 'windows-2019'

- template: /build/ci/job-template.yml
parameters:
@@ -133,7 +133,7 @@ jobs:
DOTNET_SKIP_FIRST_TIME_EXPERIENCE: 1
DOTNET_MULTILEVEL_LOOKUP: 0
pool:
vmImage: 'windows-2022'
vmImage: 'windows-2019'

steps:

@@ -209,7 +209,7 @@ jobs:
DOTNET_SKIP_FIRST_TIME_EXPERIENCE: 1
DOTNET_MULTILEVEL_LOOKUP: 0
pool:
vmImage: 'windows-2022'
vmImage: 'windows-2019'
steps:

# We are 10GB space-constrained on the Azure Pipelines CI system so clean up what we can
@@ -346,7 +346,7 @@ jobs:
DOTNET_SKIP_FIRST_TIME_EXPERIENCE: 1
DOTNET_MULTILEVEL_LOOKUP: 0
pool:
vmImage: 'windows-2022'
vmImage: 'windows-2019'
steps:

# We are 10GB space-constrained on the Azure Pipelines CI system so clean up what we can
@@ -458,7 +458,7 @@ jobs:
variables:
- group: SignClient Credentials
pool:
vmImage: 'windows-2022'
vmImage: 'windows-2019'
steps:

- task: DotNetCoreCLI@2
@@ -507,7 +507,7 @@ jobs:
variables:
- group: SignClient Credentials
pool:
vmImage: 'windows-2022'
vmImage: 'windows-2019'
steps:

- task: DotNetCoreCLI@2
@@ -586,7 +586,7 @@ jobs:
DOTNET_SKIP_FIRST_TIME_EXPERIENCE: 1
DOTNET_MULTILEVEL_LOOKUP: 0
pool:
vmImage: 'windows-2022'
vmImage: 'windows-2019'
steps:

# Push packages to feed
@@ -687,7 +687,7 @@ jobs:
DOTNET_SKIP_FIRST_TIME_EXPERIENCE: 1
DOTNET_MULTILEVEL_LOOKUP: 0
pool:
vmImage: 'windows-2022'
vmImage: 'windows-2019'
steps:

# Push packages to feed
@@ -772,7 +772,7 @@ jobs:
DOTNET_SKIP_FIRST_TIME_EXPERIENCE: 1
DOTNET_MULTILEVEL_LOOKUP: 0
pool:
vmImage: 'windows-2022'
vmImage: 'windows-2019'
# container: UbuntuContainer
# Exact copy of the dependency install above - TODO share this somewhere
steps:
22 changes: 22 additions & 0 deletions src/Native/LibTorchSharp/THSLinearAlgebra.cpp
@@ -148,6 +148,28 @@ Tensor THSLinalg_lu_factor(const Tensor A, const bool pivot, Tensor* pivots)
return ResultTensor(std::get<0>(res));
}

Tensor THSLinalg_ldl_factor(const Tensor A, const bool hermitian, Tensor* pivots)
{
std::tuple<at::Tensor, at::Tensor> res;
CATCH(res = torch::linalg_ldl_factor(*A, hermitian););
*pivots = ResultTensor(std::get<1>(res));
return ResultTensor(std::get<0>(res));
}

Tensor THSLinalg_ldl_factor_ex(const Tensor A, const bool hermitian, const bool check_errors, Tensor* pivots, Tensor* info)
{
std::tuple<at::Tensor, at::Tensor, at::Tensor> res;
CATCH(res = torch::linalg_ldl_factor_ex(*A, hermitian, check_errors););
*pivots = ResultTensor(std::get<1>(res));
*info = ResultTensor(std::get<2>(res));
return ResultTensor(std::get<0>(res));
}

Tensor THSLinalg_ldl_solve(const Tensor LD, const Tensor pivots, const Tensor B, const bool hermitian)
{
CATCH_TENSOR(torch::linalg_ldl_solve(*LD, *pivots, *B, hermitian));
}

Tensor THSLinalg_matrix_norm(const Tensor tensor, const Scalar ord, const int64_t* dim, const int dim_length, const bool keepdim)
{
auto dims = c10::ArrayRef<int64_t>(dim, dim_length);
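
The three new exports follow the library's convention for multi-result factorizations: the primary factor is the function's return value, and the secondary tensors (pivots, info) come back through `Tensor*` out parameters. Below is a sketch of the managed P/Invoke declarations this implies; the actual interop file is not part of this diff, so the class name, library name, and marshaling attributes are assumptions.

```csharp
using System;
using System.Runtime.InteropServices;

// Hypothetical binding class -- TorchSharp's real interop layer is not shown in this commit.
internal static class NativeMethods
{
    // Returns the LD factor; the pivots tensor comes back through the out parameter.
    [DllImport("LibTorchSharp")]
    internal static extern IntPtr THSLinalg_ldl_factor(
        IntPtr A, [MarshalAs(UnmanagedType.U1)] bool hermitian, out IntPtr pivots);

    // As above, plus an 'info' tensor reporting per-matrix factorization status.
    [DllImport("LibTorchSharp")]
    internal static extern IntPtr THSLinalg_ldl_factor_ex(
        IntPtr A, [MarshalAs(UnmanagedType.U1)] bool hermitian,
        [MarshalAs(UnmanagedType.U1)] bool check_errors, out IntPtr pivots, out IntPtr info);

    // Solves A X = B given the LD factor and pivots produced above.
    [DllImport("LibTorchSharp")]
    internal static extern IntPtr THSLinalg_ldl_solve(
        IntPtr LD, IntPtr pivots, IntPtr B, [MarshalAs(UnmanagedType.U1)] bool hermitian);
}
```

A zero return value (`IntPtr.Zero`) signals that the native call threw inside the `CATCH` macro, which is why the managed wrappers in `LinearAlgebra.cs` below call `torch.CheckForErrors()` before constructing the result tensors.
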
4 changes: 4 additions & 0 deletions src/Native/LibTorchSharp/THSTensor.h
@@ -1404,6 +1404,10 @@ EXPORT_API(Tensor) THSLinalg_lstsq_rcond(const Tensor A, const Tensor B, const d
EXPORT_API(Tensor) THSLinalg_lu(const Tensor A, const bool pivot, Tensor* L, Tensor* U);
EXPORT_API(Tensor) THSLinalg_lu_factor(const Tensor A, const bool pivot, Tensor* pivots);

EXPORT_API(Tensor) THSLinalg_ldl_factor(const Tensor A, const bool hermitian, Tensor* pivots);
EXPORT_API(Tensor) THSLinalg_ldl_factor_ex(const Tensor A, const bool hermitian, const bool check_errors, Tensor* pivots, Tensor* info);
EXPORT_API(Tensor) THSLinalg_ldl_solve(const Tensor LD, const Tensor pivots, const Tensor B, const bool hermitian);

EXPORT_API(Tensor) THSLinalg_matrix_power(const Tensor target, const int64_t n);

EXPORT_API(Tensor) THSLinalg_matrix_norm(const Tensor tensor, const Scalar ord, const int64_t* dim, const int dim_length, const bool keepdim);
6 changes: 3 additions & 3 deletions src/TorchSharp/Distributions/Transforms.cs
@@ -529,11 +529,11 @@ public class SoftplusTransform : Transform

protected internal override Tensor _sign() => 1;

protected internal override Tensor log_abs_det_jacobian(Tensor x, Tensor y) => torch.WrappedTensorDisposeScope(() => -nn.functional.softplus(-x));
protected internal override Tensor log_abs_det_jacobian(Tensor x, Tensor y) => -nn.functional.softplus(-x);

protected internal override Tensor _call(Tensor x) => nn.functional.softplus(x);
protected internal override Tensor _call(Tensor x) => nn.functional.softplus(-x);

protected internal override Tensor _inverse(Tensor y) => torch.WrappedTensorDisposeScope(() => (-y).expm1().neg().log() + y);
protected internal override Tensor _inverse(Tensor y) => (-y).expm1().neg().log() + y;
}

public class SoftmaxTransform : Transform
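
For reference, the expressions in `SoftplusTransform` follow directly from the definition of softplus; the identities below are a worked restatement (standard math, not part of the diff):

```latex
\begin{aligned}
y &= \operatorname{softplus}(x) = \log(1 + e^{x}), &
x &= \log(e^{y} - 1) = y + \log\bigl(1 - e^{-y}\bigr), \\
\frac{dy}{dx} &= \sigma(x) = \frac{1}{1 + e^{-x}}, &
\log\left|\frac{dy}{dx}\right| &= -\log\bigl(1 + e^{-x}\bigr) = -\operatorname{softplus}(-x).
\end{aligned}
```

The inverse identity is exactly what `(-y).expm1().neg().log() + y` computes, and the log-det-Jacobian identity is what `-nn.functional.softplus(-x)` computes; the forward map itself is `softplus(x)`.
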
45 changes: 45 additions & 0 deletions src/TorchSharp/LinearAlgebra.cs
@@ -274,6 +274,51 @@ public static (Tensor LU, Tensor? Pivots) lu_factor(Tensor input, bool pivot = t
return (new Tensor(solution), pivots == IntPtr.Zero ? null : new Tensor(pivots));
}

/// <summary>
/// Computes a compact representation of the LDL factorization of a Hermitian or symmetric (possibly indefinite) matrix.
/// </summary>
/// <param name="input">Tensor of shape (*, n, n) consisting of symmetric or Hermitian matrices, where * is zero or more batch dimensions.</param>
/// <param name="hermitian">Controls whether to consider the input to be Hermitian or symmetric. For real-valued matrices, this switch has no effect.</param>
/// <returns></returns>
public static (Tensor LU, Tensor? Pivots) ldl_factor(Tensor input, bool hermitian = true)
{
var solution = THSLinalg_ldl_factor(input.Handle, hermitian, out var pivots);
if (solution == IntPtr.Zero)
torch.CheckForErrors();
return (new Tensor(solution), pivots == IntPtr.Zero ? null : new Tensor(pivots));
}

/// <summary>
/// Computes a compact representation of the LDL factorization of a Hermitian or symmetric (possibly indefinite) matrix.
/// </summary>
/// <param name="input">Tensor of shape (*, n, n) consisting of symmetric or Hermitian matrices, where * is zero or more batch dimensions.</param>
/// <param name="hermitian">Controls whether to consider the input to be Hermitian or symmetric. For real-valued matrices, this switch has no effect.</param>
/// <param name="check_errors">Controls whether to check the content of info and raise an error if it is non-zero.</param>
/// <returns></returns>
public static (Tensor LU, Tensor? Pivots, Tensor? Info) ldl_factor_ex(Tensor input, bool hermitian = true, bool check_errors = false)
{
var solution = THSLinalg_ldl_factor_ex(input.Handle, hermitian, check_errors, out var pivots, out var info);
if (solution == IntPtr.Zero)
torch.CheckForErrors();
return (new Tensor(solution), pivots == IntPtr.Zero ? null : new Tensor(pivots), info == IntPtr.Zero ? null : new Tensor(info));
}

/// <summary>
/// Computes the solution of a system of linear equations using the LDL factorization.
/// </summary>
/// <param name="LD">the n times n matrix or the batch of such matrices of size (*, n, n) where * is one or more batch dimensions</param>
/// <param name="pivots">the pivots corresponding to the LDL factorization of LD</param>
/// <param name="B">Right-hand side tensor of shape (*, n, k)</param>
/// <param name="hermitian">Whether to consider the decomposed matrix to be Hermitian or symmetric. For real-valued matrices, this switch has no effect</param>
/// <returns></returns>
public static Tensor ldl_solve(Tensor LD, Tensor pivots, Tensor B, bool hermitian = false)
{
var res = THSLinalg_ldl_solve(LD.Handle, pivots.Handle, B.Handle, hermitian);
if (res == IntPtr.Zero)
torch.CheckForErrors();
return new Tensor(res);
}

/// <summary>
/// Computes a solution to the least squares problem of a system of linear equations.
/// </summary>
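
A usage sketch for the new managed surface, assuming these statics are exposed as `torch.linalg.*` (as the partial class in `LinearAlgebra.cs` suggests); the small symmetric system below is made up for illustration:

```csharp
using TorchSharp;
using static TorchSharp.torch;

// Build a small symmetric positive-definite system A X = B (real-valued, so 'hermitian' has no effect).
var A0 = rand(3, 3);
var A = A0.matmul(A0.transpose(0, 1)) + eye(3);
var B = rand(3, 2);

// Compact LDL^T factorization, then solve against the right-hand side.
var (LD, pivots) = linalg.ldl_factor(A);
var X = linalg.ldl_solve(LD, pivots, B);

// The _ex variant additionally returns an 'info' tensor with per-matrix status codes.
var (LD2, pivots2, info) = linalg.ldl_factor_ex(A, hermitian: true, check_errors: false);
```

Note that `ldl_solve` expects the factor and pivots produced by `ldl_factor` (or `ldl_factor_ex`), not the original matrix.
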
49 changes: 29 additions & 20 deletions src/TorchSharp/Optimizers/Adadelta.cs
@@ -25,10 +25,10 @@ public static partial class optim
/// <param name="rho">Coefficient used for computing a running average of squared gradients (default: 0.9)</param>
/// <param name="eps">Term added to the denominator to improve numerical stability, i.e. avoid division-by-zero (default: 1e-6)</param>
/// <param name="weight_decay">Weight decay (L2 penalty) (default: 0)</param>
/// <returns></returns>
public static Adadelta Adadelta(IEnumerable<Parameter> parameters, double lr = 1.0, double rho = 0.9, double eps = 1e-6, double weight_decay = 0)
/// <param name="maximize">Maximize the params based on the objective, instead of minimizing</param>
public static Adadelta Adadelta(IEnumerable<Parameter> parameters, double lr = 1.0, double rho = 0.9, double eps = 1e-6, double weight_decay = 0, bool maximize = false)
{
return new Adadelta(parameters, lr, rho, eps, weight_decay);
return new Adadelta(parameters, lr, rho, eps, weight_decay, maximize);
}

/// <summary>
@@ -42,10 +42,10 @@ public static Adadelta Adadelta(IEnumerable<Parameter> parameters, double lr = 1
/// <param name="rho">Coefficient used for computing a running average of squared gradients (default: 0.9)</param>
/// <param name="eps">Term added to the denominator to improve numerical stability, i.e. avoid division-by-zero (default: 1e-6)</param>
/// <param name="weight_decay">Weight decay (L2 penalty) (default: 0)</param>
/// <returns></returns>
public static Adadelta Adadelta(IEnumerable<(string name, Parameter parameter)> parameters, double lr = 1.0, double rho = 0.9, double eps = 1e-6, double weight_decay = 0)
/// <param name="maximize">Maximize the params based on the objective, instead of minimizing</param>
public static Adadelta Adadelta(IEnumerable<(string name, Parameter parameter)> parameters, double lr = 1.0, double rho = 0.9, double eps = 1e-6, double weight_decay = 0, bool maximize = false)
{
return new Adadelta(parameters.Select(np => np.parameter), lr, rho, eps, weight_decay);
return new Adadelta(parameters.Select(np => np.parameter), lr, rho, eps, weight_decay, maximize);
}

/// <summary>
@@ -59,10 +59,10 @@ public static Adadelta Adadelta(IEnumerable<(string name, Parameter parameter)>
/// <param name="rho">Coefficient used for computing a running average of squared gradients (default: 0.9)</param>
/// <param name="eps">Term added to the denominator to improve numerical stability, i.e. avoid division-by-zero (default: 1e-6)</param>
/// <param name="weight_decay">Weight decay (L2 penalty) (default: 0)</param>
/// <returns></returns>
public static Adadelta Adadelta(IEnumerable<Adadelta.ParamGroup> parameters, double lr = 1.0, double rho = 0.9, double eps = 1e-6, double weight_decay = 0)
/// <param name="maximize">Maximize the params based on the objective, instead of minimizing</param>
public static Adadelta Adadelta(IEnumerable<Adadelta.ParamGroup> parameters, double lr = 1.0, double rho = 0.9, double eps = 1e-6, double weight_decay = 0, bool maximize = false)
{
return new Adadelta(parameters, lr, rho, eps, weight_decay);
return new Adadelta(parameters, lr, rho, eps, weight_decay, maximize);
}
}
}
@@ -79,8 +79,9 @@ public class Adadelta : OptimizerHelper
/// <param name="rho">Coefficient used for computing a running average of squared gradients (default: 0.9)</param>
/// <param name="eps">Term added to the denominator to improve numerical stability, i.e. avoid division-by-zero (default: 1e-6)</param>
/// <param name="weight_decay">Weight decay (L2 penalty) (default: 0)</param>
public Adadelta(IEnumerable<Parameter> parameters, double lr, double rho = 0.9, double eps = 1e-6, double weight_decay = 0)
: this(new ParamGroup[] { new ParamGroup { Parameters = parameters } }, lr, rho, eps, weight_decay)
/// <param name="maximize">Maximize the params based on the objective, instead of minimizing</param>
public Adadelta(IEnumerable<Parameter> parameters, double lr, double rho = 0.9, double eps = 1e-6, double weight_decay = 0, bool maximize = false)
: this(new ParamGroup[] { new ParamGroup { Parameters = parameters } }, lr, rho, eps, weight_decay, maximize)
{
}

@@ -92,7 +93,8 @@ public Adadelta(IEnumerable<Parameter> parameters, double lr, double rho = 0.9,
/// <param name="rho">Coefficient used for computing a running average of squared gradients (default: 0.9)</param>
/// <param name="eps">Term added to the denominator to improve numerical stability, i.e. avoid division-by-zero (default: 1e-6)</param>
/// <param name="weight_decay">Weight decay (L2 penalty) (default: 0)</param>
public Adadelta(IEnumerable<ParamGroup> parameters, double lr = 1.0, double rho = 0.9, double eps = 1e-6, double weight_decay = 0)
/// <param name="maximize">Maximize the params based on the objective, instead of minimizing</param>
public Adadelta(IEnumerable<ParamGroup> parameters, double lr = 1.0, double rho = 0.9, double eps = 1e-6, double weight_decay = 0, bool maximize = false)
{
if (lr < 0.0) throw new ArgumentException($"Invalid learning rate: {lr}");
if (rho < 0.0 || rho > 1.0) throw new ArgumentException($"Invalid rho value: {rho}");
Expand All @@ -104,6 +106,7 @@ public Adadelta(IEnumerable<ParamGroup> parameters, double lr = 1.0, double rho
InitialLearningRate = lr,
rho = rho,
eps = eps,
maximize = maximize,
weight_decay = weight_decay
};

Expand All @@ -116,23 +119,24 @@ public Adadelta(IEnumerable<ParamGroup> parameters, double lr = 1.0, double rho
}

/// <summary>
/// Performs a single optimization step (parameter update).
/// </summary>
/// <param name="closure">A closure that reevaluates the model and returns the loss. Optional for most optimizers.</param>
/// <returns></returns>
public override Tensor step(Func<Tensor> closure = null)
/// Performs a single optimization step (parameter update).
/// </summary>
/// <param name="closure">A closure that reevaluates the model and returns the loss. Optional for most optimizers.</param>
/// <returns></returns>
public override Tensor step(Func<Tensor> closure = null)
{
return _step<ParamGroup>(group => {
var options = group.Options as Options;
var rho = options.rho.Value;
var eps = options.eps.Value;
var weight_decay = options.weight_decay.Value;
var maximize = options.maximize.Value;
var lr = options.LearningRate.Value;
foreach (var param in group.Parameters) {
var grad = param.grad();
var grad = (maximize) ? -param.grad() : param.grad();
if (grad is null) continue;
@@ -253,6 +257,7 @@ public override void add_param_group(Modules.ParamGroup param_group)
if (!opt.rho.HasValue) opt.rho = def.rho;
if (!opt.eps.HasValue) opt.eps = def.eps;
if (!opt.weight_decay.HasValue) opt.weight_decay = def.weight_decay;
if (!opt.maximize.HasValue) opt.maximize = def.maximize;

opt.InitialLearningRate = opt.LearningRate.Value;

@@ -272,6 +277,7 @@ public class Options : OptimizerOptions
public double? rho;
public double? eps;
public double? weight_decay;
public bool? maximize;

/// <summary>
/// Load optimizer options (param-group hyperparameters) from another optimizer.
@@ -284,6 +290,7 @@ public override void LoadStateDict(OptimizerOptions source)
rho = opts.rho;
eps = opts.eps;
weight_decay = opts.weight_decay;
maximize = opts.maximize;
}

/// <summary>
@@ -296,6 +303,7 @@ public override void LoadStateDict(BinaryReader reader)
rho = reader.ReadDouble();
eps = reader.ReadDouble();
weight_decay = reader.ReadDouble();
maximize = reader.ReadBoolean();
}

/// <summary>
@@ -308,6 +316,7 @@ public override void SaveStateDict(BinaryWriter writer)
writer.Write(rho.Value);
writer.Write(eps.Value);
writer.Write(weight_decay.Value);
writer.Write(maximize.Value);
}
}

@@ -317,8 +326,8 @@ public ParamGroup() { }

public ParamGroup(IEnumerable<Parameter> parameters, Options options) : base(parameters, options) { }

public ParamGroup(IEnumerable<Parameter> parameters, double lr = 1.0, double rho = 0.9, double eps = 1e-6, double weight_decay = 0)
: base(parameters, new Adadelta.Options { LearningRate = lr, rho = rho, eps = eps, weight_decay = weight_decay })
public ParamGroup(IEnumerable<Parameter> parameters, double lr = 1.0, double rho = 0.9, double eps = 1e-6, double weight_decay = 0, bool maximize = false)
: base(parameters, new Adadelta.Options { LearningRate = lr, rho = rho, eps = eps, weight_decay = weight_decay, maximize = maximize })
{
}
}
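
A minimal sketch of the new `maximize` flag in use, assuming the `torch.optim.Adadelta` factory shown above; `model`, `input`, and the scalar being maximized are stand-ins for illustration:

```csharp
using TorchSharp;
using static TorchSharp.torch;

var model = nn.Linear(10, 1);

// maximize: true makes step() move the parameters uphill on the objective
// (internally the gradient is negated, per the change to step() above).
var optimizer = optim.Adadelta(model.parameters(), lr: 1.0, rho: 0.9, maximize: true);

var input = rand(8, 10);
var objective = model.forward(input).mean();   // a scalar we want to increase

optimizer.zero_grad();
objective.backward();
optimizer.step();   // parameters are updated to increase 'objective'
```

With `maximize: false` (the default), the same loop minimizes the scalar, matching the optimizer's previous behavior.
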
