Merge pull request dotnet#859 from NiklasGustafsson/main
Adding a few 1.12 APIs.
NiklasGustafsson committed Dec 1, 2022
2 parents 03ceea3 + 749b706 commit 2230dec
Showing 11 changed files with 176 additions and 37 deletions.
8 changes: 8 additions & 0 deletions RELEASENOTES.md
@@ -2,6 +2,14 @@

Releases, starting with 9/2/2021, are listed with the most recent release at the top.

## NuGet Version 0.99.2

__API Changes__:

Adding 'maximize' argument to the Adadelta optimizer
Adding linalg.ldl_factor and linalg.ldl_solve
Adding SoftplusTransform

## NuGet Version 0.99.1

__Breaking Changes__:
20 changes: 10 additions & 10 deletions azure-pipelines.yml
@@ -51,7 +51,7 @@ jobs:
buildScript: dotnet build /p:SkipCuda=true /p:SkipNetFxBuild=true -c
testScript: dotnet test /p:SkipCuda=true /p:SkipNetFxBuild=true --blame -c
pool:
vmImage: 'windows-2022'
vmImage: 'windows-2019'

- template: /build/ci/job-template.yml
parameters:
@@ -60,7 +60,7 @@ jobs:
buildScript: dotnet build /p:SkipCuda=true /p:SkipNetCoreBuild=true -c
testScript: dotnet test /p:SkipCuda=true /p:SkipNetCoreBuild=true --blame -c
pool:
vmImage: 'windows-2022'
vmImage: 'windows-2019'

- template: /build/ci/job-template.yml
parameters:
@@ -133,7 +133,7 @@ jobs:
DOTNET_SKIP_FIRST_TIME_EXPERIENCE: 1
DOTNET_MULTILEVEL_LOOKUP: 0
pool:
vmImage: 'windows-2022'
vmImage: 'windows-2019'

steps:

@@ -209,7 +209,7 @@ jobs:
DOTNET_SKIP_FIRST_TIME_EXPERIENCE: 1
DOTNET_MULTILEVEL_LOOKUP: 0
pool:
vmImage: 'windows-2022'
vmImage: 'windows-2019'
steps:

# We are 10GB space-constrained on the Azure Pipelines CI system so clean up what we can
@@ -346,7 +346,7 @@ jobs:
DOTNET_SKIP_FIRST_TIME_EXPERIENCE: 1
DOTNET_MULTILEVEL_LOOKUP: 0
pool:
vmImage: 'windows-2022'
vmImage: 'windows-2019'
steps:

# We are 10GB space-constrained on the Azure Pipelines CI system so clean up what we can
@@ -458,7 +458,7 @@ jobs:
variables:
- group: SignClient Credentials
pool:
vmImage: 'windows-2022'
vmImage: 'windows-2019'
steps:

- task: DotNetCoreCLI@2
@@ -507,7 +507,7 @@ jobs:
variables:
- group: SignClient Credentials
pool:
vmImage: 'windows-2022'
vmImage: 'windows-2019'
steps:

- task: DotNetCoreCLI@2
@@ -586,7 +586,7 @@ jobs:
DOTNET_SKIP_FIRST_TIME_EXPERIENCE: 1
DOTNET_MULTILEVEL_LOOKUP: 0
pool:
vmImage: 'windows-2022'
vmImage: 'windows-2019'
steps:

# Push packages to feed
@@ -687,7 +687,7 @@ jobs:
DOTNET_SKIP_FIRST_TIME_EXPERIENCE: 1
DOTNET_MULTILEVEL_LOOKUP: 0
pool:
vmImage: 'windows-2022'
vmImage: 'windows-2019'
steps:

# Push packages to feed
@@ -772,7 +772,7 @@ jobs:
DOTNET_SKIP_FIRST_TIME_EXPERIENCE: 1
DOTNET_MULTILEVEL_LOOKUP: 0
pool:
vmImage: 'windows-2022'
vmImage: 'windows-2019'
# container: UbuntuContainer
# Exact copy of the dependency install above - TODO share this somewhere
steps:
22 changes: 22 additions & 0 deletions src/Native/LibTorchSharp/THSLinearAlgebra.cpp
@@ -148,6 +148,28 @@ Tensor THSLinalg_lu_factor(const Tensor A, const bool pivot, Tensor* pivots)
return ResultTensor(std::get<0>(res));
}

Tensor THSLinalg_ldl_factor(const Tensor A, const bool hermitian, Tensor* pivots)
{
std::tuple<at::Tensor, at::Tensor> res;
CATCH(res = torch::linalg_ldl_factor(*A, hermitian););
*pivots = ResultTensor(std::get<1>(res));
return ResultTensor(std::get<0>(res));
}

Tensor THSLinalg_ldl_factor_ex(const Tensor A, const bool hermitian, const bool check_errors, Tensor* pivots, Tensor* info)
{
std::tuple<at::Tensor, at::Tensor, at::Tensor> res;
CATCH(res = torch::linalg_ldl_factor_ex(*A, hermitian, check_errors););
*pivots = ResultTensor(std::get<1>(res));
*info = ResultTensor(std::get<2>(res));
return ResultTensor(std::get<0>(res));
}

Tensor THSLinalg_ldl_solve(const Tensor LD, const Tensor pivots, const Tensor B, const bool hermitian)
{
CATCH_TENSOR(torch::linalg_ldl_solve(*LD, *pivots, *B, hermitian));
}

Tensor THSLinalg_matrix_norm(const Tensor tensor, const Scalar ord, const int64_t* dim, const int dim_length, const bool keepdim)
{
auto dims = c10::ArrayRef<int64_t>(dim, dim_length);
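
The three new exports follow the library's convention for multi-result factorizations: the primary factor is the function's return value, and the secondary tensors (pivots, info) come back through `Tensor*` out parameters. Below is a sketch of the managed P/Invoke declarations this implies; the actual interop file is not part of this diff, so the class name, library name, and marshaling attributes are assumptions.

```csharp
using System;
using System.Runtime.InteropServices;

// Hypothetical binding class -- TorchSharp's real interop layer is not shown in this commit.
internal static class NativeMethods
{
    // Returns the LD factor; the pivots tensor comes back through the out parameter.
    [DllImport("LibTorchSharp")]
    internal static extern IntPtr THSLinalg_ldl_factor(
        IntPtr A, [MarshalAs(UnmanagedType.U1)] bool hermitian, out IntPtr pivots);

    // As above, plus an 'info' tensor reporting per-matrix factorization status.
    [DllImport("LibTorchSharp")]
    internal static extern IntPtr THSLinalg_ldl_factor_ex(
        IntPtr A, [MarshalAs(UnmanagedType.U1)] bool hermitian,
        [MarshalAs(UnmanagedType.U1)] bool check_errors, out IntPtr pivots, out IntPtr info);

    // Solves A X = B given the LD factor and pivots produced above.
    [DllImport("LibTorchSharp")]
    internal static extern IntPtr THSLinalg_ldl_solve(
        IntPtr LD, IntPtr pivots, IntPtr B, [MarshalAs(UnmanagedType.U1)] bool hermitian);
}
```

A zero return value (`IntPtr.Zero`) signals that the native call threw inside the `CATCH` macro, which is why the managed wrappers in `LinearAlgebra.cs` below call `torch.CheckForErrors()` before constructing the result tensors.
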
4 changes: 4 additions & 0 deletions src/Native/LibTorchSharp/THSTensor.h
@@ -1404,6 +1404,10 @@ EXPORT_API(Tensor) THSLinalg_lstsq_rcond(const Tensor A, const Tensor B, const d
EXPORT_API(Tensor) THSLinalg_lu(const Tensor A, const bool pivot, Tensor* L, Tensor* U);
EXPORT_API(Tensor) THSLinalg_lu_factor(const Tensor A, const bool pivot, Tensor* pivots);

EXPORT_API(Tensor) THSLinalg_ldl_factor(const Tensor A, const bool hermitian, Tensor* pivots);
EXPORT_API(Tensor) THSLinalg_ldl_factor_ex(const Tensor A, const bool hermitian, const bool check_errors, Tensor* pivots, Tensor* info);
EXPORT_API(Tensor) THSLinalg_ldl_solve(const Tensor LD, const Tensor pivots, const Tensor B, const bool hermitian);

EXPORT_API(Tensor) THSLinalg_matrix_power(const Tensor target, const int64_t n);

EXPORT_API(Tensor) THSLinalg_matrix_norm(const Tensor tensor, const Scalar ord, const int64_t* dim, const int dim_length, const bool keepdim);
6 changes: 3 additions & 3 deletions src/TorchSharp/Distributions/Transforms.cs
@@ -529,11 +529,11 @@ public class SoftplusTransform : Transform

protected internal override Tensor _sign() => 1;

protected internal override Tensor log_abs_det_jacobian(Tensor x, Tensor y) => torch.WrappedTensorDisposeScope(() => -nn.functional.softplus(-x));
protected internal override Tensor log_abs_det_jacobian(Tensor x, Tensor y) => -nn.functional.softplus(-x);

protected internal override Tensor _call(Tensor x) => nn.functional.softplus(x);
protected internal override Tensor _call(Tensor x) => nn.functional.softplus(-x);

protected internal override Tensor _inverse(Tensor y) => torch.WrappedTensorDisposeScope(() => (-y).expm1().neg().log() + y);
protected internal override Tensor _inverse(Tensor y) => (-y).expm1().neg().log() + y;
}

public class SoftmaxTransform : Transform
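
For reference, the expressions in `SoftplusTransform` follow directly from the definition of softplus; the identities below are a worked restatement (standard math, not part of the diff):

```latex
\begin{aligned}
y &= \operatorname{softplus}(x) = \log(1 + e^{x}), &
x &= \log(e^{y} - 1) = y + \log\bigl(1 - e^{-y}\bigr), \\
\frac{dy}{dx} &= \sigma(x) = \frac{1}{1 + e^{-x}}, &
\log\left|\frac{dy}{dx}\right| &= -\log\bigl(1 + e^{-x}\bigr) = -\operatorname{softplus}(-x).
\end{aligned}
```

The inverse identity is exactly what `(-y).expm1().neg().log() + y` computes, and the log-det-Jacobian identity is what `-nn.functional.softplus(-x)` computes; the forward map itself is `softplus(x)`.
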
45 changes: 45 additions & 0 deletions src/TorchSharp/LinearAlgebra.cs
@@ -274,6 +274,51 @@ public static (Tensor LU, Tensor? Pivots) lu_factor(Tensor input, bool pivot = t
return (new Tensor(solution), pivots == IntPtr.Zero ? null : new Tensor(pivots));
}

/// <summary>
/// Computes a compact representation of the LDL factorization of a Hermitian or symmetric (possibly indefinite) matrix.
/// </summary>
/// <param name="input">Tensor of shape (*, n, n) consisting of symmetric or Hermitian matrices, where * is zero or more batch dimensions.</param>
/// <param name="hermitian">Controls whether to consider the input to be Hermitian or symmetric. For real-valued matrices, this switch has no effect.</param>
/// <returns></returns>
public static (Tensor LU, Tensor? Pivots) ldl_factor(Tensor input, bool hermitian = true)
{
var solution = THSLinalg_ldl_factor(input.Handle, hermitian, out var pivots);
if (solution == IntPtr.Zero)
torch.CheckForErrors();
return (new Tensor(solution), pivots == IntPtr.Zero ? null : new Tensor(pivots));
}

/// <summary>
/// Computes a compact representation of the LDL factorization of a Hermitian or symmetric (possibly indefinite) matrix.
/// </summary>
/// <param name="input">Tensor of shape (*, n, n) consisting of symmetric or Hermitian matrices, where * is zero or more batch dimensions.</param>
/// <param name="hermitian">Controls whether to consider the input to be Hermitian or symmetric. For real-valued matrices, this switch has no effect.</param>
/// <param name="check_errors">Controls whether to check the content of info and raise an error if it is non-zero.</param>
/// <returns></returns>
public static (Tensor LU, Tensor? Pivots, Tensor? Info) ldl_factor_ex(Tensor input, bool hermitian = true, bool check_errors = false)
{
var solution = THSLinalg_ldl_factor_ex(input.Handle, hermitian, check_errors, out var pivots, out var info);
if (solution == IntPtr.Zero)
torch.CheckForErrors();
return (new Tensor(solution), pivots == IntPtr.Zero ? null : new Tensor(pivots), info == IntPtr.Zero ? null : new Tensor(info));
}

/// <summary>
/// Computes the solution of a system of linear equations using the LDL factorization.
/// </summary>
/// <param name="LD">the n times n matrix or the batch of such matrices of size (*, n, n) where * is one or more batch dimensions</param>
/// <param name="pivots">the pivots corresponding to the LDL factorization of LD</param>
/// <param name="B">Right-hand side tensor of shape (*, n, k)</param>
/// <param name="hermitian">Whether to consider the decomposed matrix to be Hermitian or symmetric. For real-valued matrices, this switch has no effect</param>
/// <returns></returns>
public static Tensor ldl_solve(Tensor LD, Tensor pivots, Tensor B, bool hermitian = false)
{
var res = THSLinalg_ldl_solve(LD.Handle, pivots.Handle, B.Handle, hermitian);
if (res == IntPtr.Zero)
torch.CheckForErrors();
return new Tensor(res);
}

/// <summary>
/// Computes a solution to the least squares problem of a system of linear equations.
/// </summary>
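
A usage sketch for the new managed surface, assuming these statics are exposed as `torch.linalg.*` (as the partial class in `LinearAlgebra.cs` suggests); the small symmetric system below is made up for illustration:

```csharp
using TorchSharp;
using static TorchSharp.torch;

// Build a small symmetric positive-definite system A X = B (real-valued, so 'hermitian' has no effect).
var A0 = rand(3, 3);
var A = A0.matmul(A0.transpose(0, 1)) + eye(3);
var B = rand(3, 2);

// Compact LDL^T factorization, then solve against the right-hand side.
var (LD, pivots) = linalg.ldl_factor(A);
var X = linalg.ldl_solve(LD, pivots, B);

// The _ex variant additionally returns an 'info' tensor with per-matrix status codes.
var (LD2, pivots2, info) = linalg.ldl_factor_ex(A, hermitian: true, check_errors: false);
```

Note that `ldl_solve` expects the factor and pivots produced by `ldl_factor` (or `ldl_factor_ex`), not the original matrix.
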
49 changes: 29 additions & 20 deletions src/TorchSharp/Optimizers/Adadelta.cs
@@ -25,10 +25,10 @@ public static partial class optim
/// <param name="rho">Coefficient used for computing a running average of squared gradients (default: 0.9)</param>
/// <param name="eps">Term added to the denominator to improve numerical stability, i.e. avoid division-by-zero (default: 1e-6)</param>
/// <param name="weight_decay">Weight decay (L2 penalty) (default: 0)</param>
/// <returns></returns>
public static Adadelta Adadelta(IEnumerable<Parameter> parameters, double lr = 1.0, double rho = 0.9, double eps = 1e-6, double weight_decay = 0)
/// <param name="maximize">Maximize the params based on the objective, instead of minimizing</param>
public static Adadelta Adadelta(IEnumerable<Parameter> parameters, double lr = 1.0, double rho = 0.9, double eps = 1e-6, double weight_decay = 0, bool maximize = false)
{
return new Adadelta(parameters, lr, rho, eps, weight_decay);
return new Adadelta(parameters, lr, rho, eps, weight_decay, maximize);
}

/// <summary>
@@ -42,10 +42,10 @@ public static Adadelta Adadelta(IEnumerable<Parameter> parameters, double lr = 1
/// <param name="rho">Coefficient used for computing a running average of squared gradients (default: 0.9)</param>
/// <param name="eps">Term added to the denominator to improve numerical stability, i.e. avoid division-by-zero (default: 1e-6)</param>
/// <param name="weight_decay">Weight decay (L2 penalty) (default: 0)</param>
/// <returns></returns>
public static Adadelta Adadelta(IEnumerable<(string name, Parameter parameter)> parameters, double lr = 1.0, double rho = 0.9, double eps = 1e-6, double weight_decay = 0)
/// <param name="maximize">Maximize the params based on the objective, instead of minimizing</param>
public static Adadelta Adadelta(IEnumerable<(string name, Parameter parameter)> parameters, double lr = 1.0, double rho = 0.9, double eps = 1e-6, double weight_decay = 0, bool maximize = false)
{
return new Adadelta(parameters.Select(np => np.parameter), lr, rho, eps, weight_decay);
return new Adadelta(parameters.Select(np => np.parameter), lr, rho, eps, weight_decay, maximize);
}

/// <summary>
@@ -59,10 +59,10 @@ public static Adadelta Adadelta(IEnumerable<(string name, Parameter parameter)>
/// <param name="rho">Coefficient used for computing a running average of squared gradients (default: 0.9)</param>
/// <param name="eps">Term added to the denominator to improve numerical stability, i.e. avoid division-by-zero (default: 1e-6)</param>
/// <param name="weight_decay">Weight decay (L2 penalty) (default: 0)</param>
/// <returns></returns>
public static Adadelta Adadelta(IEnumerable<Adadelta.ParamGroup> parameters, double lr = 1.0, double rho = 0.9, double eps = 1e-6, double weight_decay = 0)
/// <param name="maximize">Maximize the params based on the objective, instead of minimizing</param>
public static Adadelta Adadelta(IEnumerable<Adadelta.ParamGroup> parameters, double lr = 1.0, double rho = 0.9, double eps = 1e-6, double weight_decay = 0, bool maximize = false)
{
return new Adadelta(parameters, lr, rho, eps, weight_decay);
return new Adadelta(parameters, lr, rho, eps, weight_decay, maximize);
}
}
}
@@ -79,8 +79,9 @@ public class Adadelta : OptimizerHelper
/// <param name="rho">Coefficient used for computing a running average of squared gradients (default: 0.9)</param>
/// <param name="eps">Term added to the denominator to improve numerical stability, i.e. avoid division-by-zero (default: 1e-6)</param>
/// <param name="weight_decay">Weight decay (L2 penalty) (default: 0)</param>
public Adadelta(IEnumerable<Parameter> parameters, double lr, double rho = 0.9, double eps = 1e-6, double weight_decay = 0)
: this(new ParamGroup[] { new ParamGroup { Parameters = parameters } }, lr, rho, eps, weight_decay)
/// <param name="maximize">Maximize the params based on the objective, instead of minimizing</param>
public Adadelta(IEnumerable<Parameter> parameters, double lr, double rho = 0.9, double eps = 1e-6, double weight_decay = 0, bool maximize = false)
: this(new ParamGroup[] { new ParamGroup { Parameters = parameters } }, lr, rho, eps, weight_decay, maximize)
{
}

@@ -92,7 +93,8 @@ public Adadelta(IEnumerable<Parameter> parameters, double lr, double rho = 0.9,
/// <param name="rho">Coefficient used for computing a running average of squared gradients (default: 0.9)</param>
/// <param name="eps">Term added to the denominator to improve numerical stability, i.e. avoid division-by-zero (default: 1e-6)</param>
/// <param name="weight_decay">Weight decay (L2 penalty) (default: 0)</param>
public Adadelta(IEnumerable<ParamGroup> parameters, double lr = 1.0, double rho = 0.9, double eps = 1e-6, double weight_decay = 0)
/// <param name="maximize">Maximize the params based on the objective, instead of minimizing</param>
public Adadelta(IEnumerable<ParamGroup> parameters, double lr = 1.0, double rho = 0.9, double eps = 1e-6, double weight_decay = 0, bool maximize = false)
{
if (lr < 0.0) throw new ArgumentException($"Invalid learning rate: {lr}");
if (rho < 0.0 || rho > 1.0) throw new ArgumentException($"Invalid rho value: {rho}");
Expand All @@ -104,6 +106,7 @@ public Adadelta(IEnumerable<ParamGroup> parameters, double lr = 1.0, double rho
InitialLearningRate = lr,
rho = rho,
eps = eps,
maximize = maximize,
weight_decay = weight_decay
};

Expand All @@ -116,23 +119,24 @@ public Adadelta(IEnumerable<ParamGroup> parameters, double lr = 1.0, double rho
}

/// <summary>
/// Performs a single optimization step (parameter update).
/// </summary>
/// <param name="closure">A closure that reevaluates the model and returns the loss. Optional for most optimizers.</param>
/// <returns></returns>
public override Tensor step(Func<Tensor> closure = null)
/// Performs a single optimization step (parameter update).
/// </summary>
/// <param name="closure">A closure that reevaluates the model and returns the loss. Optional for most optimizers.</param>
/// <returns></returns>
public override Tensor step(Func<Tensor> closure = null)
{
return _step<ParamGroup>(group => {
var options = group.Options as Options;
var rho = options.rho.Value;
var eps = options.eps.Value;
var weight_decay = options.weight_decay.Value;
var maximize = options.maximize.Value;
var lr = options.LearningRate.Value;
foreach (var param in group.Parameters) {
var grad = param.grad();
var grad = (maximize) ? -param.grad() : param.grad();
if (grad is null) continue;
@@ -253,6 +257,7 @@ public override void add_param_group(Modules.ParamGroup param_group)
if (!opt.rho.HasValue) opt.rho = def.rho;
if (!opt.eps.HasValue) opt.eps = def.eps;
if (!opt.weight_decay.HasValue) opt.weight_decay = def.weight_decay;
if (!opt.maximize.HasValue) opt.maximize = def.maximize;

opt.InitialLearningRate = opt.LearningRate.Value;

@@ -272,6 +277,7 @@ public class Options : OptimizerOptions
public double? rho;
public double? eps;
public double? weight_decay;
public bool? maximize;

/// <summary>
/// Load optimizer options (param-group hyperparameters) from another optimizer.
@@ -284,6 +290,7 @@ public override void LoadStateDict(OptimizerOptions source)
rho = opts.rho;
eps = opts.eps;
weight_decay = opts.weight_decay;
maximize = opts.maximize;
}

/// <summary>
@@ -296,6 +303,7 @@ public override void LoadStateDict(BinaryReader reader)
rho = reader.ReadDouble();
eps = reader.ReadDouble();
weight_decay = reader.ReadDouble();
maximize = reader.ReadBoolean();
}

/// <summary>
@@ -308,6 +316,7 @@ public override void SaveStateDict(BinaryWriter writer)
writer.Write(rho.Value);
writer.Write(eps.Value);
writer.Write(weight_decay.Value);
writer.Write(maximize.Value);
}
}

@@ -317,8 +326,8 @@ public ParamGroup() { }

public ParamGroup(IEnumerable<Parameter> parameters, Options options) : base(parameters, options) { }

public ParamGroup(IEnumerable<Parameter> parameters, double lr = 1.0, double rho = 0.9, double eps = 1e-6, double weight_decay = 0)
: base(parameters, new Adadelta.Options { LearningRate = lr, rho = rho, eps = eps, weight_decay = weight_decay })
public ParamGroup(IEnumerable<Parameter> parameters, double lr = 1.0, double rho = 0.9, double eps = 1e-6, double weight_decay = 0, bool maximize = false)
: base(parameters, new Adadelta.Options { LearningRate = lr, rho = rho, eps = eps, weight_decay = weight_decay, maximize = maximize })
{
}
}
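
A minimal sketch of the new `maximize` flag in use, assuming the `torch.optim.Adadelta` factory shown above; `model`, `input`, and the scalar being maximized are stand-ins for illustration:

```csharp
using TorchSharp;
using static TorchSharp.torch;

var model = nn.Linear(10, 1);

// maximize: true makes step() move the parameters uphill on the objective
// (internally the gradient is negated, per the change to step() above).
var optimizer = optim.Adadelta(model.parameters(), lr: 1.0, rho: 0.9, maximize: true);

var input = rand(8, 10);
var objective = model.forward(input).mean();   // a scalar we want to increase

optimizer.zero_grad();
objective.backward();
optimizer.step();   // parameters are updated to increase 'objective'
```

With `maximize: false` (the default), the same loop minimizes the scalar, matching the optimizer's previous behavior.
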
