Skip to content

Commit 9c91420

Browse files
authored
Autograd negation and tweaks (#86)
* Tweak to also accept Tensor class as `like:` param
* Fix grad context's `#variable` method for creating var from scalar
* Tweak to carry on `requires_grad`, add negation for grad var
* Test negation for grad var
* Fix compiler warning about `Backend::Storage#initialize` param name
1 parent 5dfe5cf commit 9c91420

File tree

7 files changed

+61
-7
lines changed

7 files changed

+61
-7
lines changed

spec/grad/gates_arithmetic_spec.cr

+28
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,34 @@
2424
require "../spec_helper"
2525

2626
describe Num::Grad do
27+
it "backpropogates for negation" do
28+
ctx = Num::Grad::Context(Float32Tensor).new
29+
30+
a = ctx.variable([1.0_f32, 2.0_f32])
31+
32+
result = -a
33+
result.backprop
34+
35+
expected = [-1_f32, -1_f32].to_tensor
36+
37+
Num::Testing.tensor_equal(a.grad, expected).should be_true
38+
end
39+
40+
{% if flag?(:opencl) %}
41+
it "backpropogates for negation opencl", tags: "opencl" do
42+
ctx = Num::Grad::Context(Float32ClTensor).new
43+
44+
a = ctx.variable([1.0_f32, 2.0_f32].to_tensor(OCL))
45+
46+
result = -a
47+
result.backprop
48+
49+
expected = [-1_f32, -1_f32].to_tensor
50+
51+
Num::Testing.tensor_equal(a.grad.cpu, expected).should be_true
52+
end
53+
{% end %}
54+
2755
it "backpropogates for addition" do
2856
ctx = Num::Grad::Context(Float32Tensor).new
2957

spec/grad/primitives_spec.cr

+9
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,15 @@ describe Num::Grad::Context do
4040
t_var = ctx.variable(t)
4141
t_var.context.should eq ctx
4242
end
43+
44+
it "can create a variable with scalar" do
45+
ctx = Num::Grad::Context(Float32Tensor).new
46+
t = 3.14_f32
47+
t_var = ctx.variable(t)
48+
t_var.context.should eq ctx
49+
t_var.value[0].should eq t # has the scalar
50+
t_var.value.size.should eq 1 # has only one element
51+
end
4352
end
4453

4554
describe Num::Grad do

src/grad/primitives/context.cr

+1-1
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,7 @@ class Num::Grad::Context(T)
103103
# ctx.variable(3.0)
104104
# ```
105105
def variable(value : Number, requires_grad : Bool = true) : Num::Grad::Variable(T)
106-
Num::Grad::Variable.new(self, T.new(value), requires_grad)
106+
Num::Grad::Variable.new(self, Num.as_tensor(value, like: T), requires_grad)
107107
end
108108

109109
# Creates a new variable within the `Context`. This variable

src/grad/variable.cr

+18-2
Original file line numberDiff line numberDiff line change
@@ -221,7 +221,8 @@ class Num::Grad::Variable(T)
221221
# x.sum(1) # => [[3.0], [7.0]]
222222
# ```
223223
def sum(axis : Int) : Num::Grad::Variable(T)
224-
result = @context.variable(Num.sum(@value, axis, dims: true))
224+
s = Num.sum(@value, axis, dims: true)
225+
result = @context.variable(s, requires_grad: @requires_grad)
225226
if self.is_grad_needed
226227
gate = Num::Grad::SumGate(T).new self
227228
gate.cache(result, self)
@@ -246,10 +247,25 @@ class Num::Grad::Variable(T)
246247
# ```
247248
def mean(axis : Int) : Num::Grad::Variable(T)
248249
s = sum(axis)
249-
b = @context.variable(Num.as_tensor(@value.shape[axis], like: s.value))
250+
sz = Num.as_tensor(@value.shape[axis], like: s.value)
251+
b = @context.variable(sz, requires_grad: @requires_grad)
250252
s / b
251253
end
252254

255+
# Negates the variable
256+
#
257+
# ## Examples
258+
#
259+
# ```
260+
# ctx = Num::Grad::Context(Tensor(Float64, CPU(Float64))).new
261+
# x = ctx.variable([1.0, 2.0])
262+
# -x # => [-1.0, -2.0]
263+
# ```
264+
def -
265+
zero = @context.variable(0, requires_grad: @requires_grad)
266+
zero - self
267+
end
268+
253269
private macro num_op(fn, gate_cls)
254270
def {{fn.id}} : Num::Grad::Variable(T)
255271
result = @context.variable(Num.{{ fn.id }}(@value))

src/tensor/backends/agnostic/impl_manipulate.cr

+2-1
Original file line numberDiff line numberDiff line change
@@ -331,7 +331,8 @@ module Num
331331
# ```
332332
# t = Tensor(Float32, OCL(Float32)).from_array([0.5, 0.2])
333333
# x = Num.as_tensor(12, like: t)
334-
def as_tensor(value : Number, like : Tensor(U, V)) forall U, V
334+
# x = Num.as_tensor(12, like: Tensor(Float32, OCL(Float32)))
335+
def as_tensor(value : Number, like : Tensor(U, V) | Tensor(U, V).class) forall U, V
335336
Tensor(U, V).from_array([U.new(value)], device = V)
336337
end
337338
end

src/tensor/backends/cpu/impl_allocation.cr

+2-2
Original file line numberDiff line numberDiff line change
@@ -109,8 +109,8 @@ class CPU(T) < Num::Backend::Storage(T)
109109
# a = Pointer(Int32).malloc(10)
110110
# s = CPU.new(a, [5, 2])
111111
# ```
112-
def initialize(data : Pointer(T), shape : Array(Int), strides : Array(Int))
113-
@data = data
112+
def initialize(hostptr : Pointer(T), shape : Array(Int), strides : Array(Int))
113+
@data = hostptr
114114
end
115115

116116
# Converts a CPU storage to a crystal pointer

src/tensor/backends/util_storage.cr

+1-1
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ abstract class Num::Backend::Storage(T)
2626
abstract def initialize(shape : Array(Int), strides : Array(Int))
2727
abstract def initialize(shape : Array(Int), order : Num::OrderType, value : T)
2828
abstract def initialize(shape : Array(Int), strides : Array(Int), value : T)
29-
abstract def initialize(data : Pointer(T), shape : Array(Int), strides : Array(Int))
29+
abstract def initialize(hostptr : Pointer(T), shape : Array(Int), strides : Array(Int))
3030
abstract def update_metadata(shape : Array(Int32), strides : Array(Int32))
3131
abstract def to_unsafe
3232
end

0 commit comments

Comments
 (0)