The resultant operator has shape $(nm \times nm)$ and, as such, can act on vectors of length $nm$. The Kronecker sum of $\mathbf{A}$ and $\mathbf{B}$, denoted $\mathbf{A} \oplus \mathbf{B}$, can be defined in terms of the Kronecker product as

$$\mathbf{A} \oplus \mathbf{B} = \mathbf{A} \otimes \mathbf{I}_m + \mathbf{I}_n \otimes \mathbf{B},$$
where $\mathbf{I}_d$ is the $d$-dimensional identity matrix, resulting in an operator of the same size as $\mathbf{A} \otimes \mathbf{B}$. By applying these definitions recursively, the Kronecker product or sum of more than two matrices can also be defined. In general, the Kronecker product/sum of $k$ square matrices $\{ \mathbf{A}^{(i)} \}_{i=1}^k$, with shapes $\{n_i \times n_i\}_{i=1}^k$, can be written respectively as

$$\bigotimes_{i=1}^k \mathbf{A}^{(i)} = \mathbf{A}^{(1)} \otimes \mathbf{A}^{(2)} \otimes \dots \otimes \mathbf{A}^{(k)}, \qquad \bigoplus_{i=1}^k \mathbf{A}^{(i)} = \sum_{i=1}^k \mathbf{I}_{n_1} \otimes \dots \otimes \mathbf{I}_{n_{i-1}} \otimes \mathbf{A}^{(i)} \otimes \mathbf{I}_{n_{i+1}} \otimes \dots \otimes \mathbf{I}_{n_k}.$$
The resultant operators can act either on vectors of length $N = \prod_{i=1}^k n_i$ or, equivalently, on tensors of shape $(n_1, n_2, \dots, n_k)$.
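This vector/tensor equivalence is what makes Kronecker systems tractable in practice: a product such as $(\mathbf{A} \otimes \mathbf{B})\,\mathrm{vec}(\mathbf{X})$ can be evaluated as ordinary matrix products on the reshaped operand, without ever materialising the $nm \times nm$ operator. The following minimal NumPy sketch (illustrative variable names only, not part of PyKronecker's API) verifies both identities:

```python
import numpy as np

n, m = 4, 5
rng = np.random.default_rng(0)
A = rng.normal(size=(n, n))
B = rng.normal(size=(m, m))
X = rng.normal(size=(n, m))   # tensor operand
x = X.reshape(-1)             # the same data as a length n*m vector (row-major)

# Dense reference: materialise the full (nm x nm) operators.
KP = np.kron(A, B)
KS = np.kron(A, np.eye(m)) + np.kron(np.eye(n), B)   # Kronecker sum

# Structured equivalents: (A kron B) vec(X) = vec(A X B^T) under row-major vec,
# and (A kronsum B) vec(X) = vec(A X + X B^T).
assert np.allclose(KP @ x, (A @ X @ B.T).reshape(-1))
assert np.allclose(KS @ x, (A @ X + X @ B.T).reshape(-1))
```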
In PyKronecker, expressions are written in terms of a high-level operator abstraction.
b) *To execute matrix-vector multiplications in a way that is maximally efficient and runs on parallel GPU/TPU hardware.*
Significant effort has gone into optimising the execution of matrix-vector and matrix-tensor multiplications. In particular, this comprises the kronx algorithm, Just-In-Time (JIT) compilation, and parallel processing on GPU/TPU hardware. As a result, PyKronecker achieves very fast execution times compared to alternative implementations (see Table 1).
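The core idea behind kronx-style evaluation is to apply each factor along its own tensor axis, so a $k$-factor product costs $O(N \sum_i n_i)$ operations rather than the $O(N^2)$ of a dense multiply. The sketch below is a plain-NumPy illustration of this shuffle-style scheme, not PyKronecker's actual implementation:

```python
import numpy as np

def kron_matvec(mats, X):
    """Compute (A1 kron ... kron Ak) vec(X) without forming the full operator.

    mats: list of square matrices with shapes (n_i, n_i).
    X:    tensor operand of shape (n_1, ..., n_k).
    """
    for A in mats:
        # Contract the current leading axis with its factor, then rotate the
        # axes so that after k steps the original axis order is restored.
        X = np.tensordot(A, X, axes=([1], [0]))
        X = np.moveaxis(X, 0, -1)
    return X

# Check against the dense operator built with np.kron.
rng = np.random.default_rng(0)
mats = [rng.normal(size=(n, n)) for n in (3, 4, 5)]
X = rng.normal(size=(3, 4, 5))
dense = np.kron(np.kron(mats[0], mats[1]), mats[2])
assert np.allclose(dense @ X.reshape(-1), kron_matvec(mats, X).reshape(-1))
```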
c) *To allow automatic differentiation for complex loss functions involving Kronecker products.*
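Because the structured multiply is composed of ordinary differentiable operations, gradients of losses involving Kronecker-structured operators can be obtained with JAX's reverse-mode autodiff. The sketch below is a generic JAX illustration of this capability; it does not use PyKronecker's own operator classes:

```python
import jax
import jax.numpy as jnp

ka, kb, kx, ky = jax.random.split(jax.random.PRNGKey(0), 4)
A = jax.random.normal(ka, (4, 4))
B = jax.random.normal(kb, (5, 5))
Y = jax.random.normal(ky, (4, 5))

def loss(X):
    # (A kron B) vec(X) evaluated as A X B^T; the full operator is never built.
    return jnp.sum((A @ X @ B.T - Y) ** 2)

X0 = jax.random.normal(kx, (4, 5))
grad = jax.grad(loss)(X0)   # same shape as X0; flows through the structure
print(grad.shape)
```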
One potential alternative in Python is the PyLops library, which provides an interface for matrix-free linear operators.
Another alternative is the library Kronecker.jl [@Stock2020], implemented in the Julia programming language [@bezanson2017]. Kronecker.jl has many of the same aims as PyKronecker and has a clean interface, making use of Julia's support for Unicode and infix functions to create Kronecker products with a custom $\otimes$ operator. However, at this time, the library does not support GPU acceleration or automatic differentiation, although the former is in development.
Table 1 shows a feature comparison of these libraries, along with the kronx algorithm implemented in "vanilla" NumPy (i.e., running on the CPU without JIT compilation). The table also shows the time taken to multiply a Kronecker product by a vector in two scenarios. In the first scenario, the Kronecker product is constructed from two matrices of size $(400 \times 400)$ and $(500 \times 500)$; in the second, it is constructed from three matrices of size $(100 \times 100)$, $(150 \times 150)$ and $(200 \times 200)$. Experiments were performed with an Intel Core i7-7700HQ 2.80GHz CPU and an Nvidia 1050Ti GPU. In both cases, PyKronecker on the GPU is the fastest by a significant margin.
| Implementation | Python | Auto-diff | GPU support | Compute time (400, 500) | Compute time (100, 150, 200) |
|----------------|--------|-----------|-------------|--------------------------|------------------------------|
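The absolute numbers depend on hardware, but the flavour of this comparison can be reproduced with a small harness along the following lines (sizes reduced from those above so that the dense baseline fits in memory; this is not the authors' benchmark script):

```python
import timeit
import numpy as np

rng = np.random.default_rng(0)
n1, n2 = 80, 100                       # small enough that the dense operator
A = rng.normal(size=(n1, n1))          # (8000 x 8000, ~0.5 GB) is feasible
B = rng.normal(size=(n2, n2))
X = rng.normal(size=(n1, n2))
x = X.reshape(-1)

dense = np.kron(A, B)
t_dense = timeit.timeit(lambda: dense @ x, number=20) / 20
t_struct = timeit.timeit(lambda: (A @ X @ B.T).reshape(-1), number=20) / 20
print(f"dense matvec:      {t_dense:.5f} s")
print(f"structured matvec: {t_struct:.5f} s")
```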
# Outlook and Future Work
There are several features that we are developing to expand the functionality of PyKronecker. The first is to provide support for non-square operators. In a typical problem, the Kronecker operators encountered represent simple linear transformations which preserve dimensionality; however, there is a significant minority of contexts where this is not the case, and the inclusion of this feature would increase the range of possible applications. Secondly, we would like to add support for sparse matrices. This would enable computation with larger matrices and faster execution times where applicable. However, this would require integration with Jax's sparse module, which is currently under development. Finally, for convenience, it may be useful to add some commonly used algorithms such as the conjugate gradient method for solving linear systems [@shewchuk1994], least squares, and various matrix decompositions such as eigenvalue, Cholesky and LU.