Commit 4f8b3df

Post-lecture improvements
1 parent b531283 commit 4f8b3df

2 files changed (+220 −137 lines)

src/05_Interpolation.jl

Lines changed: 36 additions & 36 deletions
@@ -836,20 +836,17 @@ Let us **illustrate this result graphically**. We consider again our function $f_
 distinct nodes $\{x_i\}_{i=1}^{\texttt{n\_nodes\_poly}}$
 --- either equally spaced (left plot) or using Chebyshev nodes (right plot). Additionally, for both cases we consider an exact evaluation, i.e. points $(x_i, y_i) = (x_i, f(x_i))$ as well as noisy evaluations $(x_i, \tilde{y}_i)$ with
 ```math
-\tilde{y}_i = f(x_i) + η_i
+\tilde{y}_i = f(x_i) + ε_i
 ```
-where $η_i$ is a random number of magnitude $|\eta_i| ≤ 10^\texttt{η\_log\_poly}$.
+where $ε_i$ is a random number of magnitude $|ε_i| ≤ \texttt{ε\_poly}$.
 """

 # ╔═╡ d5de3b11-7781-4100-8a63-d26426685bbc
 md"""
 - Number of nodes `n_nodes_poly = ` $(@bind n_nodes_poly Slider(2:20; show_value=true, default=10))
-- Logarithm of noise amplitude `η_log_poly = ` $(@bind η_log_poly Slider(-3:0.25:-0.5; show_value=true, default=-2))
+- Noise amplitude `ε_poly = ` $(@bind ε_poly Slider(10 .^ (-3:0.25:-0.5); show_value=true, default=0.01))
 """

-# ╔═╡ e4ce9bd3-2b8c-458f-aade-b74fd973d19e
-md"Comparing to our theoretical analysis this gives a **noise ε = $(round(10^(η_log_poly), sigdigits=3))**."
-
 # ╔═╡ 4a4540a3-b4cf-47fd-a615-a5280505333f
 let
     fine = range(-1.1, 1.1; length=500)
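The new slider exposes the noise amplitude directly rather than its logarithm, while keeping the logarithmic spacing by broadcasting over the exponent range. A small sketch of what the new slider range evaluates to (variable name chosen here for illustration only):

```julia
# 10 .^ (-3:0.25:-0.5) broadcasts over the exponents -3, -2.75, ..., -0.5,
# giving 11 amplitudes from 1e-3 up to 10^(-0.5) ≈ 0.316 in uniform
# logarithmic steps, so the slider moves just like the old η_log one did.
amplitudes = 10 .^ (-3:0.25:-0.5)
@assert length(amplitudes) == 11
@assert first(amplitudes) ≈ 1e-3
@assert last(amplitudes) ≈ 10^(-0.5)
```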
@@ -860,7 +857,7 @@ let

     p = plot(; layout=grid(2, 2; heights=[0.75, 0.25]))
     for (i, values) in enumerate((values_equispaced, values_chebyshev))
-        noise = [10^η_log_poly * (-1 + 2rand()) for _ in values]
+        noise = [ε_poly * (-1 + 2rand()) for _ in values]

         # fsin_accurate = fsin.(values)  # = [fsin(x) for x in values]
         fsin_noise = fsin.(values) + noise
@@ -877,7 +874,7 @@ let

         sub_noise = i + 2
         plot!(p, fine, abs.(fsin.(fine) .- poly_noise.(fine)), label="noisy fit", yaxis=:log, xlim=(-1, 1), ylims=(10e-5, 1), legend=:topright, c=3, lw=1.5, ylabel="error", subplot=sub_noise)
-        hline!(p, [10^η_log_poly], label=L"noise $\eta$", c=3, lw=1, ls=:dash, subplot=sub_noise)
+        hline!(p, [ε_poly], label=L"noise $\varepsilon$", c=3, lw=1, ls=:dash, subplot=sub_noise)
         # plot!(p, fine, abs.(fsin.(fine) .- poly_accurate.(fine)), label="fit", c=2, lw=1.5, subplot=sub_noise)
     end
     title!(p, "equispaced"; subplot=1)
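In both noisy-fit cells the perturbations are drawn uniformly from $[-ε, ε)$. As a standalone sketch of this noise model (with `f` standing in for the notebook's `fsin`, whose definition is not part of this diff):

```julia
# -1 + 2rand() is uniform in [-1, 1), so scaling by ε gives noise
# uniform in [-ε, ε), matching the bound |ε_i| ≤ ε from the text.
f(x) = sin(5x)               # placeholder for the notebook's fsin (assumption)
ε = 0.01
nodes = range(-1, 1; length=10)
y_noisy = f.(nodes) + [ε * (-1 + 2rand()) for _ in nodes]
```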
@@ -1068,7 +1065,7 @@ end

 # ╔═╡ f6055e59-1ddd-4148-8a85-95f4501e3f9f
 let
-    p = plot(; xlims=(-1, 1), title="Error for piecewise interpolation")
+    p = plot(; xlims=(-1, 1), title="Error for piecewise interpolation", ylabel=L"p_{1h} - f_{ratio}")
     for n_nodes in (5, 10, 30, 40)
         nodes = range(-1, 1; length=n_nodes)
         p₁ₕ = pwlinear(nodes, fratio.(nodes))
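This cell relies on the notebook's `pwlinear`, whose definition does not appear in this diff. A minimal sketch of such a piecewise linear interpolant, returning a callable that evaluates the broken line through the data points (my own reconstruction, not the notebook's code):

```julia
# Piecewise linear interpolant through (nodes[i], values[i]);
# nodes must be sorted. Outside [first(nodes), last(nodes)] the
# outermost segments are extended linearly.
function pwlinear_sketch(nodes, values)
    function (x)
        i = clamp(searchsortedlast(nodes, x), 1, length(nodes) - 1)
        t = (x - nodes[i]) / (nodes[i+1] - nodes[i])
        (1 - t) * values[i] + t * values[i+1]
    end
end

p₁ₕ = pwlinear_sketch(range(-1, 1; length=5), [0.0, 0.5, 1.0, 0.5, 0.0])
p₁ₕ(0.25)   # evaluates the segment between the third and fourth node
```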
@@ -1300,15 +1297,15 @@ we take $\texttt{n\_nodes\_pl}$ equally spaced nodes.
 # ╔═╡ 2096258a-efd9-4de1-8f56-c02295407c0b
 md"""
 - Number of nodes `n_nodes_pl = ` $(@bind n_nodes_pl Slider(2:2:40; show_value=true, default=20))
-- Logarithm of noise amplitude `η_log_pl = ` $(@bind η_log_pl Slider(-3:0.25:0; show_value=true, default=-2))
+- Noise amplitude `ε_pl = ` $(@bind ε_pl Slider(10 .^ (-3:0.25:0); show_value=true, default=0.01))
 """

 # ╔═╡ e2b32c49-9c8e-4d32-8bf6-a0517b7caa8a
 let
     fine = range(-1.0, 1.0; length=500)

     nodes = range(-1, 1; length=n_nodes_pl)
-    noise = [10^η_log_pl * (-1 + 2rand()) for _ in nodes]
+    noise = [ε_pl * (-1 + 2rand()) for _ in nodes]

     p₁ₕ_accurate = pwlinear(nodes, fsin.(nodes))
     p₁ₕ_noisy = pwlinear(nodes, fsin.(nodes) + noise)
@@ -1321,7 +1318,7 @@ let
     scatter!(p, nodes, fsin.(nodes) + noise; label="")

     q = plot(fine, abs.(fsin.(fine) .- p₁ₕ_noisy.(fine)), label=L"noisy fit $\tilde{p}_{1,h}$", yaxis=:log, xlim=(-1, 1), ylims=(10e-5, 1), legend=:topright, c=3, lw=1.5, ylabel="error")
-    hline!(q, [10^η_log_pl], label=L"noise $\eta$", c=3, lw=1, ls=:dash)
+    hline!(q, [ε_pl], label=L"noise $\varepsilon$", c=3, lw=1, ls=:dash)
     plot!(q, fine, abs.(fsin.(fine) .- p₁ₕ_accurate.(fine)), label=L"fit $p_{1,h}$", c=2, lw=1.5)

     plot(p, q; layout=grid(2, 1; heights=[0.75, 0.25]))
@@ -1690,7 +1687,7 @@ Other choices are for example to employ
 However, we will not consider these further.
 """

-# ╔═╡ 2122f3c9-a901-4488-bd1b-4bd7e49b7c8a
+# ╔═╡ 2521d29f-64ee-4221-8d92-d83e71068758
 md"""
 ### Least squares problems
 We will now consider how to solve polynomial regression problems (16).
@@ -1699,36 +1696,34 @@ For this we introduce some linear algebra.
 Recall from equation (1) at the start of the discussion of polynomial interpolation
 that each polynomial $q \in \mathbb{P}_m$ can be written as a linear combination
 ```math
-q(x) = \sum_{j=0}^m c_j φ_j(x)
+q(x) = \sum_{j=0}^m c_j x^j
 ```
-where $φ_j$ are suitable basis functions, such as the monomials $φ_j(x) = x^j$.
+in terms of the monomial basis $1 = x^0, x^1, \ldots, x^m$.
 Inserting this, the least-squares error expression can be rewritten as
 ```math
-e_\text{LS} = \sum_{i=1}^{n} \left| y_i - \sum_{j=0}^m c_j φ_j(x_i) \right|^2
+e_\text{LS} = \sum_{i=1}^{n} \left| y_i - \sum_{j=0}^m c_j x_i^j \right|^2
 ```
 and further, by introducing the matrix / vector notation
 ```math
 \textbf{V} = \left(\begin{array}{cccc}
-φ_0(x_1) & φ_1(x_1) & \cdots & φ_m(x_1) \\
-φ_0(x_2) & φ_1(x_2) & \cdots & φ_m(x_2) \\
+x_1^0 & x_1^1 & \cdots & x_1^m \\
+x_2^0 & x_2^1 & \cdots & x_2^m \\
 & \vdots\\
-φ_0(x_{n}) & φ_1(x_{n}) & \cdots & φ_m(x_{n})
+x_{n}^0 & x_{n}^1 & \cdots & x_{n}^m
 \end{array}\right) \qquad
 \textbf{y} = \left(\begin{array}{c} y_1 \\ y_2 \\ \vdots \\ y_n \end{array}\right)
 \qquad
-\textbf{c} = \left(\begin{array}{c} c_1 \\ c_2 \\ \vdots \\ c_n \end{array}\right)
+\textbf{c} = \left(\begin{array}{c} c_0 \\ c_1 \\ \vdots \\ c_{m} \end{array}\right)
 ```
 as
 ```math
 \tag{16}
 e_\text{LS} = \| \mathbf{y} - \mathbf{V} \mathbf{c} \|_2^2
 ```
 where $\| v \|_2 = \sqrt{ \sum_{i=1}^n v_i^2 }$ is the Euclidean norm.
-If we are using the monomial basis
-the matrix $\mathbf{V}$ is again equal to a Vandermode matrix,
-same as in polynomial interpolation.
-However, since for regression problems we usually have that $n > m + 1$
-it is rectangular in this case:
+We again recognise $\mathbf V$ to be a Vandermonde matrix,
+as in the polynomial interpolation case, except that here
+it is a rectangular matrix, since $n > m + 1$:
 ```math
 \mathbf{V} = \left(\begin{array}{ccc}
 1 & x_1 & \ldots & x_1^m \\
@@ -1737,7 +1732,10 @@ it is rectangular in this case:
 1 & x_n & \ldots & x_n^m \\
 \end{array}\right) \in \mathbb{R}^{n\times (m+1)}
 ```
+"""

+# ╔═╡ 20832145-54ba-4503-8099-e49b45f5024f
+md"""
 In polynomial regression our job is now to minimise expression (16),
 which means that we want to find the coefficient vector $\mathbf{c}$,
 which minimises $\|\mathbf{y} - \mathbf{V} \mathbf{c} \|_2^2$.
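To make the connection between (16) and actual code concrete, here is a minimal sketch of polynomial regression via the rectangular Vandermonde matrix; the test function and noise level are placeholder choices, and Julia's backslash operator returns the least-squares solution for a rectangular matrix (via a QR factorisation):

```julia
n, m = 20, 5
x = range(-1, 1; length=n)
y = sin.(5 .* x) .+ 0.01 .* (2 .* rand(n) .- 1)   # noisy samples of a test function

# V[i, j+1] = x_i^j, i.e. the rectangular Vandermonde matrix in ℝ^{n×(m+1)}
V = [xᵢ^j for xᵢ in x, j in 0:m]

c = V \ y                              # minimises ‖y - V c‖₂²
q(t) = sum(c[j+1] * t^j for j in 0:m)  # the fitted polynomial q ∈ P_m
```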
@@ -1868,29 +1866,29 @@ Here, we will restrict ourselves to exploring the parameter space a little using
 Foldable("Some interesting experiments with the visualisation below.",
 md"""
 **Polynomial interpolation regime: Sanity check with our results from before:**
-- `n = 20` and `η_log` small: Increase degree `m` slowly up to `20`: you should observe Runge's phaenomenon for large `m` as before.
-- Keep `n = 20` and `m = 20` and increase the noise `η_log`: The error drastically increases (well beyond the original noise level `η_log`); the fit is very unstable as we are essentially doing polynomial interpolation with equispaced points.
+- `n = 20` and noise `ε` small (e.g. `ε = 0.001`): Increase degree `m` slowly up to `20`: you should observe Runge's phenomenon for large `m` as before.
+- Keep `n = 20` and `m = 20` and increase the noise `ε`: The error increases drastically (well beyond the original noise level `ε`); the fit is very unstable as we are essentially doing polynomial interpolation with equispaced points.
 - In general for `m = n` (polynomial interpolation) the situation is similar, try for example `n=15` and `m=15` with varying noise.

 **The regime $n \gg m$ of least-squares regression:**
-- Set `m=15`, `m=15` and `η_log = -1`, so polynomial interpolation with large noise. The errors are fairly large. Now increase `n` slowly to `50`: All of a sudden the errors are in control and basically never exceed `η_log`. Play with `η_log` by making it even larger: You should see the error remains well below `η_log` in all cases.
-- Set `m=40` and `η_log = -0.5` and slide the degree `m` between `20` and `10`. At `m=10` the error is notacibly smaller than at `m=20`. This suggests that somehow the ratio $\frac{m}{n}$ has some influence to what extend measurement noise $η$ translates to an error in the outcome of polynomial regression.
-- Set `n=20` and `η_log = -1.5` and slide the polynomial degree. We realise that there is sweet spot aronud `m=10` when the error is overall smallest. At too low polynomial degrees $m$ the model is not rich enough to approximate the sine, while at too large degrees $m$ the ratio $\frac{m}{n}$ gets large we get into the regime of a polynomial interpolation. Therefore the numerical noise begins to amplify, resulting in larger and larger errors as we keep increasing $m$.
+- Set `n=15`, `m=15` and `ε = 0.1`, so polynomial interpolation with large noise. The errors are fairly large. Now increase `n` slowly to `50`: all of a sudden the errors are under control and essentially never exceed `ε`. Make `ε` even larger: you should see that the error remains well below `ε` in all cases.
+- Set `n=40` and `ε = 0.316` and slide the degree `m` between `20` and `10`. At `m=10` the error is noticeably smaller than at `m=20`. This suggests that the ratio $\frac{m}{n}$ influences the extent to which measurement noise $ε$ translates into an error in the outcome of polynomial regression.
+- Set `n=20` and `ε = 0.0316` and slide the polynomial degree. We realise that there is a sweet spot around `m=10` where the error is overall smallest. At too low polynomial degrees $m$ the model is not rich enough to approximate the sine, while at too large degrees $m$ the ratio $\frac{m}{n}$ gets large and we enter the regime of polynomial interpolation. Therefore the noise begins to be amplified, resulting in larger and larger errors as we keep increasing $m$.
 """)

 # ╔═╡ 4562b2f4-3fcb-4c60-83b6-cafd6e4b3144
 md"""
 - Number of samples `n = ` $(@bind n Slider(10:5:50; show_value=true, default=20))
 - Polynomial degree `m = ` $(@bind m Slider(1:20; show_value=true, default=2))
-- Logarithm of noise amplitude `η_log = ` $(@bind η_log Slider(-3:0.25:0; show_value=true, default=-2))
+- Noise amplitude `ε = ` $(@bind ε Slider(10 .^ (-3:0.25:0); show_value=true, default=0.01))
 """

 # ╔═╡ 7829afd0-2693-40dc-b2d5-c8da72e96454
 let
     fine = range(-1.0, 1.0; length=500)

     nodes = range(BigFloat(-1), BigFloat(1); length=n)
-    noise = [10^η_log * (-1 + 2rand()) for _ in 1:n]
+    noise = [ε * (-1 + 2rand()) for _ in 1:n]
     data = fsin.(nodes) + noise

     pₘᴸˢ = Polynomials.fit(nodes, data, m+1)
@@ -1902,7 +1900,7 @@ let
     plot!(p, fine, pₘᴸˢ.(fine); label=L"regression $p_m^\textrm{LS}$", lw=2.5)

     q = plot(fine, abs.(fsin.(fine) .- pₘᴸˢ.(fine)), label="error", yaxis=:log, xlim=(-1, 1), ylims=(10e-5, 1), legend=:bottomright, c=3, lw=1.5)
-    hline!(q, [10^η_log], label=L"noise $\eta$", c=3, lw=1, ls=:dash)
+    hline!(q, [ε], label=L"noise $\varepsilon$", c=3, lw=1, ls=:dash)

     plot(p, q; layout=grid(2, 1; heights=[0.75, 0.25]))
 end
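The cell above uses `Polynomials.fit` for the regression. A small standalone usage sketch with Polynomials.jl, where the third argument sets the degree of the fitted polynomial (the sample function and parameters here are again placeholders):

```julia
using Polynomials

x = range(-1, 1; length=40)
y = sin.(5 .* x) .+ 0.0316 .* (2 .* rand(40) .- 1)

p = Polynomials.fit(x, y, 10)      # least-squares polynomial of degree ≤ 10
maximum(abs, y .- p.(x))           # largest residual at the sample points
```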
@@ -1952,6 +1950,8 @@ md"""
 - The **other solution** is to use **non-equally spaced points**:
   * The typical approach is **Chebyshev nodes**
   * These lead to **exponential convergence**
+
+Notice that all of these problems lead to linear systems $\textbf A \textbf x = \textbf b$ that we need to solve. We will see how this can be done numerically in [Direct methods for linear systems](https://teaching.matmat.org/numerical-analysis/06_Direct_methods.html).
 """

 # ╔═╡ 2240f8bc-5c0b-450a-b56f-2b53ca66bb03
@@ -3314,7 +3314,6 @@ version = "1.4.1+2"
 # ╟─ebe08f78-d331-4563-9ef8-f99355ff672e
 # ╟─5e19f1a7-985e-4fb7-87c4-5113b5615521
 # ╟─d5de3b11-7781-4100-8a63-d26426685bbc
-# ╟─e4ce9bd3-2b8c-458f-aade-b74fd973d19e
 # ╟─4a4540a3-b4cf-47fd-a615-a5280505333f
 # ╟─50a3face-5831-4ef5-b6a5-225b8b7c46a0
 # ╟─2cefad7d-d734-4342-8039-aefdc33c2edd
@@ -3352,14 +3351,15 @@ version = "1.4.1+2"
 # ╟─7bd4720f-3ec8-459a-91be-17d5f2acaa5c
 # ╟─66cfe319-c19e-478f-8f36-645fa1ff8b3b
 # ╟─f6916b49-4fce-429c-9f55-d9a51dc46937
-# ╟─2122f3c9-a901-4488-bd1b-4bd7e49b7c8a
+# ╟─2521d29f-64ee-4221-8d92-d83e71068758
+# ╟─20832145-54ba-4503-8099-e49b45f5024f
 # ╟─30899dd5-b307-415d-bb94-efd09e7d0864
 # ╟─d846d3f2-8cfc-4fc5-a22b-d55762f88f45
 # ╠═0b8f2bcc-0948-47ed-bd62-4a0ceeee9156
 # ╟─d3fdad97-275d-49d8-a4aa-b1f6438a01b8
 # ╟─14bb1d8e-dd73-4795-9bb9-0213e83020d9
 # ╟─4562b2f4-3fcb-4c60-83b6-cafd6e4b3144
-# ╠═7829afd0-2693-40dc-b2d5-c8da72e96454
+# ╟─7829afd0-2693-40dc-b2d5-c8da72e96454
 # ╟─c2431601-afc7-4b37-9113-5ae85d4e5549
 # ╟─ebf6b5f4-b963-4296-9255-47e426d2d12d
 # ╟─2240f8bc-5c0b-450a-b56f-2b53ca66bb03
