From b8af1fa23cc77cb3e3bcddb9b326168037570bdc Mon Sep 17 00:00:00 2001
From: Gaspard Jankowiak <gaspard.jankowiak@uni-graz.at>
Date: Wed, 18 Dec 2024 17:07:43 +0100
Subject: [PATCH] import

---
 .gitignore       |   1 +
 hela.py          |  27 +++++
 ica.py           |  36 ++++++
 kmeans.py        |  33 ++++++
 optim_mixture.jl | 259 ++++++++++++++++++++++++++++++++++++++++
 pca.py           | 301 +++++++++++++++++++++++++++++++++++++++++++++++
 pixel_spectra.py |  20 ++++
 slideshow.py     |  51 ++++++++
 utils.py         |  35 ++++++
 9 files changed, 763 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 hela.py
 create mode 100644 ica.py
 create mode 100644 kmeans.py
 create mode 100644 optim_mixture.jl
 create mode 100644 pca.py
 create mode 100644 pixel_spectra.py
 create mode 100644 slideshow.py
 create mode 100644 utils.py

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..bee8a64
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+__pycache__
diff --git a/hela.py b/hela.py
new file mode 100644
index 0000000..b2ab164
--- /dev/null
+++ b/hela.py
@@ -0,0 +1,27 @@
+import numpy as np
+import matplotlib.pyplot as plt
+from matplotlib.gridspec import GridSpec
+
+import utils
+
+import pca
+import pixel_spectra
+
+from sklearn.decomposition import PCA, FastICA
+
+size = 512
+
+# pca.pca(2, size, size, method="sparse", alpha=1000)
+# pca.pca(2, size, size, method="kernel", kernel='poly', scale=True)
+# pca.pca(2, size, size, method="ica", scale=False)
+# pca.pca_cluster(3, 4, size, size, scale=False, cluster_method="kmeans")
+sliders = pca.pca(2, size, size, scale=False, sliders=True)
+sliders = pca.pca(2, size, size, scale=False, sliders=True, selected=[17, 7])
+
+# kmeans.kmeans(8, 128, 128)
+
+# pixel_spectra.pixel_spectra(100)
+
+# slideshow.slideshow()
+
+plt.show()
diff --git a/ica.py b/ica.py
new file mode 100644
index 0000000..87c22bc
--- /dev/null
+++ b/ica.py
@@ -0,0 +1,36 @@
+import utils
+import numpy as np
+from sklearn.decomposition import FastICA
+import matplotlib.pyplot as plt
+from matplotlib.gridspec import GridSpec
+
+def ica(n_components, slab_width, slab_height):
+    slabs, i_min, i_max = utils.load_slabs(slab_width, slab_height)
+
+    X = utils.as_np_array(slabs).T
+    n_freqs, n_pixels = X.shape[1], X.shape[0]
+
+    p = FastICA(n_components = n_components, whiten="arbitrary-variance")
+    Y = p.fit_transform(X)
+    A = p.mixing_
+
+    new_coords = [Y[:,i].reshape((slab_height, slab_width)) for i in range(n_components)]
+
+    f = plt.figure(layout="constrained")
+    gs = GridSpec(2, 3, figure=f)
+    axes = [f.add_subplot(gs[0,0]), f.add_subplot(gs[1,0]),
+            f.add_subplot(gs[0,1]), f.add_subplot(gs[0,2]),
+            f.add_subplot(gs[1,1]), f.add_subplot(gs[1,2])
+            ]
+
+    axes[0].scatter(Y[:,0], Y[:,1], s=20, alpha=0.02)
+
+    axes[1].imshow(slabs[15]["data"], vmin=i_min, vmax=i_max)
+    axes[1].set_title(f"data @ {slabs[15]["e"]}cm-1")
+
+    for c in range(n_components):
+        axes[c+2].imshow(new_coords[c])
+        axes[c+2].set_title(f"Component #{c+1}")
+
+    plt.show()
+#ica(3, 512, 512)
diff --git a/kmeans.py b/kmeans.py
new file mode 100644
index 0000000..714be8c
--- /dev/null
+++ b/kmeans.py
@@ -0,0 +1,33 @@
+import utils
+import matplotlib.pyplot as plt
+from sklearn.cluster import KMeans
+
+def kmeans(n_clusters, slab_width, slab_height):
+    slabs, i_min, i_max = utils.load_slabs(slab_width, slab_height)
+
+    X = utils.as_np_array(slabs)
+
+    f, axes = plt.subplots(1, 3)
+
+    n_freqs, n_pixels = X.shape[0], X.shape[1]
+
+    X = X.T
+
+    km_estimator = KMeans(n_clusters=n_clusters)
+    km_estimator.fit(X)
+    centers = km_estimator.cluster_centers_
+
+    labels = km_estimator.labels_.reshape((slab_width, slab_height))
+
+    for i in range(X.shape[0]):
+        axes[0].plot(utils.energies, X[i,:], c='blue', alpha=0.1)
+
+    for i in range(centers.shape[0]):
+        axes[0].plot(utils.energies, centers[i,:], c='black', alpha=1)
+
+    axes[1].imshow(slabs[15]["data"], vmin=i_min, vmax=i_max)
+
+    axes[2].imshow(labels)
+
+    plt.show()
+
diff --git a/optim_mixture.jl b/optim_mixture.jl
new file mode 100644
index 0000000..8b74cf4
--- /dev/null
+++ b/optim_mixture.jl
@@ -0,0 +1,259 @@
+module OptimMixture
+
+import DelimitedFiles as DF
+import BenchmarkTools as BT
+import UnicodePlots as UP
+import LinearAlgebra: ⋅
+
+const ENERGIES = [2803, 2811, 2819, 2826, 2834, 2842, 2850, 2858, 2866, 2874,
+  2882, 2890, 2897, 2905, 2913, 2921, 2929, 2937, 2945, 2953,
+  2961, 2969, 2977, 2985, 2993, 3001, 3009, 3018, 3026, 3034,
+  3042, 3050]
+
+@kwdef struct Slab
+  energy::Int
+  data::Matrix{Int}
+end
+
+function load_slabs(width::Int=512, height::Int=512)
+  slabs = Slab[]()
+  i_min, i_max = typemax(Int), 0
+  for e in ENERGIES
+    slab = Slab(energy=e, data=DF.readdlm("test_sample/HeLa_F-SRS_512x512_2803cm-1.txt", ',', Int))
+    push!(slabs, slab)
+
+    i_min, i_max = min(i_min, minimum(slab.data)), max(i_max, maximum(slab.data))
+
+    return slabs, i_min, i_max
+  end
+end
+
+function Q_matrix_transpose(X::Matrix{Float64}, Y::Matrix{Float64}, N::Int)
+  return sum(abs2, Y - view(X, 1:N, :) * view(X, (N+1):size(X, 1), :)')
+end
+
+function Q_matrix_reshape(X::Matrix{Float64}, Y::Matrix{Float64}, N::Int)
+  N_plus_M, m = size(X)
+  return sum(abs2, Y - view(X, 1:N, :) * reshape(view(X, (N+1):N_plus_M, :), m, N_plus_M - N))
+end
+
+function E_loop(X::Matrix{Float64}, Y::Matrix{Float64})
+  s = 0
+  N, M = size(Y)
+  m = size(X, 2)
+  for i in 1:N
+    for j in N+1:N+M
+      x = 0
+      for k in 1:m
+        @inbounds x += X[i, k] * X[j, k]
+      end
+      @inbounds s += (x - Y[i, j-N])^2
+    end
+  end
+  return s
+end
+
+function Q_loop!(dst::Matrix{Float64}, X::Matrix{Float64}, N::Int)
+  N_plus_M, m = size(X)
+  for i in 1:N
+    for j in N+1:N_plus_M
+      x = 0
+      for k in 1:m
+        @inbounds x += X[i, k] * X[j, k]
+      end
+      @inbounds dst[i, j-N] = x
+    end
+  end
+end
+
+function DE_loop!(dst::Matrix{Float64}, X::Matrix{Float64}, Y::Matrix{Float64})
+  dst_W = zeros(size(Y))
+  DE_loop!(dst, dst_W, X, Y)
+end
+
+function dot_prod_dual_loop(W::Matrix{Float64}, X::Matrix{Float64}, V::Matrix{Float64})
+  _, m = size(X)
+  N, M = size(W)
+
+  s = 0
+
+  for k in 1:m
+    for i in 1:N
+      x = 0
+      for j in 1:M
+        @inbounds x += W[i, j] * X[N+j, k]
+      end
+      s += x * V[i, k]
+    end
+    for j in 1:M
+      x = 0
+      for i in 1:N
+        @inbounds x += W[i, j] * X[i, k]
+      end
+      s += x * V[N+j, k]
+    end
+  end
+
+  return s
+end
+
+function dot_prod_primal_loop(W::Matrix{Float64}, X::Matrix{Float64}, V::Matrix{Float64})
+  _, m = size(X)
+  N, M = size(W)
+
+  s = 0
+
+  for i in 1:N
+    for j in 1:M
+      x = 0
+      for k in 1:m
+        @inbounds x += X[i, k] * V[N+j, k] + X[N+j, k] * V[i, k]
+
+      end
+      s += x * W[i, j]
+    end
+  end
+
+  return s
+end
+
+function DE_loop!(dst::Matrix{Float64}, dst_W::Matrix{Float64}, X::Matrix{Float64}, Y::Matrix{Float64})
+  _, m = size(X)
+  N, M = size(Y)
+  # dst is the same size as X
+  # we need storage of size Y
+  Q_loop!(dst_W, X, N)
+  # compute W = Q(X) - Y
+  dst_W .-= Y
+  for k in 1:m
+    for i in 1:N
+      x = 0
+      for j in 1:M
+        @inbounds x += dst_W[i, j] * X[N+j, k]
+      end
+      @inbounds dst[i, k] = x
+    end
+    for j in 1:M
+      x = 0
+      for i in 1:N
+        @inbounds x += dst_W[i, j] * X[i, k]
+      end
+      @inbounds dst[N+j, k] = x
+    end
+  end
+  dst .*= 2
+end
+
+function DQ_V_loop!(dst::Matrix{Float64}, X::Matrix{Float64}, V::Matrix{Float64}, N::Int)
+  N_plus_M, m = size(X)
+  for i in 1:N
+    for j in N+1:N_plus_M
+      x = 0
+      for k in 1:m
+        @inbounds x += (V[i, k] * X[j, k] + X[i, k] * V[j, k])
+      end
+      @inbounds dst[i, j-N] = x
+    end
+  end
+end
+
+function Q_loop_transposed(X::Matrix{Float64}, Y::Matrix{Float64}, N::Int)
+  s = 0
+  m, N_plus_M = size(X)
+  for i in 1:N
+    for j in N+1:N_plus_M
+      x = 0
+      for k in 1:m
+        @inbounds x += X[k, i] * X[k, j]
+      end
+      @inbounds s += (Y[j-N, i] - x)^2
+    end
+  end
+  return s
+end
+
+function test_dot_prod(N::Int, M::Int, m::Int)
+  X = rand(N + M, m)
+  W = rand(N, M)
+  V = rand(N + M, m) .- 0.5
+
+  @show dot_prod_primal_loop(W, X, V)
+  @show dot_prod_dual_loop(W, X, V)
+end
+
+function test_DE(N::Int, M::Int, m::Int)
+  X = rand(N + M, m)
+  Y = rand(N, M)
+  V = rand(N + M, m) .- 0.5
+
+  EX = E_loop(X, Y)
+  grad_EX = zeros(size(X))
+  DE_loop!(grad_EX, X, Y)
+
+  eps = [10.0^k for k in 1:-1:-12]
+  errors = zeros(size(eps))
+
+  for i in eachindex(eps)
+    EX_V_true = E_loop(X + eps[i] * V, Y)
+    EX_V_approx = EX + eps[i] * sum(grad_EX .* V)
+    @show EX_V_true
+    @show EX_V_approx
+    errors[i] = abs(EX_V_true - EX_V_approx)
+  end
+
+  @show eps
+  @show errors
+
+  foo = UP.lineplot(eps, errors, xscale=:log10, yscale=:log10)
+  UP.lineplot!(foo, eps, eps)
+  println(foo)
+end
+
+function test_DQ(N::Int, M::Int, m::Int)
+  X = rand(N + M, m) * 255
+  V = rand(N + M, m)
+
+  QX = zeros(N, M)
+  Q_loop!(QX, X, N)
+
+  dst_true = zeros(N, M)
+  dst_approx = zeros(N, M)
+
+  eps = [10.0^k for k in 1:-1:-8]
+  errors = zeros(size(eps))
+
+  for i in eachindex(eps)
+    Q_loop!(dst_true, X + eps[i] * V, N)
+    DQ_V_loop!(dst_approx, X, eps[i] * V, N)
+    dst_approx .+= QX
+    errors[i] = sum(abs, dst_approx - dst_true)
+  end
+
+  @show eps
+  @show errors
+
+  println(UP.lineplot(eps, errors, xscale=:log10, yscale=:log10))
+end
+
+function test(N::Int, M::Int, m::Int)
+  Y = rand(N, M) * 255
+  X = rand(N + M, m) * 255
+
+  X_pre = copy(X)
+  X_pre[N+1:N+M] .= vec(X_pre[N+1:N+M]')
+
+  r_matrix = Q_matrix_transpose(X, Y, N)
+  r_loop = Q_loop(X, Y, N)
+
+  @show r_matrix, r_loop, abs(r_matrix - r_loop) / r_loop
+end
+
+end # module
+
+# OptimMixture.test(512 * 512, 20, 2)
+# OptimMixture.test_DQ(512 * 512, 20, 2)
+OptimMixture.test_DE(512 * 512, 20, 2)
+# OptimMixture.test_DE(20, 2, 2)
+# OptimMixture.test_dot_prod(512^2, 20, 2)
+
+# vim: ts=2:sw=2:sts=2
diff --git a/pca.py b/pca.py
new file mode 100644
index 0000000..4fd2a3d
--- /dev/null
+++ b/pca.py
@@ -0,0 +1,301 @@
+import utils
+import numpy as np
+from sklearn.decomposition import PCA, SparsePCA, KernelPCA, FastICA
+from sklearn.cluster import SpectralClustering, KMeans
+import matplotlib.pyplot as plt
+from matplotlib.gridspec import GridSpec
+
+from matplotlib.widgets import Slider
+
+def pca(n_components, slab_width, slab_height, pca_method="full", alpha=1, kernel="linear", scale=False, sliders=False, selected=[]):
+    if sliders:
+        return pca_sliders(n_components, slab_width, slab_height, pca_method=pca_method, alpha=alpha, kernel=kernel, scale=scale, selected=selected)
+    slabs, i_min, i_max = utils.load_slabs(slab_width, slab_height)
+
+    X = utils.as_np_array(slabs).T
+    n_freqs, n_pixels = X.shape[1], X.shape[0]
+
+    if scale:
+        X = utils.scale(X)
+
+    if pca_method == "full":
+        p = PCA(n_components = n_components)
+    elif pca_method == "sparse":
+        p = SparsePCA(n_components = n_components, alpha=alpha)
+    elif pca_method == "kernel":
+        p = KernelPCA(n_components = n_components, kernel=kernel, fit_inverse_transform=True)
+    elif pca_method == "ica":
+        p = FastICA(n_components = n_components, whiten="arbitrary-variance")
+    else:
+        print(f"unknown PCA pca_method {pca_method}")
+        return
+
+    Y = p.fit_transform(X)
+    if pca_method in ["full", "sparse", "ica"]:
+        W = p.components_
+    else:
+        W = p.dual_coef_
+
+    dots_idc = np.where(Y[:,0] > 0.6)
+
+    new_coords = [Y[:,i].reshape((slab_height, slab_width)) for i in range(n_components)]
+    for c in range(n_components):
+        new_coords[c] = (new_coords[c] - new_coords[c].min())/new_coords[c].ptp()
+
+    f = plt.figure(layout="constrained")
+    gs = GridSpec(2, 3, figure=f)
+    axes = [f.add_subplot(gs[0,0]), f.add_subplot(gs[1,0]),
+            f.add_subplot(gs[0,1]), f.add_subplot(gs[0,2]),
+            f.add_subplot(gs[1,1]), f.add_subplot(gs[1,2])
+            ]
+
+    axes[0].scatter(Y[:,0], Y[:,1], s=20, alpha=0.02)
+    axes[0].scatter(Y[dots_idc,0], Y[dots_idc,1], s=20, alpha=1, color="black")
+
+    axes[1].imshow(slabs[15]["data"], vmin=i_min, vmax=i_max)
+    axes[1].set_title(f"data @ {slabs[15]["e"]}cm-1")
+
+    for c in range(n_components-1):
+        # mask = new_coords[c] > 0.6
+        cb = axes[c+2].imshow(new_coords[c])
+        # cb = axes[c+2].imshow(mask)
+        plt.colorbar(cb)
+        axes[c+2].set_title(f"Component #{c+1}")
+
+    axes[4].hist(Y[:,0], bins=25)
+
+    if pca_method in ["full", "sparse", "ica"]:
+        for c in range(n_components-1):
+            axes[5].step(utils.energies, W[c, :], label=f"{c+1}", where="mid")
+        axes[5].axvline(2845, color="black", lw=0.5)
+        axes[5].axvline(2930, color="black", lw=0.5)
+        axes[5].set_title("Coefficients for each component")
+
+        plt.legend(title="Component")
+    plt.suptitle(f"PCA Method: {pca_method}")
+
+def pca_sliders(n_components, slab_width, slab_height, pca_method="full", alpha=1, kernel="linear", scale=False, selected=[]):
+    slabs, i_min, i_max = utils.load_slabs(slab_width, slab_height)
+
+    i_idc = np.array([[i for i in range(slab_height)] for j in range(slab_width)]).flatten()
+    j_idc = np.array([[j for i in range(slab_height)] for j in range(slab_width)]).flatten()
+
+    X = utils.as_np_array(slabs).T
+    n_pixels, n_freqs = X.shape
+
+    if scale:
+        X = utils.scale(X)
+
+    if pca_method == "full":
+        p = PCA(n_components = n_components)
+    elif pca_method == "sparse":
+        p = SparsePCA(n_components = n_components, alpha=alpha)
+    elif pca_method == "kernel":
+        p = KernelPCA(n_components = n_components, kernel=kernel, fit_inverse_transform=True)
+    elif pca_method == "ica":
+        p = FastICA(n_components = n_components, whiten="arbitrary-variance")
+    else:
+        print(f"unknown PCA pca_method {pca_method}")
+        return
+    Y_pca = p.fit_transform(X)
+
+    if len(selected) > 0:
+        Y = X[:,selected]
+    else:
+        Y = Y_pca
+
+    if pca_method in ["full", "sparse", "ica"]:
+        W = p.components_
+    else:
+        W = p.dual_coef_
+
+    thres_1 = 0.6
+    thres_2 = 0.2
+    angle = 0.0
+
+    def rot_mat(x):
+        return np.array([[np.cos(x), -np.sin(x)],[np.sin(x), np.cos(x)]])
+
+    def compute_dots_idc(thres_1, thres_2, angle):
+        M = rot_mat(angle)
+
+        YY = np.dot(M, (Y[:, :2] - [thres_1, thres_2]).T)
+
+        return np.where((YY[0,:] >= 0) * (YY[1,:] >= 0))[0]
+
+    dots_idc = compute_dots_idc(thres_1, thres_2, angle)
+
+    new_coords = [Y[:,i].reshape((slab_height, slab_width)) for i in range(n_components)]
+    for c in range(n_components):
+        new_coords[c] = (new_coords[c] - new_coords[c].min())/new_coords[c].ptp()
+
+    f = plt.figure(layout="constrained")
+    gs = GridSpec(2, 3, left=0, right=0.9)
+    axes = [f.add_subplot(gs[0,0]), f.add_subplot(gs[1,0]),
+            f.add_subplot(gs[0,1]), f.add_subplot(gs[0,2]),
+            f.add_subplot(gs[1,1]), f.add_subplot(gs[1,2]),
+            # f.add_subplot(gs[2,0]), f.add_subplot(gs[2,2])
+            ]
+
+    axes[0].scatter(Y[:,0], Y[:,1], s=20, alpha=0.02)
+    pca_scatter = axes[0].scatter(Y[dots_idc,0], Y[dots_idc,1], s=20, alpha=1, color="black")
+
+    axes[1].imshow(slabs[15]["data"], vmin=i_min, vmax=i_max)
+    axes[1].set_title(f"data @ {slabs[15]["e"]}cm-1")
+    overlay_scatter = axes[1].scatter(i_idc[dots_idc], j_idc[dots_idc], s=0.1, color="black")
+
+    f_img, ax_img = plt.subplots()
+
+    ax_img.imshow(slabs[15]["data"], vmin=i_min, vmax=i_max)
+    ax_img.set_title(f"data @ {slabs[15]["e"]}cm-1")
+    overlay_scatter_img = ax_img.scatter(i_idc[dots_idc], j_idc[dots_idc], s=1, color="black")
+
+    for c in range(n_components):
+        # mask = new_coords[c] > 0.6
+        cb = axes[c+2].imshow(new_coords[c])
+        # cb = axes[c+2].imshow(mask)
+        plt.colorbar(cb)
+        if len(selected) > 0:
+            axes[c+2].set_title(f"Energy #{c+1} ({slabs[selected[c]]["e"]}cm-1)")
+        else:
+            axes[c+2].set_title(f"PCA component #{c+1}")
+
+    if len(selected) > 0:
+        axes[4].hist(Y[:,0], bins=25, ec="black", histtype="step")
+        axes[4].hist(Y[:,1], bins=25, ec="grey",  histtype="step")
+    else:
+        axes[4].hist(Y[:,0], bins=25)
+        axes[4].hist(Y[:,1], bins=25)
+    axes[4].axvline(thres_1, lw=0.5, color="black")
+    axes[4].axvline(thres_2, lw=0.5, color="black")
+
+    if pca_method in ["full", "sparse", "ica"]:
+        for c in range(n_components):
+            axes[5].step(utils.energies, W[c, :], label=f"{c+1}", where="mid")
+        axes[5].axvline(2845, color="black", lw=0.5)
+        axes[5].axvline(2930, color="black", lw=0.5)
+        axes[5].set_title("Coefficients for each component")
+        axes[5].legend(title="Component")
+        for e in selected:
+            axes[5].axvline(utils.energies[e], ls="dotted")
+
+    # Make a horizontal slider to control the frequency.
+    thres_1_axis = f.add_axes([0.91, 0.05, 0.03, 0.9])
+    thres_1_slider = Slider(
+        ax=thres_1_axis,
+        label='Thres 1',
+        valmin=Y[:,0].min(),
+        valstep=1,
+        valmax=Y[:,0].max(),
+        valinit=0.5*(Y[:,0].min()+Y[:,0].max()),
+        orientation="vertical"
+    )
+    thres_2_axis = f.add_axes([0.94, 0.05, 0.03, 0.9])
+    thres_2_slider = Slider(
+        ax=thres_2_axis,
+        label='Thres 2',
+        valmin=Y[:,1].min(),
+        valstep=1,
+        valmax=Y[:,1].max(),
+        valinit=0.5*(Y[:,1].min()+Y[:,1].max()),
+        orientation="vertical"
+    )
+
+    angle_axis = f.add_axes([0.97, 0.05, 0.03, 0.9])
+    angle_slider = Slider(
+        ax=angle_axis,
+        label='Angle',
+        valmin=0,
+        valstep=0.01,
+        valmax=2*np.pi,
+        valinit=0,
+        orientation="vertical"
+    )
+
+    # # The function to be called anytime a slider's value changes
+    def update(val):
+        dots_idc = compute_dots_idc(thres_1_slider.val, thres_2_slider.val, angle_slider.val)
+        new_offsets = Y[dots_idc,:2]
+        pca_scatter.set_offsets(new_offsets)
+        new_offsets = np.c_[i_idc[dots_idc], j_idc[dots_idc]]
+        overlay_scatter.set_offsets(new_offsets)
+        overlay_scatter_img.set_offsets(new_offsets)
+        f_img.canvas.draw_idle()
+
+    def handle_click(event):
+        if event.inaxes != axes[0]: return
+        thres_1_slider.set_val(event.xdata)
+        thres_2_slider.set_val(event.ydata)
+        update(0)
+
+    f.canvas.mpl_connect('button_press_event', handle_click)
+
+    thres_1_slider.on_changed(update)
+    thres_2_slider.on_changed(update)
+    angle_slider.on_changed(update)
+
+    plt.suptitle(f"PCA Method: {pca_method}")
+
+    return [thres_1_slider, thres_2_slider, angle_slider]
+
+def pca_cluster(n_components, n_clusters, slab_width, slab_height, pca_method="full", cluster_method="spectral", alpha=1, kernel="linear", scale=False):
+    slabs, i_min, i_max = utils.load_slabs(slab_width, slab_height)
+
+    X = utils.as_np_array(slabs).T
+    n_freqs, n_pixels = X.shape[1], X.shape[0]
+
+    if scale:
+        X = utils.scale(X)
+
+    if pca_method == "full":
+        p = PCA(n_components = n_components)
+    elif pca_method == "sparse":
+        p = SparsePCA(n_components = n_components, alpha=alpha)
+    elif pca_method == "kernel":
+        p = KernelPCA(n_components = n_components, kernel=kernel, fit_inverse_transform=True)
+    elif pca_method == "ica":
+        p = FastICA(n_components = n_components, whiten="arbitrary-variance")
+    else:
+        print(f"unknown PCA method {pca_method}")
+        return
+
+    Y = p.fit_transform(X)
+    if pca_method in ["full", "sparse", "ica"]:
+        W = p.components_
+    else:
+        W = p.dual_coef_
+
+    if cluster_method == "spectral":
+        Z = SpectralClustering(n_clusters=n_clusters, affinity="nearest_neighbors").fit(Y).labels_.astype(int)
+    elif cluster_method == "kmeans":
+        Z = KMeans(n_clusters=n_clusters).fit(Y).labels_.astype(int)
+    else:
+        print(f"unknown clustering method {cluster_method}")
+        return
+
+    labels = Z.reshape((slab_height, slab_width))
+
+    new_coords = np.array([Y[:,i].reshape((slab_height, slab_width)) for i in range(n_components)])
+    new_coords = new_coords / new_coords.sum(0)
+
+    f = plt.figure(layout="constrained")
+    gs = GridSpec(2, 3, figure=f)
+    axes = [f.add_subplot(gs[0,0]), f.add_subplot(gs[1,0]),
+            f.add_subplot(gs[0,1]), f.add_subplot(gs[0,2]),
+            f.add_subplot(gs[1,1]), f.add_subplot(gs[1,2])
+            ]
+
+    axes[0].scatter(Y[:,0], Y[:,1], s=20, alpha=0.02, c=Z)
+
+    axes[1].imshow(slabs[15]["data"], vmin=i_min, vmax=i_max)
+    axes[1].set_title(f"data @ {slabs[15]["e"]}cm-1")
+
+    for c in range(n_components):
+        cb = axes[c+2].imshow(new_coords[c])
+        plt.colorbar(cb)
+        axes[c+2].set_title(f"Component #{c+1}")
+
+    axes[5].imshow(labels)
+    axes[5].set_title("Labels")
+
+    plt.suptitle(f"PCA Method: {pca_method}")
diff --git a/pixel_spectra.py b/pixel_spectra.py
new file mode 100644
index 0000000..c6d1c48
--- /dev/null
+++ b/pixel_spectra.py
@@ -0,0 +1,20 @@
+import utils
+import matplotlib.pyplot as plt
+import numpy as np
+
+def pixel_spectra(max_pixels):
+    slabs, i_min, i_max = utils.load_slabs()
+
+    X = utils.as_np_array(slabs)
+
+    f, axes = plt.subplots()
+
+    n_freqs, n_pixels = X.shape[0], X.shape[1]
+
+    generator = np.random.Generator(np.random.PCG64())
+    idc = np.unique(generator.choice(range(n_pixels), min(n_pixels, max_pixels)))
+
+    for i in idc:
+        axes.plot(utils.energies, X[:,i], c='blue', alpha=0.1)
+    plt.show()
+
diff --git a/slideshow.py b/slideshow.py
new file mode 100644
index 0000000..351ba08
--- /dev/null
+++ b/slideshow.py
@@ -0,0 +1,51 @@
+import utils
+import matplotlib.pyplot as plt
+
+from matplotlib.widgets import Slider
+
+def slideshow():
+    slabs, i_min, i_max = utils.load_slabs()
+
+    f, axes = plt.subplots(2, 3, height_ratios=[10, 1])
+
+    for ax in axes.flat:
+         ax.set_axis_off()
+
+    ax, axfreq = axes[:,0]
+    implot = ax.imshow(slabs[0]["data"], vmin=i_min, vmax=i_max)
+    s = slabs[0]
+    ax.set_title(f"Energy: {s["e"]}cm-1, total intensity: {s["total"]:.2e}")
+    f.subplots_adjust(bottom=0.25)
+
+    # Make a horizontal slider to control the frequency.
+    freq_slider = Slider(
+        ax=axfreq,
+        label='Slab',
+        valmin=0,
+        valstep=1,
+        valmax=len(slabs)-1,
+        valinit=0,
+    )
+
+    # The function to be called anytime a slider's value changes
+    def update(val):
+        s = slabs[val]
+        implot.set_data(s["data"])
+        ax.set_title(f"Energy: {s["e"]}cm-1, total intensity: {s["total"]:.2e}")
+        f.canvas.draw_idle()
+
+    freq_slider.on_changed(update)
+
+    ax_ch2, _ = axes[:,1]
+    ax_ch2.imshow(slabs[5]["data"], vmin=i_min, vmax=i_max)
+    s = slabs[5]
+    ax_ch2.set_title(f"CH2 ({s["e"]} cm-1)")
+
+    ax_ch3, _ = axes[:,2]
+    ax_ch3.imshow(slabs[16]["data"], vmin=i_min, vmax=i_max)
+    s = slabs[16]
+    ax_ch3.set_title(f"CH3 ({s["e"]} cm-1)")
+
+    plt.tight_layout()
+    plt.show()
+
diff --git a/utils.py b/utils.py
new file mode 100644
index 0000000..cb47e1a
--- /dev/null
+++ b/utils.py
@@ -0,0 +1,35 @@
+import numpy as np
+from sklearn.preprocessing import StandardScaler
+
+energies = [2803, 2811, 2819, 2826, 2834, 2842, 2850, 2858, 2866, 2874,
+            2882, 2890, 2897, 2905, 2913, 2921, 2929, 2937, 2945, 2953,
+            2961, 2969, 2977, 2985, 2993, 3001, 3009, 3018, 3026, 3034,
+            3042, 3050]
+
+def load_slabs(width=512, height=512):
+    slabs = []
+    i_min, i_max = np.inf, 0
+    for e in energies:
+        s = np.genfromtxt(f"test_sample/HeLa_F-SRS_512x512_{e}cm-1.txt", delimiter=",")[0:height,0:width]
+        slabs.append({"e":e, "data":s, "total":int(s.sum())})
+
+        i_min, i_max = min(i_min, s.min()), max(i_max, s.max())
+
+    return slabs, i_min, i_max
+
+def as_np_array(slabs):
+    return np.array(list(map(lambda s:s["data"].flat, slabs)))
+
+def compute_errors(slabs, d=np.linalg.norm):
+    n = slabs.size(1)
+    errors = np.zeros(n, n)
+    for i in range(n):
+        for j in range(i+1, n):
+            errors[i,j] = errors[j,i] = d(slabs[i] - slabs[j])
+    return errors
+
+def scale(X):
+    scaler = StandardScaler().set_output(transform="pandas")
+    Y =  scaler.fit_transform(X)
+    print(Y.shape)
+    return Y