From b8af1fa23cc77cb3e3bcddb9b326168037570bdc Mon Sep 17 00:00:00 2001 From: Gaspard Jankowiak Date: Wed, 18 Dec 2024 17:07:43 +0100 Subject: [PATCH] import --- .gitignore | 1 + hela.py | 27 +++++ ica.py | 36 ++++++ kmeans.py | 33 ++++++ optim_mixture.jl | 259 ++++++++++++++++++++++++++++++++++++++++ pca.py | 301 +++++++++++++++++++++++++++++++++++++++++++++++ pixel_spectra.py | 20 ++++ slideshow.py | 51 ++++++++ utils.py | 35 ++++++ 9 files changed, 763 insertions(+) create mode 100644 .gitignore create mode 100644 hela.py create mode 100644 ica.py create mode 100644 kmeans.py create mode 100644 optim_mixture.jl create mode 100644 pca.py create mode 100644 pixel_spectra.py create mode 100644 slideshow.py create mode 100644 utils.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..bee8a64 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +__pycache__ diff --git a/hela.py b/hela.py new file mode 100644 index 0000000..b2ab164 --- /dev/null +++ b/hela.py @@ -0,0 +1,27 @@ +import numpy as np +import matplotlib.pyplot as plt +from matplotlib.gridspec import GridSpec + +import utils + +import pca +import pixel_spectra + +from sklearn.decomposition import PCA, FastICA + +size = 512 + +# pca.pca(2, size, size, method="sparse", alpha=1000) +# pca.pca(2, size, size, method="kernel", kernel='poly', scale=True) +# pca.pca(2, size, size, method="ica", scale=False) +# pca.pca_cluster(3, 4, size, size, scale=False, cluster_method="kmeans") +sliders = pca.pca(2, size, size, scale=False, sliders=True) +sliders = pca.pca(2, size, size, scale=False, sliders=True, selected=[17, 7]) + +# kmeans.kmeans(8, 128, 128) + +# pixel_spectra.pixel_spectra(100) + +# slideshow.slideshow() + +plt.show() diff --git a/ica.py b/ica.py new file mode 100644 index 0000000..87c22bc --- /dev/null +++ b/ica.py @@ -0,0 +1,36 @@ +import utils +import numpy as np +from sklearn.decomposition import FastICA +import matplotlib.pyplot as plt +from matplotlib.gridspec import GridSpec + +def ica(n_components, slab_width, slab_height): + slabs, i_min, i_max = utils.load_slabs(slab_width, slab_height) + + X = utils.as_np_array(slabs).T + n_freqs, n_pixels = X.shape[1], X.shape[0] + + p = FastICA(n_components = n_components, whiten="arbitrary-variance") + Y = p.fit_transform(X) + A = p.mixing_ + + new_coords = [Y[:,i].reshape((slab_height, slab_width)) for i in range(n_components)] + + f = plt.figure(layout="constrained") + gs = GridSpec(2, 3, figure=f) + axes = [f.add_subplot(gs[0,0]), f.add_subplot(gs[1,0]), + f.add_subplot(gs[0,1]), f.add_subplot(gs[0,2]), + f.add_subplot(gs[1,1]), f.add_subplot(gs[1,2]) + ] + + axes[0].scatter(Y[:,0], Y[:,1], s=20, alpha=0.02) + + axes[1].imshow(slabs[15]["data"], vmin=i_min, vmax=i_max) + axes[1].set_title(f"data @ {slabs[15]["e"]}cm-1") + + for c in range(n_components): + axes[c+2].imshow(new_coords[c]) + axes[c+2].set_title(f"Component #{c+1}") + + plt.show() +#ica(3, 512, 512) diff --git a/kmeans.py b/kmeans.py new file mode 100644 index 0000000..714be8c --- /dev/null +++ b/kmeans.py @@ -0,0 +1,33 @@ +import utils +import matplotlib.pyplot as plt +from sklearn.cluster import KMeans + +def kmeans(n_clusters, slab_width, slab_height): + slabs, i_min, i_max = utils.load_slabs(slab_width, slab_height) + + X = utils.as_np_array(slabs) + + f, axes = plt.subplots(1, 3) + + n_freqs, n_pixels = X.shape[0], X.shape[1] + + X = X.T + + km_estimator = KMeans(n_clusters=n_clusters) + km_estimator.fit(X) + centers = km_estimator.cluster_centers_ + + labels = km_estimator.labels_.reshape((slab_width, slab_height)) + + for i in range(X.shape[0]): + axes[0].plot(utils.energies, X[i,:], c='blue', alpha=0.1) + + for i in range(centers.shape[0]): + axes[0].plot(utils.energies, centers[i,:], c='black', alpha=1) + + axes[1].imshow(slabs[15]["data"], vmin=i_min, vmax=i_max) + + axes[2].imshow(labels) + + plt.show() + diff --git a/optim_mixture.jl b/optim_mixture.jl new file mode 100644 index 0000000..8b74cf4 --- /dev/null +++ b/optim_mixture.jl @@ -0,0 +1,259 @@ +module OptimMixture + +import DelimitedFiles as DF +import BenchmarkTools as BT +import UnicodePlots as UP +import LinearAlgebra: ⋅ + +const ENERGIES = [2803, 2811, 2819, 2826, 2834, 2842, 2850, 2858, 2866, 2874, + 2882, 2890, 2897, 2905, 2913, 2921, 2929, 2937, 2945, 2953, + 2961, 2969, 2977, 2985, 2993, 3001, 3009, 3018, 3026, 3034, + 3042, 3050] + +@kwdef struct Slab + energy::Int + data::Matrix{Int} +end + +function load_slabs(width::Int=512, height::Int=512) + slabs = Slab[]() + i_min, i_max = typemax(Int), 0 + for e in ENERGIES + slab = Slab(energy=e, data=DF.readdlm("test_sample/HeLa_F-SRS_512x512_2803cm-1.txt", ',', Int)) + push!(slabs, slab) + + i_min, i_max = min(i_min, minimum(slab.data)), max(i_max, maximum(slab.data)) + + return slabs, i_min, i_max + end +end + +function Q_matrix_transpose(X::Matrix{Float64}, Y::Matrix{Float64}, N::Int) + return sum(abs2, Y - view(X, 1:N, :) * view(X, (N+1):size(X, 1), :)') +end + +function Q_matrix_reshape(X::Matrix{Float64}, Y::Matrix{Float64}, N::Int) + N_plus_M, m = size(X) + return sum(abs2, Y - view(X, 1:N, :) * reshape(view(X, (N+1):N_plus_M, :), m, N_plus_M - N)) +end + +function E_loop(X::Matrix{Float64}, Y::Matrix{Float64}) + s = 0 + N, M = size(Y) + m = size(X, 2) + for i in 1:N + for j in N+1:N+M + x = 0 + for k in 1:m + @inbounds x += X[i, k] * X[j, k] + end + @inbounds s += (x - Y[i, j-N])^2 + end + end + return s +end + +function Q_loop!(dst::Matrix{Float64}, X::Matrix{Float64}, N::Int) + N_plus_M, m = size(X) + for i in 1:N + for j in N+1:N_plus_M + x = 0 + for k in 1:m + @inbounds x += X[i, k] * X[j, k] + end + @inbounds dst[i, j-N] = x + end + end +end + +function DE_loop!(dst::Matrix{Float64}, X::Matrix{Float64}, Y::Matrix{Float64}) + dst_W = zeros(size(Y)) + DE_loop!(dst, dst_W, X, Y) +end + +function dot_prod_dual_loop(W::Matrix{Float64}, X::Matrix{Float64}, V::Matrix{Float64}) + _, m = size(X) + N, M = size(W) + + s = 0 + + for k in 1:m + for i in 1:N + x = 0 + for j in 1:M + @inbounds x += W[i, j] * X[N+j, k] + end + s += x * V[i, k] + end + for j in 1:M + x = 0 + for i in 1:N + @inbounds x += W[i, j] * X[i, k] + end + s += x * V[N+j, k] + end + end + + return s +end + +function dot_prod_primal_loop(W::Matrix{Float64}, X::Matrix{Float64}, V::Matrix{Float64}) + _, m = size(X) + N, M = size(W) + + s = 0 + + for i in 1:N + for j in 1:M + x = 0 + for k in 1:m + @inbounds x += X[i, k] * V[N+j, k] + X[N+j, k] * V[i, k] + + end + s += x * W[i, j] + end + end + + return s +end + +function DE_loop!(dst::Matrix{Float64}, dst_W::Matrix{Float64}, X::Matrix{Float64}, Y::Matrix{Float64}) + _, m = size(X) + N, M = size(Y) + # dst is the same size as X + # we need storage of size Y + Q_loop!(dst_W, X, N) + # compute W = Q(X) - Y + dst_W .-= Y + for k in 1:m + for i in 1:N + x = 0 + for j in 1:M + @inbounds x += dst_W[i, j] * X[N+j, k] + end + @inbounds dst[i, k] = x + end + for j in 1:M + x = 0 + for i in 1:N + @inbounds x += dst_W[i, j] * X[i, k] + end + @inbounds dst[N+j, k] = x + end + end + dst .*= 2 +end + +function DQ_V_loop!(dst::Matrix{Float64}, X::Matrix{Float64}, V::Matrix{Float64}, N::Int) + N_plus_M, m = size(X) + for i in 1:N + for j in N+1:N_plus_M + x = 0 + for k in 1:m + @inbounds x += (V[i, k] * X[j, k] + X[i, k] * V[j, k]) + end + @inbounds dst[i, j-N] = x + end + end +end + +function Q_loop_transposed(X::Matrix{Float64}, Y::Matrix{Float64}, N::Int) + s = 0 + m, N_plus_M = size(X) + for i in 1:N + for j in N+1:N_plus_M + x = 0 + for k in 1:m + @inbounds x += X[k, i] * X[k, j] + end + @inbounds s += (Y[j-N, i] - x)^2 + end + end + return s +end + +function test_dot_prod(N::Int, M::Int, m::Int) + X = rand(N + M, m) + W = rand(N, M) + V = rand(N + M, m) .- 0.5 + + @show dot_prod_primal_loop(W, X, V) + @show dot_prod_dual_loop(W, X, V) +end + +function test_DE(N::Int, M::Int, m::Int) + X = rand(N + M, m) + Y = rand(N, M) + V = rand(N + M, m) .- 0.5 + + EX = E_loop(X, Y) + grad_EX = zeros(size(X)) + DE_loop!(grad_EX, X, Y) + + eps = [10.0^k for k in 1:-1:-12] + errors = zeros(size(eps)) + + for i in eachindex(eps) + EX_V_true = E_loop(X + eps[i] * V, Y) + EX_V_approx = EX + eps[i] * sum(grad_EX .* V) + @show EX_V_true + @show EX_V_approx + errors[i] = abs(EX_V_true - EX_V_approx) + end + + @show eps + @show errors + + foo = UP.lineplot(eps, errors, xscale=:log10, yscale=:log10) + UP.lineplot!(foo, eps, eps) + println(foo) +end + +function test_DQ(N::Int, M::Int, m::Int) + X = rand(N + M, m) * 255 + V = rand(N + M, m) + + QX = zeros(N, M) + Q_loop!(QX, X, N) + + dst_true = zeros(N, M) + dst_approx = zeros(N, M) + + eps = [10.0^k for k in 1:-1:-8] + errors = zeros(size(eps)) + + for i in eachindex(eps) + Q_loop!(dst_true, X + eps[i] * V, N) + DQ_V_loop!(dst_approx, X, eps[i] * V, N) + dst_approx .+= QX + errors[i] = sum(abs, dst_approx - dst_true) + end + + @show eps + @show errors + + println(UP.lineplot(eps, errors, xscale=:log10, yscale=:log10)) +end + +function test(N::Int, M::Int, m::Int) + Y = rand(N, M) * 255 + X = rand(N + M, m) * 255 + + X_pre = copy(X) + X_pre[N+1:N+M] .= vec(X_pre[N+1:N+M]') + + r_matrix = Q_matrix_transpose(X, Y, N) + r_loop = Q_loop(X, Y, N) + + @show r_matrix, r_loop, abs(r_matrix - r_loop) / r_loop +end + +end # module + +# OptimMixture.test(512 * 512, 20, 2) +# OptimMixture.test_DQ(512 * 512, 20, 2) +OptimMixture.test_DE(512 * 512, 20, 2) +# OptimMixture.test_DE(20, 2, 2) +# OptimMixture.test_dot_prod(512^2, 20, 2) + +# vim: ts=2:sw=2:sts=2 diff --git a/pca.py b/pca.py new file mode 100644 index 0000000..4fd2a3d --- /dev/null +++ b/pca.py @@ -0,0 +1,301 @@ +import utils +import numpy as np +from sklearn.decomposition import PCA, SparsePCA, KernelPCA, FastICA +from sklearn.cluster import SpectralClustering, KMeans +import matplotlib.pyplot as plt +from matplotlib.gridspec import GridSpec + +from matplotlib.widgets import Slider + +def pca(n_components, slab_width, slab_height, pca_method="full", alpha=1, kernel="linear", scale=False, sliders=False, selected=[]): + if sliders: + return pca_sliders(n_components, slab_width, slab_height, pca_method=pca_method, alpha=alpha, kernel=kernel, scale=scale, selected=selected) + slabs, i_min, i_max = utils.load_slabs(slab_width, slab_height) + + X = utils.as_np_array(slabs).T + n_freqs, n_pixels = X.shape[1], X.shape[0] + + if scale: + X = utils.scale(X) + + if pca_method == "full": + p = PCA(n_components = n_components) + elif pca_method == "sparse": + p = SparsePCA(n_components = n_components, alpha=alpha) + elif pca_method == "kernel": + p = KernelPCA(n_components = n_components, kernel=kernel, fit_inverse_transform=True) + elif pca_method == "ica": + p = FastICA(n_components = n_components, whiten="arbitrary-variance") + else: + print(f"unknown PCA pca_method {pca_method}") + return + + Y = p.fit_transform(X) + if pca_method in ["full", "sparse", "ica"]: + W = p.components_ + else: + W = p.dual_coef_ + + dots_idc = np.where(Y[:,0] > 0.6) + + new_coords = [Y[:,i].reshape((slab_height, slab_width)) for i in range(n_components)] + for c in range(n_components): + new_coords[c] = (new_coords[c] - new_coords[c].min())/new_coords[c].ptp() + + f = plt.figure(layout="constrained") + gs = GridSpec(2, 3, figure=f) + axes = [f.add_subplot(gs[0,0]), f.add_subplot(gs[1,0]), + f.add_subplot(gs[0,1]), f.add_subplot(gs[0,2]), + f.add_subplot(gs[1,1]), f.add_subplot(gs[1,2]) + ] + + axes[0].scatter(Y[:,0], Y[:,1], s=20, alpha=0.02) + axes[0].scatter(Y[dots_idc,0], Y[dots_idc,1], s=20, alpha=1, color="black") + + axes[1].imshow(slabs[15]["data"], vmin=i_min, vmax=i_max) + axes[1].set_title(f"data @ {slabs[15]["e"]}cm-1") + + for c in range(n_components-1): + # mask = new_coords[c] > 0.6 + cb = axes[c+2].imshow(new_coords[c]) + # cb = axes[c+2].imshow(mask) + plt.colorbar(cb) + axes[c+2].set_title(f"Component #{c+1}") + + axes[4].hist(Y[:,0], bins=25) + + if pca_method in ["full", "sparse", "ica"]: + for c in range(n_components-1): + axes[5].step(utils.energies, W[c, :], label=f"{c+1}", where="mid") + axes[5].axvline(2845, color="black", lw=0.5) + axes[5].axvline(2930, color="black", lw=0.5) + axes[5].set_title("Coefficients for each component") + + plt.legend(title="Component") + plt.suptitle(f"PCA Method: {pca_method}") + +def pca_sliders(n_components, slab_width, slab_height, pca_method="full", alpha=1, kernel="linear", scale=False, selected=[]): + slabs, i_min, i_max = utils.load_slabs(slab_width, slab_height) + + i_idc = np.array([[i for i in range(slab_height)] for j in range(slab_width)]).flatten() + j_idc = np.array([[j for i in range(slab_height)] for j in range(slab_width)]).flatten() + + X = utils.as_np_array(slabs).T + n_pixels, n_freqs = X.shape + + if scale: + X = utils.scale(X) + + if pca_method == "full": + p = PCA(n_components = n_components) + elif pca_method == "sparse": + p = SparsePCA(n_components = n_components, alpha=alpha) + elif pca_method == "kernel": + p = KernelPCA(n_components = n_components, kernel=kernel, fit_inverse_transform=True) + elif pca_method == "ica": + p = FastICA(n_components = n_components, whiten="arbitrary-variance") + else: + print(f"unknown PCA pca_method {pca_method}") + return + Y_pca = p.fit_transform(X) + + if len(selected) > 0: + Y = X[:,selected] + else: + Y = Y_pca + + if pca_method in ["full", "sparse", "ica"]: + W = p.components_ + else: + W = p.dual_coef_ + + thres_1 = 0.6 + thres_2 = 0.2 + angle = 0.0 + + def rot_mat(x): + return np.array([[np.cos(x), -np.sin(x)],[np.sin(x), np.cos(x)]]) + + def compute_dots_idc(thres_1, thres_2, angle): + M = rot_mat(angle) + + YY = np.dot(M, (Y[:, :2] - [thres_1, thres_2]).T) + + return np.where((YY[0,:] >= 0) * (YY[1,:] >= 0))[0] + + dots_idc = compute_dots_idc(thres_1, thres_2, angle) + + new_coords = [Y[:,i].reshape((slab_height, slab_width)) for i in range(n_components)] + for c in range(n_components): + new_coords[c] = (new_coords[c] - new_coords[c].min())/new_coords[c].ptp() + + f = plt.figure(layout="constrained") + gs = GridSpec(2, 3, left=0, right=0.9) + axes = [f.add_subplot(gs[0,0]), f.add_subplot(gs[1,0]), + f.add_subplot(gs[0,1]), f.add_subplot(gs[0,2]), + f.add_subplot(gs[1,1]), f.add_subplot(gs[1,2]), + # f.add_subplot(gs[2,0]), f.add_subplot(gs[2,2]) + ] + + axes[0].scatter(Y[:,0], Y[:,1], s=20, alpha=0.02) + pca_scatter = axes[0].scatter(Y[dots_idc,0], Y[dots_idc,1], s=20, alpha=1, color="black") + + axes[1].imshow(slabs[15]["data"], vmin=i_min, vmax=i_max) + axes[1].set_title(f"data @ {slabs[15]["e"]}cm-1") + overlay_scatter = axes[1].scatter(i_idc[dots_idc], j_idc[dots_idc], s=0.1, color="black") + + f_img, ax_img = plt.subplots() + + ax_img.imshow(slabs[15]["data"], vmin=i_min, vmax=i_max) + ax_img.set_title(f"data @ {slabs[15]["e"]}cm-1") + overlay_scatter_img = ax_img.scatter(i_idc[dots_idc], j_idc[dots_idc], s=1, color="black") + + for c in range(n_components): + # mask = new_coords[c] > 0.6 + cb = axes[c+2].imshow(new_coords[c]) + # cb = axes[c+2].imshow(mask) + plt.colorbar(cb) + if len(selected) > 0: + axes[c+2].set_title(f"Energy #{c+1} ({slabs[selected[c]]["e"]}cm-1)") + else: + axes[c+2].set_title(f"PCA component #{c+1}") + + if len(selected) > 0: + axes[4].hist(Y[:,0], bins=25, ec="black", histtype="step") + axes[4].hist(Y[:,1], bins=25, ec="grey", histtype="step") + else: + axes[4].hist(Y[:,0], bins=25) + axes[4].hist(Y[:,1], bins=25) + axes[4].axvline(thres_1, lw=0.5, color="black") + axes[4].axvline(thres_2, lw=0.5, color="black") + + if pca_method in ["full", "sparse", "ica"]: + for c in range(n_components): + axes[5].step(utils.energies, W[c, :], label=f"{c+1}", where="mid") + axes[5].axvline(2845, color="black", lw=0.5) + axes[5].axvline(2930, color="black", lw=0.5) + axes[5].set_title("Coefficients for each component") + axes[5].legend(title="Component") + for e in selected: + axes[5].axvline(utils.energies[e], ls="dotted") + + # Make a horizontal slider to control the frequency. + thres_1_axis = f.add_axes([0.91, 0.05, 0.03, 0.9]) + thres_1_slider = Slider( + ax=thres_1_axis, + label='Thres 1', + valmin=Y[:,0].min(), + valstep=1, + valmax=Y[:,0].max(), + valinit=0.5*(Y[:,0].min()+Y[:,0].max()), + orientation="vertical" + ) + thres_2_axis = f.add_axes([0.94, 0.05, 0.03, 0.9]) + thres_2_slider = Slider( + ax=thres_2_axis, + label='Thres 2', + valmin=Y[:,1].min(), + valstep=1, + valmax=Y[:,1].max(), + valinit=0.5*(Y[:,1].min()+Y[:,1].max()), + orientation="vertical" + ) + + angle_axis = f.add_axes([0.97, 0.05, 0.03, 0.9]) + angle_slider = Slider( + ax=angle_axis, + label='Angle', + valmin=0, + valstep=0.01, + valmax=2*np.pi, + valinit=0, + orientation="vertical" + ) + + # # The function to be called anytime a slider's value changes + def update(val): + dots_idc = compute_dots_idc(thres_1_slider.val, thres_2_slider.val, angle_slider.val) + new_offsets = Y[dots_idc,:2] + pca_scatter.set_offsets(new_offsets) + new_offsets = np.c_[i_idc[dots_idc], j_idc[dots_idc]] + overlay_scatter.set_offsets(new_offsets) + overlay_scatter_img.set_offsets(new_offsets) + f_img.canvas.draw_idle() + + def handle_click(event): + if event.inaxes != axes[0]: return + thres_1_slider.set_val(event.xdata) + thres_2_slider.set_val(event.ydata) + update(0) + + f.canvas.mpl_connect('button_press_event', handle_click) + + thres_1_slider.on_changed(update) + thres_2_slider.on_changed(update) + angle_slider.on_changed(update) + + plt.suptitle(f"PCA Method: {pca_method}") + + return [thres_1_slider, thres_2_slider, angle_slider] + +def pca_cluster(n_components, n_clusters, slab_width, slab_height, pca_method="full", cluster_method="spectral", alpha=1, kernel="linear", scale=False): + slabs, i_min, i_max = utils.load_slabs(slab_width, slab_height) + + X = utils.as_np_array(slabs).T + n_freqs, n_pixels = X.shape[1], X.shape[0] + + if scale: + X = utils.scale(X) + + if pca_method == "full": + p = PCA(n_components = n_components) + elif pca_method == "sparse": + p = SparsePCA(n_components = n_components, alpha=alpha) + elif pca_method == "kernel": + p = KernelPCA(n_components = n_components, kernel=kernel, fit_inverse_transform=True) + elif pca_method == "ica": + p = FastICA(n_components = n_components, whiten="arbitrary-variance") + else: + print(f"unknown PCA method {pca_method}") + return + + Y = p.fit_transform(X) + if pca_method in ["full", "sparse", "ica"]: + W = p.components_ + else: + W = p.dual_coef_ + + if cluster_method == "spectral": + Z = SpectralClustering(n_clusters=n_clusters, affinity="nearest_neighbors").fit(Y).labels_.astype(int) + elif cluster_method == "kmeans": + Z = KMeans(n_clusters=n_clusters).fit(Y).labels_.astype(int) + else: + print(f"unknown clustering method {cluster_method}") + return + + labels = Z.reshape((slab_height, slab_width)) + + new_coords = np.array([Y[:,i].reshape((slab_height, slab_width)) for i in range(n_components)]) + new_coords = new_coords / new_coords.sum(0) + + f = plt.figure(layout="constrained") + gs = GridSpec(2, 3, figure=f) + axes = [f.add_subplot(gs[0,0]), f.add_subplot(gs[1,0]), + f.add_subplot(gs[0,1]), f.add_subplot(gs[0,2]), + f.add_subplot(gs[1,1]), f.add_subplot(gs[1,2]) + ] + + axes[0].scatter(Y[:,0], Y[:,1], s=20, alpha=0.02, c=Z) + + axes[1].imshow(slabs[15]["data"], vmin=i_min, vmax=i_max) + axes[1].set_title(f"data @ {slabs[15]["e"]}cm-1") + + for c in range(n_components): + cb = axes[c+2].imshow(new_coords[c]) + plt.colorbar(cb) + axes[c+2].set_title(f"Component #{c+1}") + + axes[5].imshow(labels) + axes[5].set_title("Labels") + + plt.suptitle(f"PCA Method: {pca_method}") diff --git a/pixel_spectra.py b/pixel_spectra.py new file mode 100644 index 0000000..c6d1c48 --- /dev/null +++ b/pixel_spectra.py @@ -0,0 +1,20 @@ +import utils +import matplotlib.pyplot as plt +import numpy as np + +def pixel_spectra(max_pixels): + slabs, i_min, i_max = utils.load_slabs() + + X = utils.as_np_array(slabs) + + f, axes = plt.subplots() + + n_freqs, n_pixels = X.shape[0], X.shape[1] + + generator = np.random.Generator(np.random.PCG64()) + idc = np.unique(generator.choice(range(n_pixels), min(n_pixels, max_pixels))) + + for i in idc: + axes.plot(utils.energies, X[:,i], c='blue', alpha=0.1) + plt.show() + diff --git a/slideshow.py b/slideshow.py new file mode 100644 index 0000000..351ba08 --- /dev/null +++ b/slideshow.py @@ -0,0 +1,51 @@ +import utils +import matplotlib.pyplot as plt + +from matplotlib.widgets import Slider + +def slideshow(): + slabs, i_min, i_max = utils.load_slabs() + + f, axes = plt.subplots(2, 3, height_ratios=[10, 1]) + + for ax in axes.flat: + ax.set_axis_off() + + ax, axfreq = axes[:,0] + implot = ax.imshow(slabs[0]["data"], vmin=i_min, vmax=i_max) + s = slabs[0] + ax.set_title(f"Energy: {s["e"]}cm-1, total intensity: {s["total"]:.2e}") + f.subplots_adjust(bottom=0.25) + + # Make a horizontal slider to control the frequency. + freq_slider = Slider( + ax=axfreq, + label='Slab', + valmin=0, + valstep=1, + valmax=len(slabs)-1, + valinit=0, + ) + + # The function to be called anytime a slider's value changes + def update(val): + s = slabs[val] + implot.set_data(s["data"]) + ax.set_title(f"Energy: {s["e"]}cm-1, total intensity: {s["total"]:.2e}") + f.canvas.draw_idle() + + freq_slider.on_changed(update) + + ax_ch2, _ = axes[:,1] + ax_ch2.imshow(slabs[5]["data"], vmin=i_min, vmax=i_max) + s = slabs[5] + ax_ch2.set_title(f"CH2 ({s["e"]} cm-1)") + + ax_ch3, _ = axes[:,2] + ax_ch3.imshow(slabs[16]["data"], vmin=i_min, vmax=i_max) + s = slabs[16] + ax_ch3.set_title(f"CH3 ({s["e"]} cm-1)") + + plt.tight_layout() + plt.show() + diff --git a/utils.py b/utils.py new file mode 100644 index 0000000..cb47e1a --- /dev/null +++ b/utils.py @@ -0,0 +1,35 @@ +import numpy as np +from sklearn.preprocessing import StandardScaler + +energies = [2803, 2811, 2819, 2826, 2834, 2842, 2850, 2858, 2866, 2874, + 2882, 2890, 2897, 2905, 2913, 2921, 2929, 2937, 2945, 2953, + 2961, 2969, 2977, 2985, 2993, 3001, 3009, 3018, 3026, 3034, + 3042, 3050] + +def load_slabs(width=512, height=512): + slabs = [] + i_min, i_max = np.inf, 0 + for e in energies: + s = np.genfromtxt(f"test_sample/HeLa_F-SRS_512x512_{e}cm-1.txt", delimiter=",")[0:height,0:width] + slabs.append({"e":e, "data":s, "total":int(s.sum())}) + + i_min, i_max = min(i_min, s.min()), max(i_max, s.max()) + + return slabs, i_min, i_max + +def as_np_array(slabs): + return np.array(list(map(lambda s:s["data"].flat, slabs))) + +def compute_errors(slabs, d=np.linalg.norm): + n = slabs.size(1) + errors = np.zeros(n, n) + for i in range(n): + for j in range(i+1, n): + errors[i,j] = errors[j,i] = d(slabs[i] - slabs[j]) + return errors + +def scale(X): + scaler = StandardScaler().set_output(transform="pandas") + Y = scaler.fit_transform(X) + print(Y.shape) + return Y