This commit is contained in:
Gaspard Jankowiak 2024-12-18 17:07:43 +01:00
commit b8af1fa23c
9 changed files with 763 additions and 0 deletions

1
.gitignore vendored Normal file
View file

@ -0,0 +1 @@
__pycache__

27
hela.py Normal file
View file

@ -0,0 +1,27 @@
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec
import utils
import pca
import pixel_spectra
from sklearn.decomposition import PCA, FastICA
size = 512
# pca.pca(2, size, size, method="sparse", alpha=1000)
# pca.pca(2, size, size, method="kernel", kernel='poly', scale=True)
# pca.pca(2, size, size, method="ica", scale=False)
# pca.pca_cluster(3, 4, size, size, scale=False, cluster_method="kmeans")
sliders = pca.pca(2, size, size, scale=False, sliders=True)
sliders = pca.pca(2, size, size, scale=False, sliders=True, selected=[17, 7])
# kmeans.kmeans(8, 128, 128)
# pixel_spectra.pixel_spectra(100)
# slideshow.slideshow()
plt.show()

36
ica.py Normal file
View file

@ -0,0 +1,36 @@
import utils
import numpy as np
from sklearn.decomposition import FastICA
import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec
def ica(n_components, slab_width, slab_height):
slabs, i_min, i_max = utils.load_slabs(slab_width, slab_height)
X = utils.as_np_array(slabs).T
n_freqs, n_pixels = X.shape[1], X.shape[0]
p = FastICA(n_components = n_components, whiten="arbitrary-variance")
Y = p.fit_transform(X)
A = p.mixing_
new_coords = [Y[:,i].reshape((slab_height, slab_width)) for i in range(n_components)]
f = plt.figure(layout="constrained")
gs = GridSpec(2, 3, figure=f)
axes = [f.add_subplot(gs[0,0]), f.add_subplot(gs[1,0]),
f.add_subplot(gs[0,1]), f.add_subplot(gs[0,2]),
f.add_subplot(gs[1,1]), f.add_subplot(gs[1,2])
]
axes[0].scatter(Y[:,0], Y[:,1], s=20, alpha=0.02)
axes[1].imshow(slabs[15]["data"], vmin=i_min, vmax=i_max)
axes[1].set_title(f"data @ {slabs[15]["e"]}cm-1")
for c in range(n_components):
axes[c+2].imshow(new_coords[c])
axes[c+2].set_title(f"Component #{c+1}")
plt.show()
#ica(3, 512, 512)

33
kmeans.py Normal file
View file

@ -0,0 +1,33 @@
import utils
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
def kmeans(n_clusters, slab_width, slab_height):
slabs, i_min, i_max = utils.load_slabs(slab_width, slab_height)
X = utils.as_np_array(slabs)
f, axes = plt.subplots(1, 3)
n_freqs, n_pixels = X.shape[0], X.shape[1]
X = X.T
km_estimator = KMeans(n_clusters=n_clusters)
km_estimator.fit(X)
centers = km_estimator.cluster_centers_
labels = km_estimator.labels_.reshape((slab_width, slab_height))
for i in range(X.shape[0]):
axes[0].plot(utils.energies, X[i,:], c='blue', alpha=0.1)
for i in range(centers.shape[0]):
axes[0].plot(utils.energies, centers[i,:], c='black', alpha=1)
axes[1].imshow(slabs[15]["data"], vmin=i_min, vmax=i_max)
axes[2].imshow(labels)
plt.show()

259
optim_mixture.jl Normal file
View file

@ -0,0 +1,259 @@
module OptimMixture
import DelimitedFiles as DF
import BenchmarkTools as BT
import UnicodePlots as UP
import LinearAlgebra:
const ENERGIES = [2803, 2811, 2819, 2826, 2834, 2842, 2850, 2858, 2866, 2874,
2882, 2890, 2897, 2905, 2913, 2921, 2929, 2937, 2945, 2953,
2961, 2969, 2977, 2985, 2993, 3001, 3009, 3018, 3026, 3034,
3042, 3050]
@kwdef struct Slab
energy::Int
data::Matrix{Int}
end
function load_slabs(width::Int=512, height::Int=512)
slabs = Slab[]()
i_min, i_max = typemax(Int), 0
for e in ENERGIES
slab = Slab(energy=e, data=DF.readdlm("test_sample/HeLa_F-SRS_512x512_2803cm-1.txt", ',', Int))
push!(slabs, slab)
i_min, i_max = min(i_min, minimum(slab.data)), max(i_max, maximum(slab.data))
return slabs, i_min, i_max
end
end
function Q_matrix_transpose(X::Matrix{Float64}, Y::Matrix{Float64}, N::Int)
return sum(abs2, Y - view(X, 1:N, :) * view(X, (N+1):size(X, 1), :)')
end
function Q_matrix_reshape(X::Matrix{Float64}, Y::Matrix{Float64}, N::Int)
N_plus_M, m = size(X)
return sum(abs2, Y - view(X, 1:N, :) * reshape(view(X, (N+1):N_plus_M, :), m, N_plus_M - N))
end
function E_loop(X::Matrix{Float64}, Y::Matrix{Float64})
s = 0
N, M = size(Y)
m = size(X, 2)
for i in 1:N
for j in N+1:N+M
x = 0
for k in 1:m
@inbounds x += X[i, k] * X[j, k]
end
@inbounds s += (x - Y[i, j-N])^2
end
end
return s
end
function Q_loop!(dst::Matrix{Float64}, X::Matrix{Float64}, N::Int)
N_plus_M, m = size(X)
for i in 1:N
for j in N+1:N_plus_M
x = 0
for k in 1:m
@inbounds x += X[i, k] * X[j, k]
end
@inbounds dst[i, j-N] = x
end
end
end
function DE_loop!(dst::Matrix{Float64}, X::Matrix{Float64}, Y::Matrix{Float64})
dst_W = zeros(size(Y))
DE_loop!(dst, dst_W, X, Y)
end
function dot_prod_dual_loop(W::Matrix{Float64}, X::Matrix{Float64}, V::Matrix{Float64})
_, m = size(X)
N, M = size(W)
s = 0
for k in 1:m
for i in 1:N
x = 0
for j in 1:M
@inbounds x += W[i, j] * X[N+j, k]
end
s += x * V[i, k]
end
for j in 1:M
x = 0
for i in 1:N
@inbounds x += W[i, j] * X[i, k]
end
s += x * V[N+j, k]
end
end
return s
end
function dot_prod_primal_loop(W::Matrix{Float64}, X::Matrix{Float64}, V::Matrix{Float64})
_, m = size(X)
N, M = size(W)
s = 0
for i in 1:N
for j in 1:M
x = 0
for k in 1:m
@inbounds x += X[i, k] * V[N+j, k] + X[N+j, k] * V[i, k]
end
s += x * W[i, j]
end
end
return s
end
function DE_loop!(dst::Matrix{Float64}, dst_W::Matrix{Float64}, X::Matrix{Float64}, Y::Matrix{Float64})
_, m = size(X)
N, M = size(Y)
# dst is the same size as X
# we need storage of size Y
Q_loop!(dst_W, X, N)
# compute W = Q(X) - Y
dst_W .-= Y
for k in 1:m
for i in 1:N
x = 0
for j in 1:M
@inbounds x += dst_W[i, j] * X[N+j, k]
end
@inbounds dst[i, k] = x
end
for j in 1:M
x = 0
for i in 1:N
@inbounds x += dst_W[i, j] * X[i, k]
end
@inbounds dst[N+j, k] = x
end
end
dst .*= 2
end
function DQ_V_loop!(dst::Matrix{Float64}, X::Matrix{Float64}, V::Matrix{Float64}, N::Int)
N_plus_M, m = size(X)
for i in 1:N
for j in N+1:N_plus_M
x = 0
for k in 1:m
@inbounds x += (V[i, k] * X[j, k] + X[i, k] * V[j, k])
end
@inbounds dst[i, j-N] = x
end
end
end
function Q_loop_transposed(X::Matrix{Float64}, Y::Matrix{Float64}, N::Int)
s = 0
m, N_plus_M = size(X)
for i in 1:N
for j in N+1:N_plus_M
x = 0
for k in 1:m
@inbounds x += X[k, i] * X[k, j]
end
@inbounds s += (Y[j-N, i] - x)^2
end
end
return s
end
function test_dot_prod(N::Int, M::Int, m::Int)
X = rand(N + M, m)
W = rand(N, M)
V = rand(N + M, m) .- 0.5
@show dot_prod_primal_loop(W, X, V)
@show dot_prod_dual_loop(W, X, V)
end
function test_DE(N::Int, M::Int, m::Int)
X = rand(N + M, m)
Y = rand(N, M)
V = rand(N + M, m) .- 0.5
EX = E_loop(X, Y)
grad_EX = zeros(size(X))
DE_loop!(grad_EX, X, Y)
eps = [10.0^k for k in 1:-1:-12]
errors = zeros(size(eps))
for i in eachindex(eps)
EX_V_true = E_loop(X + eps[i] * V, Y)
EX_V_approx = EX + eps[i] * sum(grad_EX .* V)
@show EX_V_true
@show EX_V_approx
errors[i] = abs(EX_V_true - EX_V_approx)
end
@show eps
@show errors
foo = UP.lineplot(eps, errors, xscale=:log10, yscale=:log10)
UP.lineplot!(foo, eps, eps)
println(foo)
end
function test_DQ(N::Int, M::Int, m::Int)
X = rand(N + M, m) * 255
V = rand(N + M, m)
QX = zeros(N, M)
Q_loop!(QX, X, N)
dst_true = zeros(N, M)
dst_approx = zeros(N, M)
eps = [10.0^k for k in 1:-1:-8]
errors = zeros(size(eps))
for i in eachindex(eps)
Q_loop!(dst_true, X + eps[i] * V, N)
DQ_V_loop!(dst_approx, X, eps[i] * V, N)
dst_approx .+= QX
errors[i] = sum(abs, dst_approx - dst_true)
end
@show eps
@show errors
println(UP.lineplot(eps, errors, xscale=:log10, yscale=:log10))
end
function test(N::Int, M::Int, m::Int)
Y = rand(N, M) * 255
X = rand(N + M, m) * 255
X_pre = copy(X)
X_pre[N+1:N+M] .= vec(X_pre[N+1:N+M]')
r_matrix = Q_matrix_transpose(X, Y, N)
r_loop = Q_loop(X, Y, N)
@show r_matrix, r_loop, abs(r_matrix - r_loop) / r_loop
end
end # module
# OptimMixture.test(512 * 512, 20, 2)
# OptimMixture.test_DQ(512 * 512, 20, 2)
OptimMixture.test_DE(512 * 512, 20, 2)
# OptimMixture.test_DE(20, 2, 2)
# OptimMixture.test_dot_prod(512^2, 20, 2)
# vim: ts=2:sw=2:sts=2

301
pca.py Normal file
View file

@ -0,0 +1,301 @@
import utils
import numpy as np
from sklearn.decomposition import PCA, SparsePCA, KernelPCA, FastICA
from sklearn.cluster import SpectralClustering, KMeans
import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec
from matplotlib.widgets import Slider
def pca(n_components, slab_width, slab_height, pca_method="full", alpha=1, kernel="linear", scale=False, sliders=False, selected=[]):
if sliders:
return pca_sliders(n_components, slab_width, slab_height, pca_method=pca_method, alpha=alpha, kernel=kernel, scale=scale, selected=selected)
slabs, i_min, i_max = utils.load_slabs(slab_width, slab_height)
X = utils.as_np_array(slabs).T
n_freqs, n_pixels = X.shape[1], X.shape[0]
if scale:
X = utils.scale(X)
if pca_method == "full":
p = PCA(n_components = n_components)
elif pca_method == "sparse":
p = SparsePCA(n_components = n_components, alpha=alpha)
elif pca_method == "kernel":
p = KernelPCA(n_components = n_components, kernel=kernel, fit_inverse_transform=True)
elif pca_method == "ica":
p = FastICA(n_components = n_components, whiten="arbitrary-variance")
else:
print(f"unknown PCA pca_method {pca_method}")
return
Y = p.fit_transform(X)
if pca_method in ["full", "sparse", "ica"]:
W = p.components_
else:
W = p.dual_coef_
dots_idc = np.where(Y[:,0] > 0.6)
new_coords = [Y[:,i].reshape((slab_height, slab_width)) for i in range(n_components)]
for c in range(n_components):
new_coords[c] = (new_coords[c] - new_coords[c].min())/new_coords[c].ptp()
f = plt.figure(layout="constrained")
gs = GridSpec(2, 3, figure=f)
axes = [f.add_subplot(gs[0,0]), f.add_subplot(gs[1,0]),
f.add_subplot(gs[0,1]), f.add_subplot(gs[0,2]),
f.add_subplot(gs[1,1]), f.add_subplot(gs[1,2])
]
axes[0].scatter(Y[:,0], Y[:,1], s=20, alpha=0.02)
axes[0].scatter(Y[dots_idc,0], Y[dots_idc,1], s=20, alpha=1, color="black")
axes[1].imshow(slabs[15]["data"], vmin=i_min, vmax=i_max)
axes[1].set_title(f"data @ {slabs[15]["e"]}cm-1")
for c in range(n_components-1):
# mask = new_coords[c] > 0.6
cb = axes[c+2].imshow(new_coords[c])
# cb = axes[c+2].imshow(mask)
plt.colorbar(cb)
axes[c+2].set_title(f"Component #{c+1}")
axes[4].hist(Y[:,0], bins=25)
if pca_method in ["full", "sparse", "ica"]:
for c in range(n_components-1):
axes[5].step(utils.energies, W[c, :], label=f"{c+1}", where="mid")
axes[5].axvline(2845, color="black", lw=0.5)
axes[5].axvline(2930, color="black", lw=0.5)
axes[5].set_title("Coefficients for each component")
plt.legend(title="Component")
plt.suptitle(f"PCA Method: {pca_method}")
def pca_sliders(n_components, slab_width, slab_height, pca_method="full", alpha=1, kernel="linear", scale=False, selected=[]):
slabs, i_min, i_max = utils.load_slabs(slab_width, slab_height)
i_idc = np.array([[i for i in range(slab_height)] for j in range(slab_width)]).flatten()
j_idc = np.array([[j for i in range(slab_height)] for j in range(slab_width)]).flatten()
X = utils.as_np_array(slabs).T
n_pixels, n_freqs = X.shape
if scale:
X = utils.scale(X)
if pca_method == "full":
p = PCA(n_components = n_components)
elif pca_method == "sparse":
p = SparsePCA(n_components = n_components, alpha=alpha)
elif pca_method == "kernel":
p = KernelPCA(n_components = n_components, kernel=kernel, fit_inverse_transform=True)
elif pca_method == "ica":
p = FastICA(n_components = n_components, whiten="arbitrary-variance")
else:
print(f"unknown PCA pca_method {pca_method}")
return
Y_pca = p.fit_transform(X)
if len(selected) > 0:
Y = X[:,selected]
else:
Y = Y_pca
if pca_method in ["full", "sparse", "ica"]:
W = p.components_
else:
W = p.dual_coef_
thres_1 = 0.6
thres_2 = 0.2
angle = 0.0
def rot_mat(x):
return np.array([[np.cos(x), -np.sin(x)],[np.sin(x), np.cos(x)]])
def compute_dots_idc(thres_1, thres_2, angle):
M = rot_mat(angle)
YY = np.dot(M, (Y[:, :2] - [thres_1, thres_2]).T)
return np.where((YY[0,:] >= 0) * (YY[1,:] >= 0))[0]
dots_idc = compute_dots_idc(thres_1, thres_2, angle)
new_coords = [Y[:,i].reshape((slab_height, slab_width)) for i in range(n_components)]
for c in range(n_components):
new_coords[c] = (new_coords[c] - new_coords[c].min())/new_coords[c].ptp()
f = plt.figure(layout="constrained")
gs = GridSpec(2, 3, left=0, right=0.9)
axes = [f.add_subplot(gs[0,0]), f.add_subplot(gs[1,0]),
f.add_subplot(gs[0,1]), f.add_subplot(gs[0,2]),
f.add_subplot(gs[1,1]), f.add_subplot(gs[1,2]),
# f.add_subplot(gs[2,0]), f.add_subplot(gs[2,2])
]
axes[0].scatter(Y[:,0], Y[:,1], s=20, alpha=0.02)
pca_scatter = axes[0].scatter(Y[dots_idc,0], Y[dots_idc,1], s=20, alpha=1, color="black")
axes[1].imshow(slabs[15]["data"], vmin=i_min, vmax=i_max)
axes[1].set_title(f"data @ {slabs[15]["e"]}cm-1")
overlay_scatter = axes[1].scatter(i_idc[dots_idc], j_idc[dots_idc], s=0.1, color="black")
f_img, ax_img = plt.subplots()
ax_img.imshow(slabs[15]["data"], vmin=i_min, vmax=i_max)
ax_img.set_title(f"data @ {slabs[15]["e"]}cm-1")
overlay_scatter_img = ax_img.scatter(i_idc[dots_idc], j_idc[dots_idc], s=1, color="black")
for c in range(n_components):
# mask = new_coords[c] > 0.6
cb = axes[c+2].imshow(new_coords[c])
# cb = axes[c+2].imshow(mask)
plt.colorbar(cb)
if len(selected) > 0:
axes[c+2].set_title(f"Energy #{c+1} ({slabs[selected[c]]["e"]}cm-1)")
else:
axes[c+2].set_title(f"PCA component #{c+1}")
if len(selected) > 0:
axes[4].hist(Y[:,0], bins=25, ec="black", histtype="step")
axes[4].hist(Y[:,1], bins=25, ec="grey", histtype="step")
else:
axes[4].hist(Y[:,0], bins=25)
axes[4].hist(Y[:,1], bins=25)
axes[4].axvline(thres_1, lw=0.5, color="black")
axes[4].axvline(thres_2, lw=0.5, color="black")
if pca_method in ["full", "sparse", "ica"]:
for c in range(n_components):
axes[5].step(utils.energies, W[c, :], label=f"{c+1}", where="mid")
axes[5].axvline(2845, color="black", lw=0.5)
axes[5].axvline(2930, color="black", lw=0.5)
axes[5].set_title("Coefficients for each component")
axes[5].legend(title="Component")
for e in selected:
axes[5].axvline(utils.energies[e], ls="dotted")
# Make a horizontal slider to control the frequency.
thres_1_axis = f.add_axes([0.91, 0.05, 0.03, 0.9])
thres_1_slider = Slider(
ax=thres_1_axis,
label='Thres 1',
valmin=Y[:,0].min(),
valstep=1,
valmax=Y[:,0].max(),
valinit=0.5*(Y[:,0].min()+Y[:,0].max()),
orientation="vertical"
)
thres_2_axis = f.add_axes([0.94, 0.05, 0.03, 0.9])
thres_2_slider = Slider(
ax=thres_2_axis,
label='Thres 2',
valmin=Y[:,1].min(),
valstep=1,
valmax=Y[:,1].max(),
valinit=0.5*(Y[:,1].min()+Y[:,1].max()),
orientation="vertical"
)
angle_axis = f.add_axes([0.97, 0.05, 0.03, 0.9])
angle_slider = Slider(
ax=angle_axis,
label='Angle',
valmin=0,
valstep=0.01,
valmax=2*np.pi,
valinit=0,
orientation="vertical"
)
# # The function to be called anytime a slider's value changes
def update(val):
dots_idc = compute_dots_idc(thres_1_slider.val, thres_2_slider.val, angle_slider.val)
new_offsets = Y[dots_idc,:2]
pca_scatter.set_offsets(new_offsets)
new_offsets = np.c_[i_idc[dots_idc], j_idc[dots_idc]]
overlay_scatter.set_offsets(new_offsets)
overlay_scatter_img.set_offsets(new_offsets)
f_img.canvas.draw_idle()
def handle_click(event):
if event.inaxes != axes[0]: return
thres_1_slider.set_val(event.xdata)
thres_2_slider.set_val(event.ydata)
update(0)
f.canvas.mpl_connect('button_press_event', handle_click)
thres_1_slider.on_changed(update)
thres_2_slider.on_changed(update)
angle_slider.on_changed(update)
plt.suptitle(f"PCA Method: {pca_method}")
return [thres_1_slider, thres_2_slider, angle_slider]
def pca_cluster(n_components, n_clusters, slab_width, slab_height, pca_method="full", cluster_method="spectral", alpha=1, kernel="linear", scale=False):
slabs, i_min, i_max = utils.load_slabs(slab_width, slab_height)
X = utils.as_np_array(slabs).T
n_freqs, n_pixels = X.shape[1], X.shape[0]
if scale:
X = utils.scale(X)
if pca_method == "full":
p = PCA(n_components = n_components)
elif pca_method == "sparse":
p = SparsePCA(n_components = n_components, alpha=alpha)
elif pca_method == "kernel":
p = KernelPCA(n_components = n_components, kernel=kernel, fit_inverse_transform=True)
elif pca_method == "ica":
p = FastICA(n_components = n_components, whiten="arbitrary-variance")
else:
print(f"unknown PCA method {pca_method}")
return
Y = p.fit_transform(X)
if pca_method in ["full", "sparse", "ica"]:
W = p.components_
else:
W = p.dual_coef_
if cluster_method == "spectral":
Z = SpectralClustering(n_clusters=n_clusters, affinity="nearest_neighbors").fit(Y).labels_.astype(int)
elif cluster_method == "kmeans":
Z = KMeans(n_clusters=n_clusters).fit(Y).labels_.astype(int)
else:
print(f"unknown clustering method {cluster_method}")
return
labels = Z.reshape((slab_height, slab_width))
new_coords = np.array([Y[:,i].reshape((slab_height, slab_width)) for i in range(n_components)])
new_coords = new_coords / new_coords.sum(0)
f = plt.figure(layout="constrained")
gs = GridSpec(2, 3, figure=f)
axes = [f.add_subplot(gs[0,0]), f.add_subplot(gs[1,0]),
f.add_subplot(gs[0,1]), f.add_subplot(gs[0,2]),
f.add_subplot(gs[1,1]), f.add_subplot(gs[1,2])
]
axes[0].scatter(Y[:,0], Y[:,1], s=20, alpha=0.02, c=Z)
axes[1].imshow(slabs[15]["data"], vmin=i_min, vmax=i_max)
axes[1].set_title(f"data @ {slabs[15]["e"]}cm-1")
for c in range(n_components):
cb = axes[c+2].imshow(new_coords[c])
plt.colorbar(cb)
axes[c+2].set_title(f"Component #{c+1}")
axes[5].imshow(labels)
axes[5].set_title("Labels")
plt.suptitle(f"PCA Method: {pca_method}")

20
pixel_spectra.py Normal file
View file

@ -0,0 +1,20 @@
import utils
import matplotlib.pyplot as plt
import numpy as np
def pixel_spectra(max_pixels):
slabs, i_min, i_max = utils.load_slabs()
X = utils.as_np_array(slabs)
f, axes = plt.subplots()
n_freqs, n_pixels = X.shape[0], X.shape[1]
generator = np.random.Generator(np.random.PCG64())
idc = np.unique(generator.choice(range(n_pixels), min(n_pixels, max_pixels)))
for i in idc:
axes.plot(utils.energies, X[:,i], c='blue', alpha=0.1)
plt.show()

51
slideshow.py Normal file
View file

@ -0,0 +1,51 @@
import utils
import matplotlib.pyplot as plt
from matplotlib.widgets import Slider
def slideshow():
slabs, i_min, i_max = utils.load_slabs()
f, axes = plt.subplots(2, 3, height_ratios=[10, 1])
for ax in axes.flat:
ax.set_axis_off()
ax, axfreq = axes[:,0]
implot = ax.imshow(slabs[0]["data"], vmin=i_min, vmax=i_max)
s = slabs[0]
ax.set_title(f"Energy: {s["e"]}cm-1, total intensity: {s["total"]:.2e}")
f.subplots_adjust(bottom=0.25)
# Make a horizontal slider to control the frequency.
freq_slider = Slider(
ax=axfreq,
label='Slab',
valmin=0,
valstep=1,
valmax=len(slabs)-1,
valinit=0,
)
# The function to be called anytime a slider's value changes
def update(val):
s = slabs[val]
implot.set_data(s["data"])
ax.set_title(f"Energy: {s["e"]}cm-1, total intensity: {s["total"]:.2e}")
f.canvas.draw_idle()
freq_slider.on_changed(update)
ax_ch2, _ = axes[:,1]
ax_ch2.imshow(slabs[5]["data"], vmin=i_min, vmax=i_max)
s = slabs[5]
ax_ch2.set_title(f"CH2 ({s["e"]} cm-1)")
ax_ch3, _ = axes[:,2]
ax_ch3.imshow(slabs[16]["data"], vmin=i_min, vmax=i_max)
s = slabs[16]
ax_ch3.set_title(f"CH3 ({s["e"]} cm-1)")
plt.tight_layout()
plt.show()

35
utils.py Normal file
View file

@ -0,0 +1,35 @@
import numpy as np
from sklearn.preprocessing import StandardScaler
energies = [2803, 2811, 2819, 2826, 2834, 2842, 2850, 2858, 2866, 2874,
2882, 2890, 2897, 2905, 2913, 2921, 2929, 2937, 2945, 2953,
2961, 2969, 2977, 2985, 2993, 3001, 3009, 3018, 3026, 3034,
3042, 3050]
def load_slabs(width=512, height=512):
slabs = []
i_min, i_max = np.inf, 0
for e in energies:
s = np.genfromtxt(f"test_sample/HeLa_F-SRS_512x512_{e}cm-1.txt", delimiter=",")[0:height,0:width]
slabs.append({"e":e, "data":s, "total":int(s.sum())})
i_min, i_max = min(i_min, s.min()), max(i_max, s.max())
return slabs, i_min, i_max
def as_np_array(slabs):
return np.array(list(map(lambda s:s["data"].flat, slabs)))
def compute_errors(slabs, d=np.linalg.norm):
n = slabs.size(1)
errors = np.zeros(n, n)
for i in range(n):
for j in range(i+1, n):
errors[i,j] = errors[j,i] = d(slabs[i] - slabs[j])
return errors
def scale(X):
scaler = StandardScaler().set_output(transform="pandas")
Y = scaler.fit_transform(X)
print(Y.shape)
return Y