import
This commit is contained in:
commit
b8af1fa23c
9 changed files with 763 additions and 0 deletions
1
.gitignore
vendored
Normal file
1
.gitignore
vendored
Normal file
|
@ -0,0 +1 @@
|
||||||
|
__pycache__
|
27
hela.py
Normal file
27
hela.py
Normal file
|
@ -0,0 +1,27 @@
|
||||||
|
import numpy as np
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
from matplotlib.gridspec import GridSpec
|
||||||
|
|
||||||
|
import utils
|
||||||
|
|
||||||
|
import pca
|
||||||
|
import pixel_spectra
|
||||||
|
|
||||||
|
from sklearn.decomposition import PCA, FastICA
|
||||||
|
|
||||||
|
# Edge length, in pixels, of the square region analysed from every slab.
size = 512

# Alternative analyses, kept for quick switching. The keyword that selects the
# decomposition is `pca_method`.
# pca.pca(2, size, size, pca_method="sparse", alpha=1000)
# pca.pca(2, size, size, pca_method="kernel", kernel='poly', scale=True)
# pca.pca(2, size, size, pca_method="ica", scale=False)
# pca.pca_cluster(3, 4, size, size, scale=False, cluster_method="kmeans")

# With sliders=True, pca.pca returns the Slider widgets it created. Matplotlib
# documents that a widget must stay referenced to remain responsive, so the
# widgets of BOTH figures are accumulated in `sliders` instead of letting the
# second call overwrite the first list.
sliders = pca.pca(2, size, size, scale=False, sliders=True)
sliders += pca.pca(2, size, size, scale=False, sliders=True, selected=[17, 7])

# kmeans.kmeans(8, 128, 128)

# pixel_spectra.pixel_spectra(100)

# slideshow.slideshow()

plt.show()
|
36
ica.py
Normal file
36
ica.py
Normal file
|
@ -0,0 +1,36 @@
|
||||||
|
import utils
|
||||||
|
import numpy as np
|
||||||
|
from sklearn.decomposition import FastICA
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
from matplotlib.gridspec import GridSpec
|
||||||
|
|
||||||
|
def ica(n_components, slab_width, slab_height):
    """Run FastICA on the per-pixel spectra and plot the result.

    The slabs are loaded through ``utils.load_slabs``, arranged as one row per
    pixel, and decomposed with ``FastICA``. A 2x3 figure then shows a scatter
    of the first two components, the raw slab at index 15 and one image per
    component. The call blocks in ``plt.show()``.

    Args:
        n_components: Number of independent components. The scatter plot
            reads components 0 and 1, and the layout has four image panels.
        slab_width: Width forwarded to ``utils.load_slabs``.
        slab_height: Height forwarded to ``utils.load_slabs``.
    """
    slabs, i_min, i_max = utils.load_slabs(slab_width, slab_height)

    # as_np_array yields one row per slab; transpose to one row per pixel.
    X = utils.as_np_array(slabs).T

    model = FastICA(n_components=n_components, whiten="arbitrary-variance")
    Y = model.fit_transform(X)

    # Fold every component column back into image shape.
    new_coords = [
        Y[:, c].reshape((slab_height, slab_width)) for c in range(n_components)
    ]

    f = plt.figure(layout="constrained")
    gs = GridSpec(2, 3, figure=f)
    axes = [
        f.add_subplot(gs[0, 0]), f.add_subplot(gs[1, 0]),
        f.add_subplot(gs[0, 1]), f.add_subplot(gs[0, 2]),
        f.add_subplot(gs[1, 1]), f.add_subplot(gs[1, 2]),
    ]

    axes[0].scatter(Y[:, 0], Y[:, 1], s=20, alpha=0.02)

    reference = slabs[15]
    axes[1].imshow(reference["data"], vmin=i_min, vmax=i_max)
    # Single quotes inside the f-string keep this valid before Python 3.12.
    axes[1].set_title(f"data @ {reference['e']}cm-1")

    for c, image in enumerate(new_coords):
        axes[c + 2].imshow(image)
        axes[c + 2].set_title(f"Component #{c+1}")

    plt.show()
|
||||||
|
#ica(3, 512, 512)
|
33
kmeans.py
Normal file
33
kmeans.py
Normal file
|
@ -0,0 +1,33 @@
|
||||||
|
import utils
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
from sklearn.cluster import KMeans
|
||||||
|
|
||||||
|
def kmeans(n_clusters, slab_width, slab_height):
    """Cluster the per-pixel spectra with k-means and plot the outcome.

    Three panels are drawn: every pixel spectrum with the cluster centres on
    top, the raw slab at index 15, and the label image. The call blocks in
    ``plt.show()``.

    Args:
        n_clusters: Number of k-means clusters.
        slab_width: Width forwarded to ``utils.load_slabs``.
        slab_height: Height forwarded to ``utils.load_slabs``.
    """
    slabs, i_min, i_max = utils.load_slabs(slab_width, slab_height)

    # One row per slab from as_np_array; k-means wants one row per pixel.
    X = utils.as_np_array(slabs).T

    f, axes = plt.subplots(1, 3)

    km_estimator = KMeans(n_clusters=n_clusters)
    km_estimator.fit(X)
    centers = km_estimator.cluster_centers_

    # Each slab is cropped to (height, width) and flattened row by row, so the
    # labels must be folded back as (slab_height, slab_width). The reversed
    # order only happened to work for square crops.
    labels = km_estimator.labels_.reshape((slab_height, slab_width))

    for spectrum in X:
        axes[0].plot(utils.energies, spectrum, c='blue', alpha=0.1)

    for center in centers:
        axes[0].plot(utils.energies, center, c='black', alpha=1)

    axes[1].imshow(slabs[15]["data"], vmin=i_min, vmax=i_max)

    axes[2].imshow(labels)

    plt.show()
|
||||||
|
|
259
optim_mixture.jl
Normal file
259
optim_mixture.jl
Normal file
|
@ -0,0 +1,259 @@
|
||||||
|
module OptimMixture
|
||||||
|
|
||||||
|
import DelimitedFiles as DF
|
||||||
|
import BenchmarkTools as BT
|
||||||
|
import UnicodePlots as UP
|
||||||
|
import LinearAlgebra: ⋅
|
||||||
|
|
||||||
|
# Energies of the recorded slabs, iterated by load_slabs (one Slab per entry).
# Presumably in cm-1, going by the data file names — confirm.
const ENERGIES = [2803, 2811, 2819, 2826, 2834, 2842, 2850, 2858, 2866, 2874,
  2882, 2890, 2897, 2905, 2913, 2921, 2929, 2937, 2945, 2953,
  2961, 2969, 2977, 2985, 2993, 3001, 3009, 3018, 3026, 3034,
  3042, 3050]
|
||||||
|
|
||||||
|
# One spectral slice: an integer image tagged with the energy it belongs to.
# @kwdef provides the keyword constructor used by load_slabs.
@kwdef struct Slab
  energy::Int # energy tag; load_slabs fills it from ENERGIES
  data::Matrix{Int} # intensity matrix as parsed by readdlm in load_slabs
end
|
||||||
|
|
||||||
|
"""
    load_slabs(width=512, height=512)

Read one comma-separated integer matrix per entry of `ENERGIES` from
`test_sample/` and return `(slabs, i_min, i_max)`: the vector of `Slab`s and
the smallest and largest intensity found over all of them.

Each matrix is cropped to at most `height` rows and `width` columns.
"""
function load_slabs(width::Int=512, height::Int=512)
  # `Slab[]` is already the empty vector; calling it (`Slab[]()`) throws.
  slabs = Slab[]
  i_min, i_max = typemax(Int), typemin(Int)
  for e in ENERGIES
    # Interpolate the energy so every slab reads its own file instead of the
    # 2803 cm-1 file each time.
    raw = DF.readdlm("test_sample/HeLa_F-SRS_512x512_$(e)cm-1.txt", ',', Int)
    rows = min(height, size(raw, 1))
    cols = min(width, size(raw, 2))
    slab = Slab(energy=e, data=raw[1:rows, 1:cols])
    push!(slabs, slab)

    i_min = min(i_min, minimum(slab.data))
    i_max = max(i_max, maximum(slab.data))
  end
  # Return only after every energy was read; the original returned from
  # inside the loop, i.e. after the first slab.
  return slabs, i_min, i_max
end
|
||||||
|
|
||||||
|
"""
    Q_matrix_transpose(X, Y, N)

Return `sum(abs2, Y - A * B')`, where `A` is the first `N` rows of `X` and `B`
the remaining rows.
"""
function Q_matrix_transpose(X::Matrix{Float64}, Y::Matrix{Float64}, N::Int)
  head = view(X, 1:N, :)
  tail = view(X, (N+1):size(X, 1), :)
  residual = Y - head * tail'
  return sum(abs2, residual)
end
|
||||||
|
|
||||||
|
"""
    Q_matrix_reshape(X, Y, N)

Variant of the matrix objective that turns the trailing rows of `X` into an
`m × M` factor with `reshape` instead of an adjoint.
"""
function Q_matrix_reshape(X::Matrix{Float64}, Y::Matrix{Float64}, N::Int)
  total_rows, m = size(X)
  M = total_rows - N
  head = view(X, 1:N, :)
  # NOTE(review): reshape keeps the column-major element order, so this factor
  # equals the transpose only when M == 1 or m == 1 — confirm that is intended.
  tail = reshape(view(X, (N+1):total_rows, :), m, M)
  return sum(abs2, Y - head * tail)
end
|
||||||
|
|
||||||
|
"""
    E_loop(X, Y)

Return the squared error `Σ_{i,j} (X[i, :] ⋅ X[N+j, :] - Y[i, j])^2` with
`N, M = size(Y)`, computed with explicit loops. `X` must have `N + M` rows.
"""
function E_loop(X::Matrix{Float64}, Y::Matrix{Float64})
  N, M = size(Y)
  m = size(X, 2)
  # Float64 accumulators keep the loops type-stable; an Int `0` would change
  # type on the first `+=`.
  s = 0.0
  for i in 1:N
    for j in N+1:N+M
      x = 0.0
      for k in 1:m
        @inbounds x += X[i, k] * X[j, k]
      end
      @inbounds s += (x - Y[i, j-N])^2
    end
  end
  return s
end
|
||||||
|
|
||||||
|
"""
    Q_loop!(dst, X, N)

Fill `dst[i, j]` with the dot product of row `i` and row `N + j` of `X`, for
`i` in `1:N` and `N + j` running over the remaining rows of `X`.
"""
function Q_loop!(dst::Matrix{Float64}, X::Matrix{Float64}, N::Int)
  N_plus_M, m = size(X)
  for i in 1:N
    for j in N+1:N_plus_M
      # Float64 seed keeps the accumulator type-stable.
      x = 0.0
      for k in 1:m
        @inbounds x += X[i, k] * X[j, k]
      end
      @inbounds dst[i, j-N] = x
    end
  end
end
|
||||||
|
|
||||||
|
"""
    DE_loop!(dst, X, Y)

Convenience method: allocate a scratch matrix the size of `Y` and forward to
the four-argument `DE_loop!`.
"""
function DE_loop!(dst::Matrix{Float64}, X::Matrix{Float64}, Y::Matrix{Float64})
  scratch = zeros(size(Y))
  return DE_loop!(dst, scratch, X, Y)
end
|
||||||
|
|
||||||
|
"""
    dot_prod_dual_loop(W, X, V)

Accumulate, over every column `k`, `(W * X[N+1:end, k]) ⋅ V[1:N, k]` plus
`(W' * X[1:N, k]) ⋅ V[N+1:end, k]`, where `N, M = size(W)`.
"""
function dot_prod_dual_loop(W::Matrix{Float64}, X::Matrix{Float64}, V::Matrix{Float64})
  _, m = size(X)
  N, M = size(W)

  # Float64 seeds keep both accumulators type-stable.
  s = 0.0

  for k in 1:m
    for i in 1:N
      x = 0.0
      for j in 1:M
        @inbounds x += W[i, j] * X[N+j, k]
      end
      s += x * V[i, k]
    end
    for j in 1:M
      x = 0.0
      for i in 1:N
        @inbounds x += W[i, j] * X[i, k]
      end
      s += x * V[N+j, k]
    end
  end

  return s
end
|
||||||
|
|
||||||
|
"""
    dot_prod_primal_loop(W, X, V)

Return `Σ_{i,j} W[i, j] * Σ_k (X[i, k] * V[N+j, k] + X[N+j, k] * V[i, k])`,
where `N, M = size(W)`.
"""
function dot_prod_primal_loop(W::Matrix{Float64}, X::Matrix{Float64}, V::Matrix{Float64})
  _, m = size(X)
  N, M = size(W)

  # Float64 seeds keep both accumulators type-stable.
  s = 0.0

  for i in 1:N
    for j in 1:M
      x = 0.0
      for k in 1:m
        @inbounds x += X[i, k] * V[N+j, k] + X[N+j, k] * V[i, k]
      end
      s += x * W[i, j]
    end
  end

  return s
end
|
||||||
|
|
||||||
|
"""
    DE_loop!(dst, dst_W, X, Y)

Write into `dst` (same size as `X`) twice the products of the residual
`W = Q(X) - Y` with the opposite block of rows of `X`; `dst_W` (same size as
`Y`) is overwritten with `W`. Returns `dst`.
"""
function DE_loop!(dst::Matrix{Float64}, dst_W::Matrix{Float64}, X::Matrix{Float64}, Y::Matrix{Float64})
  _, m = size(X)
  N, M = size(Y)
  # dst is the same size as X
  # we need storage of size Y
  Q_loop!(dst_W, X, N)
  # compute W = Q(X) - Y
  dst_W .-= Y
  for k in 1:m
    # rows 1:N receive W * X[N+1:end, k]
    for i in 1:N
      x = 0.0 # Float64 seed keeps the accumulator type-stable
      for j in 1:M
        @inbounds x += dst_W[i, j] * X[N+j, k]
      end
      @inbounds dst[i, k] = x
    end
    # rows N+1:N+M receive W' * X[1:N, k]
    for j in 1:M
      x = 0.0
      for i in 1:N
        @inbounds x += dst_W[i, j] * X[i, k]
      end
      @inbounds dst[N+j, k] = x
    end
  end
  dst .*= 2
end
|
||||||
|
|
||||||
|
"""
    DQ_V_loop!(dst, X, V, N)

Fill `dst[i, j]` with `Σ_k (V[i, k] * X[N+j, k] + X[i, k] * V[N+j, k])` for
`i` in `1:N` and `N + j` running over the remaining rows of `X`.
"""
function DQ_V_loop!(dst::Matrix{Float64}, X::Matrix{Float64}, V::Matrix{Float64}, N::Int)
  N_plus_M, m = size(X)
  for i in 1:N
    for j in N+1:N_plus_M
      # Float64 seed keeps the accumulator type-stable.
      x = 0.0
      for k in 1:m
        @inbounds x += (V[i, k] * X[j, k] + X[i, k] * V[j, k])
      end
      @inbounds dst[i, j-N] = x
    end
  end
end
|
||||||
|
|
||||||
|
"""
    Q_loop_transposed(X, Y, N)

Squared-error objective for the transposed layout: the vectors are the
columns of `X`, and `Y` is indexed as `Y[j, i]`.
"""
function Q_loop_transposed(X::Matrix{Float64}, Y::Matrix{Float64}, N::Int)
  m, N_plus_M = size(X)
  # Float64 seeds keep both accumulators type-stable.
  s = 0.0
  for i in 1:N
    for j in N+1:N_plus_M
      x = 0.0
      for k in 1:m
        @inbounds x += X[k, i] * X[k, j]
      end
      @inbounds s += (Y[j-N, i] - x)^2
    end
  end
  return s
end
|
||||||
|
|
||||||
|
"""
    test_dot_prod(N, M, m)

Print the results of `dot_prod_primal_loop` and `dot_prod_dual_loop` on the
same random inputs so they can be compared by eye.
"""
function test_dot_prod(N::Int, M::Int, m::Int)
  rows = N + M
  X = rand(rows, m)
  W = rand(N, M)
  V = rand(rows, m) .- 0.5 # shifted so entries fall in [-0.5, 0.5)

  @show dot_prod_primal_loop(W, X, V)
  @show dot_prod_dual_loop(W, X, V)
end
|
||||||
|
|
||||||
|
"""
    test_DE(N, M, m)

Finite-difference check of `DE_loop!`: for step sizes `10^1 … 10^-12` compare
`E_loop(X + eps * V, Y)` with the first-order prediction
`E_loop(X, Y) + eps * ⟨DE(X), V⟩`, then print and plot the absolute errors on
log-log axes together with the reference line `y = eps`.
"""
function test_DE(N::Int, M::Int, m::Int)
  X = rand(N + M, m)
  Y = rand(N, M)
  V = rand(N + M, m) .- 0.5

  EX = E_loop(X, Y)
  grad_EX = zeros(size(X))
  DE_loop!(grad_EX, X, Y)

  # Directional derivative along V; it does not depend on the step size.
  slope = sum(grad_EX .* V)

  eps = [10.0^k for k in 1:-1:-12]
  errors = zeros(size(eps))

  for (i, h) in enumerate(eps)
    EX_V_true = E_loop(X + h * V, Y)
    EX_V_approx = EX + h * slope
    @show EX_V_true
    @show EX_V_approx
    errors[i] = abs(EX_V_true - EX_V_approx)
  end

  @show eps
  @show errors

  chart = UP.lineplot(eps, errors, xscale=:log10, yscale=:log10)
  UP.lineplot!(chart, eps, eps)
  println(chart)
end
|
||||||
|
|
||||||
|
"""
    test_DQ(N, M, m)

Finite-difference check of `DQ_V_loop!`: for step sizes `10^1 … 10^-8` compare
`Q(X + eps * V)` with `Q(X) + DQ(X)[eps * V]`, then print and plot the summed
absolute differences on log-log axes.
"""
function test_DQ(N::Int, M::Int, m::Int)
  X = rand(N + M, m) * 255
  V = rand(N + M, m)

  QX = zeros(N, M)
  Q_loop!(QX, X, N)

  dst_true = zeros(N, M)
  dst_approx = zeros(N, M)

  eps = [10.0^k for k in 1:-1:-8]
  errors = zeros(size(eps))

  for (i, h) in enumerate(eps)
    step = h * V
    Q_loop!(dst_true, X + step, N)
    DQ_V_loop!(dst_approx, X, step, N)
    dst_approx .+= QX
    errors[i] = sum(abs, dst_approx - dst_true)
  end

  @show eps
  @show errors

  chart = UP.lineplot(eps, errors, xscale=:log10, yscale=:log10)
  println(chart)
end
|
||||||
|
|
||||||
|
"""
    test(N, M, m)

Compare the matrix and loop implementations of the squared-error objective on
random data and print both values with their relative difference.
"""
function test(N::Int, M::Int, m::Int)
  Y = rand(N, M) * 255
  X = rand(N + M, m) * 255

  r_matrix = Q_matrix_transpose(X, Y, N)
  # `Q_loop` is not defined in this module; `E_loop` is the loop form of the
  # same objective. The unused `X_pre` copy was dropped.
  r_loop = E_loop(X, Y)

  @show r_matrix, r_loop, abs(r_matrix - r_loop) / r_loop
end
|
||||||
|
|
||||||
|
end # module
|
||||||
|
|
||||||
|
# Script entry point: exactly one check runs when the file is executed; the
# commented calls are the other checks defined in OptimMixture.
# OptimMixture.test(512 * 512, 20, 2)
# OptimMixture.test_DQ(512 * 512, 20, 2)
OptimMixture.test_DE(512 * 512, 20, 2)
# OptimMixture.test_DE(20, 2, 2)
# OptimMixture.test_dot_prod(512^2, 20, 2)
|
||||||
|
|
||||||
|
# vim: ts=2:sw=2:sts=2
|
301
pca.py
Normal file
301
pca.py
Normal file
|
@ -0,0 +1,301 @@
|
||||||
|
import utils
|
||||||
|
import numpy as np
|
||||||
|
from sklearn.decomposition import PCA, SparsePCA, KernelPCA, FastICA
|
||||||
|
from sklearn.cluster import SpectralClustering, KMeans
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
from matplotlib.gridspec import GridSpec
|
||||||
|
|
||||||
|
from matplotlib.widgets import Slider
|
||||||
|
|
||||||
|
def pca(n_components, slab_width, slab_height, pca_method="full", alpha=1, kernel="linear", scale=False, sliders=False, selected=[]):
    """Decompose the per-pixel spectra and plot the components.

    With ``sliders=True`` the work is delegated to ``pca_sliders``. Otherwise
    a 2x3 figure is built: a scatter of the first two components (points with
    a first component above 0.6 drawn in black), the raw slab at index 15,
    component images, a histogram of the first component and, for the linear
    methods, the component coefficients.

    Args:
        n_components: Number of components to extract (at least 2 for the
            scatter plot).
        slab_width: Width forwarded to ``utils.load_slabs``.
        slab_height: Height forwarded to ``utils.load_slabs``.
        pca_method: One of ``"full"``, ``"sparse"``, ``"kernel"``, ``"ica"``.
        alpha: Passed to ``SparsePCA``.
        kernel: Passed to ``KernelPCA``.
        scale: Standardise the data with ``utils.scale`` first.
        sliders: Build the interactive figure instead.
        selected: Only forwarded to ``pca_sliders``; never mutated here.

    Returns:
        The list returned by ``pca_sliders`` when ``sliders`` is true,
        otherwise ``None`` (also for an unknown ``pca_method``, which is
        reported with a message).
    """
    if sliders:
        return pca_sliders(n_components, slab_width, slab_height, pca_method=pca_method, alpha=alpha, kernel=kernel, scale=scale, selected=selected)

    slabs, i_min, i_max = utils.load_slabs(slab_width, slab_height)

    # One row per slab from as_np_array; the estimators want one row per pixel.
    X = utils.as_np_array(slabs).T

    if scale:
        X = utils.scale(X)

    if pca_method == "full":
        p = PCA(n_components=n_components)
    elif pca_method == "sparse":
        p = SparsePCA(n_components=n_components, alpha=alpha)
    elif pca_method == "kernel":
        p = KernelPCA(n_components=n_components, kernel=kernel, fit_inverse_transform=True)
    elif pca_method == "ica":
        p = FastICA(n_components=n_components, whiten="arbitrary-variance")
    else:
        print(f"unknown PCA pca_method {pca_method}")
        return

    Y = p.fit_transform(X)
    # Only these estimators expose per-energy coefficients in components_.
    has_coefficients = pca_method in ["full", "sparse", "ica"]

    dots_idc = np.flatnonzero(Y[:, 0] > 0.6)

    # Component images rescaled to [0, 1]. np.ptp is used because the
    # ndarray.ptp method was removed in NumPy 2.0.
    new_coords = []
    for c in range(n_components):
        image = Y[:, c].reshape((slab_height, slab_width))
        new_coords.append((image - image.min()) / np.ptp(image))

    f = plt.figure(layout="constrained")
    gs = GridSpec(2, 3, figure=f)
    axes = [
        f.add_subplot(gs[0, 0]), f.add_subplot(gs[1, 0]),
        f.add_subplot(gs[0, 1]), f.add_subplot(gs[0, 2]),
        f.add_subplot(gs[1, 1]), f.add_subplot(gs[1, 2]),
    ]

    axes[0].scatter(Y[:, 0], Y[:, 1], s=20, alpha=0.02)
    axes[0].scatter(Y[dots_idc, 0], Y[dots_idc, 1], s=20, alpha=1, color="black")

    reference = slabs[15]
    axes[1].imshow(reference["data"], vmin=i_min, vmax=i_max)
    # Single quotes inside the f-string keep this valid before Python 3.12.
    axes[1].set_title(f"data @ {reference['e']}cm-1")

    # NOTE(review): this loop and the coefficient loop below skip the last
    # component, unlike pca_sliders — confirm whether that is intended.
    for c in range(n_components - 1):
        cb = axes[c + 2].imshow(new_coords[c])
        plt.colorbar(cb)
        axes[c + 2].set_title(f"Component #{c+1}")

    axes[4].hist(Y[:, 0], bins=25)

    if has_coefficients:
        W = p.components_
        for c in range(n_components - 1):
            axes[5].step(utils.energies, W[c, :], label=f"{c+1}", where="mid")
        axes[5].axvline(2845, color="black", lw=0.5)
        axes[5].axvline(2930, color="black", lw=0.5)
        axes[5].set_title("Coefficients for each component")
        # Attach the legend to the coefficient panel explicitly instead of
        # relying on pyplot's current axes.
        axes[5].legend(title="Component")

    plt.suptitle(f"PCA Method: {pca_method}")
|
||||||
|
|
||||||
|
def pca_sliders(n_components, slab_width, slab_height, pca_method="full", alpha=1, kernel="linear", scale=False, selected=[]):
    """Interactive variant of ``pca`` with threshold and angle sliders.

    A decomposition is fitted as in ``pca``. The plotted coordinates ``Y`` are
    the fitted scores, or, when ``selected`` is non-empty, the raw columns of
    the data at those slab indices. Points falling in a rotated quadrant
    anchored at the two thresholds are highlighted in the scatter plot and
    overlaid on the slab image, both in the main figure and in a second
    stand-alone figure. Clicking in the scatter plot moves both thresholds to
    the click position.

    Args:
        n_components: Number of components to fit and to display (at least 2).
        slab_width: Width forwarded to ``utils.load_slabs``.
        slab_height: Height forwarded to ``utils.load_slabs``.
        pca_method: One of ``"full"``, ``"sparse"``, ``"kernel"``, ``"ica"``.
        alpha: Passed to ``SparsePCA``.
        kernel: Passed to ``KernelPCA``.
        scale: Standardise the data with ``utils.scale`` first.
        selected: Slab indices to use as coordinates instead of the fitted
            scores; needs at least ``n_components`` entries. Never mutated.

    Returns:
        ``[thres_1_slider, thres_2_slider, angle_slider]`` so the caller can
        keep the widgets referenced, or ``None`` for an unknown
        ``pca_method``.
    """
    slabs, i_min, i_max = utils.load_slabs(slab_width, slab_height)

    # One row per slab from as_np_array; the estimators want one row per pixel.
    X = utils.as_np_array(slabs).T
    n_pixels = X.shape[0]

    # Image position of every flattened pixel. Slabs are flattened row by row,
    # so flat index k is row k // slab_width, column k % slab_width. The
    # original derived both from slab_height, which was only right for
    # square crops.
    y_idc, x_idc = np.divmod(np.arange(n_pixels), slab_width)

    if scale:
        X = utils.scale(X)

    if pca_method == "full":
        p = PCA(n_components=n_components)
    elif pca_method == "sparse":
        p = SparsePCA(n_components=n_components, alpha=alpha)
    elif pca_method == "kernel":
        p = KernelPCA(n_components=n_components, kernel=kernel, fit_inverse_transform=True)
    elif pca_method == "ica":
        p = FastICA(n_components=n_components, whiten="arbitrary-variance")
    else:
        print(f"unknown PCA pca_method {pca_method}")
        return

    Y_pca = p.fit_transform(X)

    if len(selected) > 0:
        # utils.scale may hand back a DataFrame, which does not accept
        # [:, list] indexing; go through a plain array.
        Y = np.asarray(X)[:, selected]
    else:
        Y = Y_pca

    # Only these estimators expose per-energy coefficients in components_.
    has_coefficients = pca_method in ["full", "sparse", "ica"]

    # Initial selection region; the sliders take over on first change.
    thres_1 = 0.6
    thres_2 = 0.2
    angle = 0.0

    def rot_mat(x):
        """Return the 2x2 rotation matrix for angle ``x`` (radians)."""
        return np.array([[np.cos(x), -np.sin(x)], [np.sin(x), np.cos(x)]])

    def compute_dots_idc(thres_1, thres_2, angle):
        """Indices of points in the rotated quadrant anchored at the thresholds."""
        rotated = np.dot(rot_mat(angle), (Y[:, :2] - [thres_1, thres_2]).T)
        return np.flatnonzero((rotated[0, :] >= 0) & (rotated[1, :] >= 0))

    dots_idc = compute_dots_idc(thres_1, thres_2, angle)

    # Coordinate images rescaled to [0, 1]. np.ptp is used because the
    # ndarray.ptp method was removed in NumPy 2.0.
    new_coords = []
    for c in range(n_components):
        image = Y[:, c].reshape((slab_height, slab_width))
        new_coords.append((image - image.min()) / np.ptp(image))

    f = plt.figure(layout="constrained")
    gs = GridSpec(2, 3, left=0, right=0.9)
    axes = [
        f.add_subplot(gs[0, 0]), f.add_subplot(gs[1, 0]),
        f.add_subplot(gs[0, 1]), f.add_subplot(gs[0, 2]),
        f.add_subplot(gs[1, 1]), f.add_subplot(gs[1, 2]),
    ]

    axes[0].scatter(Y[:, 0], Y[:, 1], s=20, alpha=0.02)
    pca_scatter = axes[0].scatter(Y[dots_idc, 0], Y[dots_idc, 1], s=20, alpha=1, color="black")

    reference = slabs[15]
    # Single quotes inside the f-strings keep them valid before Python 3.12.
    reference_title = f"data @ {reference['e']}cm-1"

    axes[1].imshow(reference["data"], vmin=i_min, vmax=i_max)
    axes[1].set_title(reference_title)
    overlay_scatter = axes[1].scatter(x_idc[dots_idc], y_idc[dots_idc], s=0.1, color="black")

    # Second figure: the same overlay on its own, with larger markers.
    f_img, ax_img = plt.subplots()
    ax_img.imshow(reference["data"], vmin=i_min, vmax=i_max)
    ax_img.set_title(reference_title)
    overlay_scatter_img = ax_img.scatter(x_idc[dots_idc], y_idc[dots_idc], s=1, color="black")

    for c in range(n_components):
        cb = axes[c + 2].imshow(new_coords[c])
        plt.colorbar(cb)
        if len(selected) > 0:
            axes[c + 2].set_title(f"Energy #{c+1} ({slabs[selected[c]]['e']}cm-1)")
        else:
            axes[c + 2].set_title(f"PCA component #{c+1}")

    if len(selected) > 0:
        axes[4].hist(Y[:, 0], bins=25, ec="black", histtype="step")
        axes[4].hist(Y[:, 1], bins=25, ec="grey", histtype="step")
    else:
        axes[4].hist(Y[:, 0], bins=25)
        axes[4].hist(Y[:, 1], bins=25)
    axes[4].axvline(thres_1, lw=0.5, color="black")
    axes[4].axvline(thres_2, lw=0.5, color="black")

    if has_coefficients:
        W = p.components_
        for c in range(n_components):
            axes[5].step(utils.energies, W[c, :], label=f"{c+1}", where="mid")
        axes[5].axvline(2845, color="black", lw=0.5)
        axes[5].axvline(2930, color="black", lw=0.5)
        axes[5].set_title("Coefficients for each component")
        axes[5].legend(title="Component")
        for e in selected:
            axes[5].axvline(utils.energies[e], ls="dotted")

    def vertical_slider(rect, label, low, high, step, init):
        """Add a vertical Slider in a new axes occupying ``rect``."""
        return Slider(
            ax=f.add_axes(rect),
            label=label,
            valmin=low,
            valstep=step,
            valmax=high,
            valinit=init,
            orientation="vertical",
        )

    # Vertical sliders along the right edge: one threshold per plotted
    # coordinate, starting at mid-range, plus the rotation angle.
    lo_1, hi_1 = Y[:, 0].min(), Y[:, 0].max()
    lo_2, hi_2 = Y[:, 1].min(), Y[:, 1].max()
    thres_1_slider = vertical_slider([0.91, 0.05, 0.03, 0.9], 'Thres 1', lo_1, hi_1, 1, 0.5*(lo_1+hi_1))
    thres_2_slider = vertical_slider([0.94, 0.05, 0.03, 0.9], 'Thres 2', lo_2, hi_2, 1, 0.5*(lo_2+hi_2))
    angle_slider = vertical_slider([0.97, 0.05, 0.03, 0.9], 'Angle', 0, 2*np.pi, 0.01, 0)

    def update(val):
        """Recompute the selection from the sliders and move all markers."""
        dots_idc = compute_dots_idc(thres_1_slider.val, thres_2_slider.val, angle_slider.val)
        pca_scatter.set_offsets(Y[dots_idc, :2])
        new_offsets = np.c_[x_idc[dots_idc], y_idc[dots_idc]]
        overlay_scatter.set_offsets(new_offsets)
        overlay_scatter_img.set_offsets(new_offsets)
        f_img.canvas.draw_idle()

    def handle_click(event):
        """Move both thresholds to a click made inside the scatter plot."""
        if event.inaxes != axes[0]:
            return
        thres_1_slider.set_val(event.xdata)
        thres_2_slider.set_val(event.ydata)
        update(0)

    f.canvas.mpl_connect('button_press_event', handle_click)

    thres_1_slider.on_changed(update)
    thres_2_slider.on_changed(update)
    angle_slider.on_changed(update)

    plt.suptitle(f"PCA Method: {pca_method}")

    return [thres_1_slider, thres_2_slider, angle_slider]
|
||||||
|
|
||||||
|
def pca_cluster(n_components, n_clusters, slab_width, slab_height, pca_method="full", cluster_method="spectral", alpha=1, kernel="linear", scale=False):
    """Decompose the per-pixel spectra, cluster the scores and plot both.

    A 2x3 figure shows a scatter of the first two components coloured by
    cluster, the raw slab at index 15, one image per component and the label
    image.

    Args:
        n_components: Number of components (at least 2 for the scatter plot).
        n_clusters: Number of clusters.
        slab_width: Width forwarded to ``utils.load_slabs``.
        slab_height: Height forwarded to ``utils.load_slabs``.
        pca_method: One of ``"full"``, ``"sparse"``, ``"kernel"``, ``"ica"``.
        cluster_method: ``"spectral"`` or ``"kmeans"``.
        alpha: Passed to ``SparsePCA``.
        kernel: Passed to ``KernelPCA``.
        scale: Standardise the data with ``utils.scale`` first.

    Returns:
        ``None``. An unknown ``pca_method`` or ``cluster_method`` is reported
        with a message and nothing is plotted.
    """
    slabs, i_min, i_max = utils.load_slabs(slab_width, slab_height)

    # One row per slab from as_np_array; the estimators want one row per pixel.
    X = utils.as_np_array(slabs).T

    if scale:
        X = utils.scale(X)

    if pca_method == "full":
        p = PCA(n_components=n_components)
    elif pca_method == "sparse":
        p = SparsePCA(n_components=n_components, alpha=alpha)
    elif pca_method == "kernel":
        p = KernelPCA(n_components=n_components, kernel=kernel, fit_inverse_transform=True)
    elif pca_method == "ica":
        p = FastICA(n_components=n_components, whiten="arbitrary-variance")
    else:
        print(f"unknown PCA method {pca_method}")
        return

    Y = p.fit_transform(X)

    if cluster_method == "spectral":
        clusterer = SpectralClustering(n_clusters=n_clusters, affinity="nearest_neighbors")
    elif cluster_method == "kmeans":
        clusterer = KMeans(n_clusters=n_clusters)
    else:
        print(f"unknown clustering method {cluster_method}")
        return
    Z = clusterer.fit(Y).labels_.astype(int)

    labels = Z.reshape((slab_height, slab_width))

    # Stack the component images and divide each pixel by the sum of its
    # values across components.
    new_coords = np.array([Y[:, i].reshape((slab_height, slab_width)) for i in range(n_components)])
    new_coords = new_coords / new_coords.sum(0)

    f = plt.figure(layout="constrained")
    gs = GridSpec(2, 3, figure=f)
    axes = [
        f.add_subplot(gs[0, 0]), f.add_subplot(gs[1, 0]),
        f.add_subplot(gs[0, 1]), f.add_subplot(gs[0, 2]),
        f.add_subplot(gs[1, 1]), f.add_subplot(gs[1, 2]),
    ]

    axes[0].scatter(Y[:, 0], Y[:, 1], s=20, alpha=0.02, c=Z)

    reference = slabs[15]
    axes[1].imshow(reference["data"], vmin=i_min, vmax=i_max)
    # Single quotes inside the f-string keep this valid before Python 3.12.
    axes[1].set_title(f"data @ {reference['e']}cm-1")

    for c in range(n_components):
        cb = axes[c + 2].imshow(new_coords[c])
        plt.colorbar(cb)
        axes[c + 2].set_title(f"Component #{c+1}")

    axes[5].imshow(labels)
    axes[5].set_title("Labels")

    plt.suptitle(f"PCA Method: {pca_method}")
|
20
pixel_spectra.py
Normal file
20
pixel_spectra.py
Normal file
|
@ -0,0 +1,20 @@
|
||||||
|
import utils
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
def pixel_spectra(max_pixels):
    """Plot the spectra of a random sample of pixels.

    Args:
        max_pixels: Number of distinct pixels to draw; capped at the number
            of pixels available.
    """
    slabs, i_min, i_max = utils.load_slabs()

    # Rows are slabs, columns are pixels.
    X = utils.as_np_array(slabs)
    n_pixels = X.shape[1]

    f, axes = plt.subplots()

    # Sample without replacement so exactly the requested number of distinct
    # pixels is drawn. Sampling with replacement and de-duplicating afterwards
    # returned fewer.
    generator = np.random.default_rng()
    idc = generator.choice(n_pixels, size=min(n_pixels, max_pixels), replace=False)

    for i in idc:
        axes.plot(utils.energies, X[:, i], c='blue', alpha=0.1)

    plt.show()
|
||||||
|
|
51
slideshow.py
Normal file
51
slideshow.py
Normal file
|
@ -0,0 +1,51 @@
|
||||||
|
import utils
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
|
||||||
|
from matplotlib.widgets import Slider
|
||||||
|
|
||||||
|
def slideshow():
    """Browse the slabs with a slider, next to two fixed reference slabs.

    The left panel shows the slab chosen with the slider; the middle and
    right panels show the slabs at indices 5 and 16, titled CH2 and CH3. The
    call blocks in ``plt.show()``.
    """
    slabs, i_min, i_max = utils.load_slabs()

    f, axes = plt.subplots(2, 3, height_ratios=[10, 1])

    for ax in axes.flat:
        ax.set_axis_off()

    def energy_title(slab):
        # Single quotes inside the f-string keep this valid before Python 3.12.
        return f"Energy: {slab['e']}cm-1, total intensity: {slab['total']:.2e}"

    ax, axfreq = axes[:, 0]
    implot = ax.imshow(slabs[0]["data"], vmin=i_min, vmax=i_max)
    ax.set_title(energy_title(slabs[0]))
    f.subplots_adjust(bottom=0.25)

    # Horizontal slider selecting which slab is displayed.
    freq_slider = Slider(
        ax=axfreq,
        label='Slab',
        valmin=0,
        valstep=1,
        valmax=len(slabs)-1,
        valinit=0,
    )

    def update(val):
        """Show the slab selected by the slider."""
        # The slider may report a float; a list index has to be an int.
        s = slabs[int(val)]
        implot.set_data(s["data"])
        ax.set_title(energy_title(s))
        f.canvas.draw_idle()

    freq_slider.on_changed(update)

    ax_ch2, _ = axes[:, 1]
    ch2 = slabs[5]
    ax_ch2.imshow(ch2["data"], vmin=i_min, vmax=i_max)
    ax_ch2.set_title(f"CH2 ({ch2['e']} cm-1)")

    ax_ch3, _ = axes[:, 2]
    ch3 = slabs[16]
    ax_ch3.imshow(ch3["data"], vmin=i_min, vmax=i_max)
    ax_ch3.set_title(f"CH3 ({ch3['e']} cm-1)")

    plt.tight_layout()
    plt.show()
|
||||||
|
|
35
utils.py
Normal file
35
utils.py
Normal file
|
@ -0,0 +1,35 @@
|
||||||
|
import numpy as np
|
||||||
|
from sklearn.preprocessing import StandardScaler
|
||||||
|
|
||||||
|
# Energies of the recorded slabs; load_slabs reads one file per entry and the
# plotting code uses the list as the x axis. Presumably in cm-1, going by the
# data file names — confirm.
energies = [2803, 2811, 2819, 2826, 2834, 2842, 2850, 2858, 2866, 2874,
            2882, 2890, 2897, 2905, 2913, 2921, 2929, 2937, 2945, 2953,
            2961, 2969, 2977, 2985, 2993, 3001, 3009, 3018, 3026, 3034,
            3042, 3050]
|
||||||
|
|
||||||
|
def load_slabs(width=512, height=512):
    """Load one intensity image per entry of ``energies``.

    Args:
        width: Number of leading columns kept from each image.
        height: Number of leading rows kept from each image.

    Returns:
        ``(slabs, i_min, i_max)``. ``slabs`` is a list of dicts with keys
        ``"e"`` (energy), ``"data"`` (the cropped array) and ``"total"``
        (integer sum of the crop). ``i_min`` and ``i_max`` are the smallest
        and largest value over all crops.
    """
    slabs = []
    # Seed the maximum with -inf rather than 0, so data that is negative
    # everywhere still reports its true maximum.
    i_min, i_max = np.inf, -np.inf
    for e in energies:
        s = np.genfromtxt(f"test_sample/HeLa_F-SRS_512x512_{e}cm-1.txt", delimiter=",")[0:height, 0:width]
        slabs.append({"e": e, "data": s, "total": int(s.sum())})

        i_min, i_max = min(i_min, s.min()), max(i_max, s.max())

    return slabs, i_min, i_max
|
||||||
|
|
||||||
|
def as_np_array(slabs):
    """Stack the slabs into a 2-D array with one flattened image per row.

    Each slab's ``"data"`` array is flattened in row-major order.
    """
    flattened = [np.ravel(slab["data"]) for slab in slabs]
    return np.array(flattened)
|
||||||
|
|
||||||
|
def compute_errors(slabs, d=np.linalg.norm):
    """Return the symmetric matrix of pairwise distances between slabs.

    Args:
        slabs: Sequence or array whose items ``slabs[i]`` are arrays of equal
            shape.
        d: Callable mapping a difference array to a scalar distance.

    Returns:
        An ``(n, n)`` float array with ``errors[i, j] == d(slabs[i] - slabs[j])``
        and zeros on the diagonal, where ``n == len(slabs)``.
    """
    # ndarray.size is an attribute, not a method, and np.zeros takes the shape
    # as a single tuple; the original calls raised TypeError on both lines.
    n = len(slabs)
    errors = np.zeros((n, n))
    for i in range(n):
        for j in range(i + 1, n):
            errors[i, j] = errors[j, i] = d(slabs[i] - slabs[j])
    return errors
|
||||||
|
|
||||||
|
def scale(X):
    """Standardise ``X`` with ``StandardScaler`` and return the result.

    The scaler is configured for pandas output, and the shape of the result
    is printed before it is returned.
    """
    standardized = StandardScaler().set_output(transform="pandas").fit_transform(X)
    print(standardized.shape)
    return standardized
|
Loading…
Reference in a new issue