SRS/pca.py
2024-12-18 17:07:43 +01:00

301 lines
11 KiB
Python

import utils
import numpy as np
from sklearn.decomposition import PCA, SparsePCA, KernelPCA, FastICA
from sklearn.cluster import SpectralClustering, KMeans
import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec
from matplotlib.widgets import Slider
def pca(n_components, slab_width, slab_height, pca_method="full", alpha=1, kernel="linear", scale=False, sliders=False, selected=[]):
if sliders:
return pca_sliders(n_components, slab_width, slab_height, pca_method=pca_method, alpha=alpha, kernel=kernel, scale=scale, selected=selected)
slabs, i_min, i_max = utils.load_slabs(slab_width, slab_height)
X = utils.as_np_array(slabs).T
n_freqs, n_pixels = X.shape[1], X.shape[0]
if scale:
X = utils.scale(X)
if pca_method == "full":
p = PCA(n_components = n_components)
elif pca_method == "sparse":
p = SparsePCA(n_components = n_components, alpha=alpha)
elif pca_method == "kernel":
p = KernelPCA(n_components = n_components, kernel=kernel, fit_inverse_transform=True)
elif pca_method == "ica":
p = FastICA(n_components = n_components, whiten="arbitrary-variance")
else:
print(f"unknown PCA pca_method {pca_method}")
return
Y = p.fit_transform(X)
if pca_method in ["full", "sparse", "ica"]:
W = p.components_
else:
W = p.dual_coef_
dots_idc = np.where(Y[:,0] > 0.6)
new_coords = [Y[:,i].reshape((slab_height, slab_width)) for i in range(n_components)]
for c in range(n_components):
new_coords[c] = (new_coords[c] - new_coords[c].min())/new_coords[c].ptp()
f = plt.figure(layout="constrained")
gs = GridSpec(2, 3, figure=f)
axes = [f.add_subplot(gs[0,0]), f.add_subplot(gs[1,0]),
f.add_subplot(gs[0,1]), f.add_subplot(gs[0,2]),
f.add_subplot(gs[1,1]), f.add_subplot(gs[1,2])
]
axes[0].scatter(Y[:,0], Y[:,1], s=20, alpha=0.02)
axes[0].scatter(Y[dots_idc,0], Y[dots_idc,1], s=20, alpha=1, color="black")
axes[1].imshow(slabs[15]["data"], vmin=i_min, vmax=i_max)
axes[1].set_title(f"data @ {slabs[15]["e"]}cm-1")
for c in range(n_components-1):
# mask = new_coords[c] > 0.6
cb = axes[c+2].imshow(new_coords[c])
# cb = axes[c+2].imshow(mask)
plt.colorbar(cb)
axes[c+2].set_title(f"Component #{c+1}")
axes[4].hist(Y[:,0], bins=25)
if pca_method in ["full", "sparse", "ica"]:
for c in range(n_components-1):
axes[5].step(utils.energies, W[c, :], label=f"{c+1}", where="mid")
axes[5].axvline(2845, color="black", lw=0.5)
axes[5].axvline(2930, color="black", lw=0.5)
axes[5].set_title("Coefficients for each component")
plt.legend(title="Component")
plt.suptitle(f"PCA Method: {pca_method}")
def pca_sliders(n_components, slab_width, slab_height, pca_method="full", alpha=1, kernel="linear", scale=False, selected=[]):
slabs, i_min, i_max = utils.load_slabs(slab_width, slab_height)
i_idc = np.array([[i for i in range(slab_height)] for j in range(slab_width)]).flatten()
j_idc = np.array([[j for i in range(slab_height)] for j in range(slab_width)]).flatten()
X = utils.as_np_array(slabs).T
n_pixels, n_freqs = X.shape
if scale:
X = utils.scale(X)
if pca_method == "full":
p = PCA(n_components = n_components)
elif pca_method == "sparse":
p = SparsePCA(n_components = n_components, alpha=alpha)
elif pca_method == "kernel":
p = KernelPCA(n_components = n_components, kernel=kernel, fit_inverse_transform=True)
elif pca_method == "ica":
p = FastICA(n_components = n_components, whiten="arbitrary-variance")
else:
print(f"unknown PCA pca_method {pca_method}")
return
Y_pca = p.fit_transform(X)
if len(selected) > 0:
Y = X[:,selected]
else:
Y = Y_pca
if pca_method in ["full", "sparse", "ica"]:
W = p.components_
else:
W = p.dual_coef_
thres_1 = 0.6
thres_2 = 0.2
angle = 0.0
def rot_mat(x):
return np.array([[np.cos(x), -np.sin(x)],[np.sin(x), np.cos(x)]])
def compute_dots_idc(thres_1, thres_2, angle):
M = rot_mat(angle)
YY = np.dot(M, (Y[:, :2] - [thres_1, thres_2]).T)
return np.where((YY[0,:] >= 0) * (YY[1,:] >= 0))[0]
dots_idc = compute_dots_idc(thres_1, thres_2, angle)
new_coords = [Y[:,i].reshape((slab_height, slab_width)) for i in range(n_components)]
for c in range(n_components):
new_coords[c] = (new_coords[c] - new_coords[c].min())/new_coords[c].ptp()
f = plt.figure(layout="constrained")
gs = GridSpec(2, 3, left=0, right=0.9)
axes = [f.add_subplot(gs[0,0]), f.add_subplot(gs[1,0]),
f.add_subplot(gs[0,1]), f.add_subplot(gs[0,2]),
f.add_subplot(gs[1,1]), f.add_subplot(gs[1,2]),
# f.add_subplot(gs[2,0]), f.add_subplot(gs[2,2])
]
axes[0].scatter(Y[:,0], Y[:,1], s=20, alpha=0.02)
pca_scatter = axes[0].scatter(Y[dots_idc,0], Y[dots_idc,1], s=20, alpha=1, color="black")
axes[1].imshow(slabs[15]["data"], vmin=i_min, vmax=i_max)
axes[1].set_title(f"data @ {slabs[15]["e"]}cm-1")
overlay_scatter = axes[1].scatter(i_idc[dots_idc], j_idc[dots_idc], s=0.1, color="black")
f_img, ax_img = plt.subplots()
ax_img.imshow(slabs[15]["data"], vmin=i_min, vmax=i_max)
ax_img.set_title(f"data @ {slabs[15]["e"]}cm-1")
overlay_scatter_img = ax_img.scatter(i_idc[dots_idc], j_idc[dots_idc], s=1, color="black")
for c in range(n_components):
# mask = new_coords[c] > 0.6
cb = axes[c+2].imshow(new_coords[c])
# cb = axes[c+2].imshow(mask)
plt.colorbar(cb)
if len(selected) > 0:
axes[c+2].set_title(f"Energy #{c+1} ({slabs[selected[c]]["e"]}cm-1)")
else:
axes[c+2].set_title(f"PCA component #{c+1}")
if len(selected) > 0:
axes[4].hist(Y[:,0], bins=25, ec="black", histtype="step")
axes[4].hist(Y[:,1], bins=25, ec="grey", histtype="step")
else:
axes[4].hist(Y[:,0], bins=25)
axes[4].hist(Y[:,1], bins=25)
axes[4].axvline(thres_1, lw=0.5, color="black")
axes[4].axvline(thres_2, lw=0.5, color="black")
if pca_method in ["full", "sparse", "ica"]:
for c in range(n_components):
axes[5].step(utils.energies, W[c, :], label=f"{c+1}", where="mid")
axes[5].axvline(2845, color="black", lw=0.5)
axes[5].axvline(2930, color="black", lw=0.5)
axes[5].set_title("Coefficients for each component")
axes[5].legend(title="Component")
for e in selected:
axes[5].axvline(utils.energies[e], ls="dotted")
# Make a horizontal slider to control the frequency.
thres_1_axis = f.add_axes([0.91, 0.05, 0.03, 0.9])
thres_1_slider = Slider(
ax=thres_1_axis,
label='Thres 1',
valmin=Y[:,0].min(),
valstep=1,
valmax=Y[:,0].max(),
valinit=0.5*(Y[:,0].min()+Y[:,0].max()),
orientation="vertical"
)
thres_2_axis = f.add_axes([0.94, 0.05, 0.03, 0.9])
thres_2_slider = Slider(
ax=thres_2_axis,
label='Thres 2',
valmin=Y[:,1].min(),
valstep=1,
valmax=Y[:,1].max(),
valinit=0.5*(Y[:,1].min()+Y[:,1].max()),
orientation="vertical"
)
angle_axis = f.add_axes([0.97, 0.05, 0.03, 0.9])
angle_slider = Slider(
ax=angle_axis,
label='Angle',
valmin=0,
valstep=0.01,
valmax=2*np.pi,
valinit=0,
orientation="vertical"
)
# # The function to be called anytime a slider's value changes
def update(val):
dots_idc = compute_dots_idc(thres_1_slider.val, thres_2_slider.val, angle_slider.val)
new_offsets = Y[dots_idc,:2]
pca_scatter.set_offsets(new_offsets)
new_offsets = np.c_[i_idc[dots_idc], j_idc[dots_idc]]
overlay_scatter.set_offsets(new_offsets)
overlay_scatter_img.set_offsets(new_offsets)
f_img.canvas.draw_idle()
def handle_click(event):
if event.inaxes != axes[0]: return
thres_1_slider.set_val(event.xdata)
thres_2_slider.set_val(event.ydata)
update(0)
f.canvas.mpl_connect('button_press_event', handle_click)
thres_1_slider.on_changed(update)
thres_2_slider.on_changed(update)
angle_slider.on_changed(update)
plt.suptitle(f"PCA Method: {pca_method}")
return [thres_1_slider, thres_2_slider, angle_slider]
def pca_cluster(n_components, n_clusters, slab_width, slab_height, pca_method="full", cluster_method="spectral", alpha=1, kernel="linear", scale=False):
slabs, i_min, i_max = utils.load_slabs(slab_width, slab_height)
X = utils.as_np_array(slabs).T
n_freqs, n_pixels = X.shape[1], X.shape[0]
if scale:
X = utils.scale(X)
if pca_method == "full":
p = PCA(n_components = n_components)
elif pca_method == "sparse":
p = SparsePCA(n_components = n_components, alpha=alpha)
elif pca_method == "kernel":
p = KernelPCA(n_components = n_components, kernel=kernel, fit_inverse_transform=True)
elif pca_method == "ica":
p = FastICA(n_components = n_components, whiten="arbitrary-variance")
else:
print(f"unknown PCA method {pca_method}")
return
Y = p.fit_transform(X)
if pca_method in ["full", "sparse", "ica"]:
W = p.components_
else:
W = p.dual_coef_
if cluster_method == "spectral":
Z = SpectralClustering(n_clusters=n_clusters, affinity="nearest_neighbors").fit(Y).labels_.astype(int)
elif cluster_method == "kmeans":
Z = KMeans(n_clusters=n_clusters).fit(Y).labels_.astype(int)
else:
print(f"unknown clustering method {cluster_method}")
return
labels = Z.reshape((slab_height, slab_width))
new_coords = np.array([Y[:,i].reshape((slab_height, slab_width)) for i in range(n_components)])
new_coords = new_coords / new_coords.sum(0)
f = plt.figure(layout="constrained")
gs = GridSpec(2, 3, figure=f)
axes = [f.add_subplot(gs[0,0]), f.add_subplot(gs[1,0]),
f.add_subplot(gs[0,1]), f.add_subplot(gs[0,2]),
f.add_subplot(gs[1,1]), f.add_subplot(gs[1,2])
]
axes[0].scatter(Y[:,0], Y[:,1], s=20, alpha=0.02, c=Z)
axes[1].imshow(slabs[15]["data"], vmin=i_min, vmax=i_max)
axes[1].set_title(f"data @ {slabs[15]["e"]}cm-1")
for c in range(n_components):
cb = axes[c+2].imshow(new_coords[c])
plt.colorbar(cb)
axes[c+2].set_title(f"Component #{c+1}")
axes[5].imshow(labels)
axes[5].set_title("Labels")
plt.suptitle(f"PCA Method: {pca_method}")