301 lines
11 KiB
Python
301 lines
11 KiB
Python
import utils
|
|
import numpy as np
|
|
from sklearn.decomposition import PCA, SparsePCA, KernelPCA, FastICA
|
|
from sklearn.cluster import SpectralClustering, KMeans
|
|
import matplotlib.pyplot as plt
|
|
from matplotlib.gridspec import GridSpec
|
|
|
|
from matplotlib.widgets import Slider
|
|
|
|
def pca(n_components, slab_width, slab_height, pca_method="full", alpha=1, kernel="linear", scale=False, sliders=False, selected=[]):
|
|
if sliders:
|
|
return pca_sliders(n_components, slab_width, slab_height, pca_method=pca_method, alpha=alpha, kernel=kernel, scale=scale, selected=selected)
|
|
slabs, i_min, i_max = utils.load_slabs(slab_width, slab_height)
|
|
|
|
X = utils.as_np_array(slabs).T
|
|
n_freqs, n_pixels = X.shape[1], X.shape[0]
|
|
|
|
if scale:
|
|
X = utils.scale(X)
|
|
|
|
if pca_method == "full":
|
|
p = PCA(n_components = n_components)
|
|
elif pca_method == "sparse":
|
|
p = SparsePCA(n_components = n_components, alpha=alpha)
|
|
elif pca_method == "kernel":
|
|
p = KernelPCA(n_components = n_components, kernel=kernel, fit_inverse_transform=True)
|
|
elif pca_method == "ica":
|
|
p = FastICA(n_components = n_components, whiten="arbitrary-variance")
|
|
else:
|
|
print(f"unknown PCA pca_method {pca_method}")
|
|
return
|
|
|
|
Y = p.fit_transform(X)
|
|
if pca_method in ["full", "sparse", "ica"]:
|
|
W = p.components_
|
|
else:
|
|
W = p.dual_coef_
|
|
|
|
dots_idc = np.where(Y[:,0] > 0.6)
|
|
|
|
new_coords = [Y[:,i].reshape((slab_height, slab_width)) for i in range(n_components)]
|
|
for c in range(n_components):
|
|
new_coords[c] = (new_coords[c] - new_coords[c].min())/new_coords[c].ptp()
|
|
|
|
f = plt.figure(layout="constrained")
|
|
gs = GridSpec(2, 3, figure=f)
|
|
axes = [f.add_subplot(gs[0,0]), f.add_subplot(gs[1,0]),
|
|
f.add_subplot(gs[0,1]), f.add_subplot(gs[0,2]),
|
|
f.add_subplot(gs[1,1]), f.add_subplot(gs[1,2])
|
|
]
|
|
|
|
axes[0].scatter(Y[:,0], Y[:,1], s=20, alpha=0.02)
|
|
axes[0].scatter(Y[dots_idc,0], Y[dots_idc,1], s=20, alpha=1, color="black")
|
|
|
|
axes[1].imshow(slabs[15]["data"], vmin=i_min, vmax=i_max)
|
|
axes[1].set_title(f"data @ {slabs[15]["e"]}cm-1")
|
|
|
|
for c in range(n_components-1):
|
|
# mask = new_coords[c] > 0.6
|
|
cb = axes[c+2].imshow(new_coords[c])
|
|
# cb = axes[c+2].imshow(mask)
|
|
plt.colorbar(cb)
|
|
axes[c+2].set_title(f"Component #{c+1}")
|
|
|
|
axes[4].hist(Y[:,0], bins=25)
|
|
|
|
if pca_method in ["full", "sparse", "ica"]:
|
|
for c in range(n_components-1):
|
|
axes[5].step(utils.energies, W[c, :], label=f"{c+1}", where="mid")
|
|
axes[5].axvline(2845, color="black", lw=0.5)
|
|
axes[5].axvline(2930, color="black", lw=0.5)
|
|
axes[5].set_title("Coefficients for each component")
|
|
|
|
plt.legend(title="Component")
|
|
plt.suptitle(f"PCA Method: {pca_method}")
|
|
|
|
def pca_sliders(n_components, slab_width, slab_height, pca_method="full", alpha=1, kernel="linear", scale=False, selected=[]):
|
|
slabs, i_min, i_max = utils.load_slabs(slab_width, slab_height)
|
|
|
|
i_idc = np.array([[i for i in range(slab_height)] for j in range(slab_width)]).flatten()
|
|
j_idc = np.array([[j for i in range(slab_height)] for j in range(slab_width)]).flatten()
|
|
|
|
X = utils.as_np_array(slabs).T
|
|
n_pixels, n_freqs = X.shape
|
|
|
|
if scale:
|
|
X = utils.scale(X)
|
|
|
|
if pca_method == "full":
|
|
p = PCA(n_components = n_components)
|
|
elif pca_method == "sparse":
|
|
p = SparsePCA(n_components = n_components, alpha=alpha)
|
|
elif pca_method == "kernel":
|
|
p = KernelPCA(n_components = n_components, kernel=kernel, fit_inverse_transform=True)
|
|
elif pca_method == "ica":
|
|
p = FastICA(n_components = n_components, whiten="arbitrary-variance")
|
|
else:
|
|
print(f"unknown PCA pca_method {pca_method}")
|
|
return
|
|
Y_pca = p.fit_transform(X)
|
|
|
|
if len(selected) > 0:
|
|
Y = X[:,selected]
|
|
else:
|
|
Y = Y_pca
|
|
|
|
if pca_method in ["full", "sparse", "ica"]:
|
|
W = p.components_
|
|
else:
|
|
W = p.dual_coef_
|
|
|
|
thres_1 = 0.6
|
|
thres_2 = 0.2
|
|
angle = 0.0
|
|
|
|
def rot_mat(x):
|
|
return np.array([[np.cos(x), -np.sin(x)],[np.sin(x), np.cos(x)]])
|
|
|
|
def compute_dots_idc(thres_1, thres_2, angle):
|
|
M = rot_mat(angle)
|
|
|
|
YY = np.dot(M, (Y[:, :2] - [thres_1, thres_2]).T)
|
|
|
|
return np.where((YY[0,:] >= 0) * (YY[1,:] >= 0))[0]
|
|
|
|
dots_idc = compute_dots_idc(thres_1, thres_2, angle)
|
|
|
|
new_coords = [Y[:,i].reshape((slab_height, slab_width)) for i in range(n_components)]
|
|
for c in range(n_components):
|
|
new_coords[c] = (new_coords[c] - new_coords[c].min())/new_coords[c].ptp()
|
|
|
|
f = plt.figure(layout="constrained")
|
|
gs = GridSpec(2, 3, left=0, right=0.9)
|
|
axes = [f.add_subplot(gs[0,0]), f.add_subplot(gs[1,0]),
|
|
f.add_subplot(gs[0,1]), f.add_subplot(gs[0,2]),
|
|
f.add_subplot(gs[1,1]), f.add_subplot(gs[1,2]),
|
|
# f.add_subplot(gs[2,0]), f.add_subplot(gs[2,2])
|
|
]
|
|
|
|
axes[0].scatter(Y[:,0], Y[:,1], s=20, alpha=0.02)
|
|
pca_scatter = axes[0].scatter(Y[dots_idc,0], Y[dots_idc,1], s=20, alpha=1, color="black")
|
|
|
|
axes[1].imshow(slabs[15]["data"], vmin=i_min, vmax=i_max)
|
|
axes[1].set_title(f"data @ {slabs[15]["e"]}cm-1")
|
|
overlay_scatter = axes[1].scatter(i_idc[dots_idc], j_idc[dots_idc], s=0.1, color="black")
|
|
|
|
f_img, ax_img = plt.subplots()
|
|
|
|
ax_img.imshow(slabs[15]["data"], vmin=i_min, vmax=i_max)
|
|
ax_img.set_title(f"data @ {slabs[15]["e"]}cm-1")
|
|
overlay_scatter_img = ax_img.scatter(i_idc[dots_idc], j_idc[dots_idc], s=1, color="black")
|
|
|
|
for c in range(n_components):
|
|
# mask = new_coords[c] > 0.6
|
|
cb = axes[c+2].imshow(new_coords[c])
|
|
# cb = axes[c+2].imshow(mask)
|
|
plt.colorbar(cb)
|
|
if len(selected) > 0:
|
|
axes[c+2].set_title(f"Energy #{c+1} ({slabs[selected[c]]["e"]}cm-1)")
|
|
else:
|
|
axes[c+2].set_title(f"PCA component #{c+1}")
|
|
|
|
if len(selected) > 0:
|
|
axes[4].hist(Y[:,0], bins=25, ec="black", histtype="step")
|
|
axes[4].hist(Y[:,1], bins=25, ec="grey", histtype="step")
|
|
else:
|
|
axes[4].hist(Y[:,0], bins=25)
|
|
axes[4].hist(Y[:,1], bins=25)
|
|
axes[4].axvline(thres_1, lw=0.5, color="black")
|
|
axes[4].axvline(thres_2, lw=0.5, color="black")
|
|
|
|
if pca_method in ["full", "sparse", "ica"]:
|
|
for c in range(n_components):
|
|
axes[5].step(utils.energies, W[c, :], label=f"{c+1}", where="mid")
|
|
axes[5].axvline(2845, color="black", lw=0.5)
|
|
axes[5].axvline(2930, color="black", lw=0.5)
|
|
axes[5].set_title("Coefficients for each component")
|
|
axes[5].legend(title="Component")
|
|
for e in selected:
|
|
axes[5].axvline(utils.energies[e], ls="dotted")
|
|
|
|
# Make a horizontal slider to control the frequency.
|
|
thres_1_axis = f.add_axes([0.91, 0.05, 0.03, 0.9])
|
|
thres_1_slider = Slider(
|
|
ax=thres_1_axis,
|
|
label='Thres 1',
|
|
valmin=Y[:,0].min(),
|
|
valstep=1,
|
|
valmax=Y[:,0].max(),
|
|
valinit=0.5*(Y[:,0].min()+Y[:,0].max()),
|
|
orientation="vertical"
|
|
)
|
|
thres_2_axis = f.add_axes([0.94, 0.05, 0.03, 0.9])
|
|
thres_2_slider = Slider(
|
|
ax=thres_2_axis,
|
|
label='Thres 2',
|
|
valmin=Y[:,1].min(),
|
|
valstep=1,
|
|
valmax=Y[:,1].max(),
|
|
valinit=0.5*(Y[:,1].min()+Y[:,1].max()),
|
|
orientation="vertical"
|
|
)
|
|
|
|
angle_axis = f.add_axes([0.97, 0.05, 0.03, 0.9])
|
|
angle_slider = Slider(
|
|
ax=angle_axis,
|
|
label='Angle',
|
|
valmin=0,
|
|
valstep=0.01,
|
|
valmax=2*np.pi,
|
|
valinit=0,
|
|
orientation="vertical"
|
|
)
|
|
|
|
# # The function to be called anytime a slider's value changes
|
|
def update(val):
|
|
dots_idc = compute_dots_idc(thres_1_slider.val, thres_2_slider.val, angle_slider.val)
|
|
new_offsets = Y[dots_idc,:2]
|
|
pca_scatter.set_offsets(new_offsets)
|
|
new_offsets = np.c_[i_idc[dots_idc], j_idc[dots_idc]]
|
|
overlay_scatter.set_offsets(new_offsets)
|
|
overlay_scatter_img.set_offsets(new_offsets)
|
|
f_img.canvas.draw_idle()
|
|
|
|
def handle_click(event):
|
|
if event.inaxes != axes[0]: return
|
|
thres_1_slider.set_val(event.xdata)
|
|
thres_2_slider.set_val(event.ydata)
|
|
update(0)
|
|
|
|
f.canvas.mpl_connect('button_press_event', handle_click)
|
|
|
|
thres_1_slider.on_changed(update)
|
|
thres_2_slider.on_changed(update)
|
|
angle_slider.on_changed(update)
|
|
|
|
plt.suptitle(f"PCA Method: {pca_method}")
|
|
|
|
return [thres_1_slider, thres_2_slider, angle_slider]
|
|
|
|
def pca_cluster(n_components, n_clusters, slab_width, slab_height, pca_method="full", cluster_method="spectral", alpha=1, kernel="linear", scale=False):
|
|
slabs, i_min, i_max = utils.load_slabs(slab_width, slab_height)
|
|
|
|
X = utils.as_np_array(slabs).T
|
|
n_freqs, n_pixels = X.shape[1], X.shape[0]
|
|
|
|
if scale:
|
|
X = utils.scale(X)
|
|
|
|
if pca_method == "full":
|
|
p = PCA(n_components = n_components)
|
|
elif pca_method == "sparse":
|
|
p = SparsePCA(n_components = n_components, alpha=alpha)
|
|
elif pca_method == "kernel":
|
|
p = KernelPCA(n_components = n_components, kernel=kernel, fit_inverse_transform=True)
|
|
elif pca_method == "ica":
|
|
p = FastICA(n_components = n_components, whiten="arbitrary-variance")
|
|
else:
|
|
print(f"unknown PCA method {pca_method}")
|
|
return
|
|
|
|
Y = p.fit_transform(X)
|
|
if pca_method in ["full", "sparse", "ica"]:
|
|
W = p.components_
|
|
else:
|
|
W = p.dual_coef_
|
|
|
|
if cluster_method == "spectral":
|
|
Z = SpectralClustering(n_clusters=n_clusters, affinity="nearest_neighbors").fit(Y).labels_.astype(int)
|
|
elif cluster_method == "kmeans":
|
|
Z = KMeans(n_clusters=n_clusters).fit(Y).labels_.astype(int)
|
|
else:
|
|
print(f"unknown clustering method {cluster_method}")
|
|
return
|
|
|
|
labels = Z.reshape((slab_height, slab_width))
|
|
|
|
new_coords = np.array([Y[:,i].reshape((slab_height, slab_width)) for i in range(n_components)])
|
|
new_coords = new_coords / new_coords.sum(0)
|
|
|
|
f = plt.figure(layout="constrained")
|
|
gs = GridSpec(2, 3, figure=f)
|
|
axes = [f.add_subplot(gs[0,0]), f.add_subplot(gs[1,0]),
|
|
f.add_subplot(gs[0,1]), f.add_subplot(gs[0,2]),
|
|
f.add_subplot(gs[1,1]), f.add_subplot(gs[1,2])
|
|
]
|
|
|
|
axes[0].scatter(Y[:,0], Y[:,1], s=20, alpha=0.02, c=Z)
|
|
|
|
axes[1].imshow(slabs[15]["data"], vmin=i_min, vmax=i_max)
|
|
axes[1].set_title(f"data @ {slabs[15]["e"]}cm-1")
|
|
|
|
for c in range(n_components):
|
|
cb = axes[c+2].imshow(new_coords[c])
|
|
plt.colorbar(cb)
|
|
axes[c+2].set_title(f"Component #{c+1}")
|
|
|
|
axes[5].imshow(labels)
|
|
axes[5].set_title("Labels")
|
|
|
|
plt.suptitle(f"PCA Method: {pca_method}")
|