import utils import numpy as np from sklearn.decomposition import PCA, SparsePCA, KernelPCA, FastICA from sklearn.cluster import SpectralClustering, KMeans import matplotlib.pyplot as plt from matplotlib.gridspec import GridSpec from matplotlib.widgets import Slider def pca(n_components, slab_width, slab_height, pca_method="full", alpha=1, kernel="linear", scale=False, sliders=False, selected=[]): if sliders: return pca_sliders(n_components, slab_width, slab_height, pca_method=pca_method, alpha=alpha, kernel=kernel, scale=scale, selected=selected) slabs, i_min, i_max = utils.load_slabs(slab_width, slab_height) X = utils.as_np_array(slabs).T n_freqs, n_pixels = X.shape[1], X.shape[0] if scale: X = utils.scale(X) if pca_method == "full": p = PCA(n_components = n_components) elif pca_method == "sparse": p = SparsePCA(n_components = n_components, alpha=alpha) elif pca_method == "kernel": p = KernelPCA(n_components = n_components, kernel=kernel, fit_inverse_transform=True) elif pca_method == "ica": p = FastICA(n_components = n_components, whiten="arbitrary-variance") else: print(f"unknown PCA pca_method {pca_method}") return Y = p.fit_transform(X) if pca_method in ["full", "sparse", "ica"]: W = p.components_ else: W = p.dual_coef_ dots_idc = np.where(Y[:,0] > 0.6) new_coords = [Y[:,i].reshape((slab_height, slab_width)) for i in range(n_components)] for c in range(n_components): new_coords[c] = (new_coords[c] - new_coords[c].min())/new_coords[c].ptp() f = plt.figure(layout="constrained") gs = GridSpec(2, 3, figure=f) axes = [f.add_subplot(gs[0,0]), f.add_subplot(gs[1,0]), f.add_subplot(gs[0,1]), f.add_subplot(gs[0,2]), f.add_subplot(gs[1,1]), f.add_subplot(gs[1,2]) ] axes[0].scatter(Y[:,0], Y[:,1], s=20, alpha=0.02) axes[0].scatter(Y[dots_idc,0], Y[dots_idc,1], s=20, alpha=1, color="black") axes[1].imshow(slabs[15]["data"], vmin=i_min, vmax=i_max) axes[1].set_title(f"data @ {slabs[15]["e"]}cm-1") for c in range(n_components-1): # mask = new_coords[c] > 0.6 cb = axes[c+2].imshow(new_coords[c]) # cb = axes[c+2].imshow(mask) plt.colorbar(cb) axes[c+2].set_title(f"Component #{c+1}") axes[4].hist(Y[:,0], bins=25) if pca_method in ["full", "sparse", "ica"]: for c in range(n_components-1): axes[5].step(utils.energies, W[c, :], label=f"{c+1}", where="mid") axes[5].axvline(2845, color="black", lw=0.5) axes[5].axvline(2930, color="black", lw=0.5) axes[5].set_title("Coefficients for each component") plt.legend(title="Component") plt.suptitle(f"PCA Method: {pca_method}") def pca_sliders(n_components, slab_width, slab_height, pca_method="full", alpha=1, kernel="linear", scale=False, selected=[]): slabs, i_min, i_max = utils.load_slabs(slab_width, slab_height) i_idc = np.array([[i for i in range(slab_height)] for j in range(slab_width)]).flatten() j_idc = np.array([[j for i in range(slab_height)] for j in range(slab_width)]).flatten() X = utils.as_np_array(slabs).T n_pixels, n_freqs = X.shape if scale: X = utils.scale(X) if pca_method == "full": p = PCA(n_components = n_components) elif pca_method == "sparse": p = SparsePCA(n_components = n_components, alpha=alpha) elif pca_method == "kernel": p = KernelPCA(n_components = n_components, kernel=kernel, fit_inverse_transform=True) elif pca_method == "ica": p = FastICA(n_components = n_components, whiten="arbitrary-variance") else: print(f"unknown PCA pca_method {pca_method}") return Y_pca = p.fit_transform(X) if len(selected) > 0: Y = X[:,selected] else: Y = Y_pca if pca_method in ["full", "sparse", "ica"]: W = p.components_ else: W = p.dual_coef_ thres_1 = 0.6 thres_2 = 0.2 angle = 0.0 def rot_mat(x): return np.array([[np.cos(x), -np.sin(x)],[np.sin(x), np.cos(x)]]) def compute_dots_idc(thres_1, thres_2, angle): M = rot_mat(angle) YY = np.dot(M, (Y[:, :2] - [thres_1, thres_2]).T) return np.where((YY[0,:] >= 0) * (YY[1,:] >= 0))[0] dots_idc = compute_dots_idc(thres_1, thres_2, angle) new_coords = [Y[:,i].reshape((slab_height, slab_width)) for i in range(n_components)] for c in range(n_components): new_coords[c] = (new_coords[c] - new_coords[c].min())/new_coords[c].ptp() f = plt.figure(layout="constrained") gs = GridSpec(2, 3, left=0, right=0.9) axes = [f.add_subplot(gs[0,0]), f.add_subplot(gs[1,0]), f.add_subplot(gs[0,1]), f.add_subplot(gs[0,2]), f.add_subplot(gs[1,1]), f.add_subplot(gs[1,2]), # f.add_subplot(gs[2,0]), f.add_subplot(gs[2,2]) ] axes[0].scatter(Y[:,0], Y[:,1], s=20, alpha=0.02) pca_scatter = axes[0].scatter(Y[dots_idc,0], Y[dots_idc,1], s=20, alpha=1, color="black") axes[1].imshow(slabs[15]["data"], vmin=i_min, vmax=i_max) axes[1].set_title(f"data @ {slabs[15]["e"]}cm-1") overlay_scatter = axes[1].scatter(i_idc[dots_idc], j_idc[dots_idc], s=0.1, color="black") f_img, ax_img = plt.subplots() ax_img.imshow(slabs[15]["data"], vmin=i_min, vmax=i_max) ax_img.set_title(f"data @ {slabs[15]["e"]}cm-1") overlay_scatter_img = ax_img.scatter(i_idc[dots_idc], j_idc[dots_idc], s=1, color="black") for c in range(n_components): # mask = new_coords[c] > 0.6 cb = axes[c+2].imshow(new_coords[c]) # cb = axes[c+2].imshow(mask) plt.colorbar(cb) if len(selected) > 0: axes[c+2].set_title(f"Energy #{c+1} ({slabs[selected[c]]["e"]}cm-1)") else: axes[c+2].set_title(f"PCA component #{c+1}") if len(selected) > 0: axes[4].hist(Y[:,0], bins=25, ec="black", histtype="step") axes[4].hist(Y[:,1], bins=25, ec="grey", histtype="step") else: axes[4].hist(Y[:,0], bins=25) axes[4].hist(Y[:,1], bins=25) axes[4].axvline(thres_1, lw=0.5, color="black") axes[4].axvline(thres_2, lw=0.5, color="black") if pca_method in ["full", "sparse", "ica"]: for c in range(n_components): axes[5].step(utils.energies, W[c, :], label=f"{c+1}", where="mid") axes[5].axvline(2845, color="black", lw=0.5) axes[5].axvline(2930, color="black", lw=0.5) axes[5].set_title("Coefficients for each component") axes[5].legend(title="Component") for e in selected: axes[5].axvline(utils.energies[e], ls="dotted") # Make a horizontal slider to control the frequency. thres_1_axis = f.add_axes([0.91, 0.05, 0.03, 0.9]) thres_1_slider = Slider( ax=thres_1_axis, label='Thres 1', valmin=Y[:,0].min(), valstep=1, valmax=Y[:,0].max(), valinit=0.5*(Y[:,0].min()+Y[:,0].max()), orientation="vertical" ) thres_2_axis = f.add_axes([0.94, 0.05, 0.03, 0.9]) thres_2_slider = Slider( ax=thres_2_axis, label='Thres 2', valmin=Y[:,1].min(), valstep=1, valmax=Y[:,1].max(), valinit=0.5*(Y[:,1].min()+Y[:,1].max()), orientation="vertical" ) angle_axis = f.add_axes([0.97, 0.05, 0.03, 0.9]) angle_slider = Slider( ax=angle_axis, label='Angle', valmin=0, valstep=0.01, valmax=2*np.pi, valinit=0, orientation="vertical" ) # # The function to be called anytime a slider's value changes def update(val): dots_idc = compute_dots_idc(thres_1_slider.val, thres_2_slider.val, angle_slider.val) new_offsets = Y[dots_idc,:2] pca_scatter.set_offsets(new_offsets) new_offsets = np.c_[i_idc[dots_idc], j_idc[dots_idc]] overlay_scatter.set_offsets(new_offsets) overlay_scatter_img.set_offsets(new_offsets) f_img.canvas.draw_idle() def handle_click(event): if event.inaxes != axes[0]: return thres_1_slider.set_val(event.xdata) thres_2_slider.set_val(event.ydata) update(0) f.canvas.mpl_connect('button_press_event', handle_click) thres_1_slider.on_changed(update) thres_2_slider.on_changed(update) angle_slider.on_changed(update) plt.suptitle(f"PCA Method: {pca_method}") return [thres_1_slider, thres_2_slider, angle_slider] def pca_cluster(n_components, n_clusters, slab_width, slab_height, pca_method="full", cluster_method="spectral", alpha=1, kernel="linear", scale=False): slabs, i_min, i_max = utils.load_slabs(slab_width, slab_height) X = utils.as_np_array(slabs).T n_freqs, n_pixels = X.shape[1], X.shape[0] if scale: X = utils.scale(X) if pca_method == "full": p = PCA(n_components = n_components) elif pca_method == "sparse": p = SparsePCA(n_components = n_components, alpha=alpha) elif pca_method == "kernel": p = KernelPCA(n_components = n_components, kernel=kernel, fit_inverse_transform=True) elif pca_method == "ica": p = FastICA(n_components = n_components, whiten="arbitrary-variance") else: print(f"unknown PCA method {pca_method}") return Y = p.fit_transform(X) if pca_method in ["full", "sparse", "ica"]: W = p.components_ else: W = p.dual_coef_ if cluster_method == "spectral": Z = SpectralClustering(n_clusters=n_clusters, affinity="nearest_neighbors").fit(Y).labels_.astype(int) elif cluster_method == "kmeans": Z = KMeans(n_clusters=n_clusters).fit(Y).labels_.astype(int) else: print(f"unknown clustering method {cluster_method}") return labels = Z.reshape((slab_height, slab_width)) new_coords = np.array([Y[:,i].reshape((slab_height, slab_width)) for i in range(n_components)]) new_coords = new_coords / new_coords.sum(0) f = plt.figure(layout="constrained") gs = GridSpec(2, 3, figure=f) axes = [f.add_subplot(gs[0,0]), f.add_subplot(gs[1,0]), f.add_subplot(gs[0,1]), f.add_subplot(gs[0,2]), f.add_subplot(gs[1,1]), f.add_subplot(gs[1,2]) ] axes[0].scatter(Y[:,0], Y[:,1], s=20, alpha=0.02, c=Z) axes[1].imshow(slabs[15]["data"], vmin=i_min, vmax=i_max) axes[1].set_title(f"data @ {slabs[15]["e"]}cm-1") for c in range(n_components): cb = axes[c+2].imshow(new_coords[c]) plt.colorbar(cb) axes[c+2].set_title(f"Component #{c+1}") axes[5].imshow(labels) axes[5].set_title("Labels") plt.suptitle(f"PCA Method: {pca_method}")