geometric-intelligence · amilworks · May 18, 2023 · May 18, 2023
diff --git a/cells/streamlit/cellgeometry/Hello.py b/cells/streamlit/cellgeometry/Hello.py
@@ -0,0 +1,22 @@
+import streamlit as st
+
+
+
+
+# Configure the browser tab, then render the welcome text.
+st.set_page_config(
+    page_title="Welcome",
+    page_icon="👋",
+)
+
+st.write("# Welcome to the Cell Shape Analysis App! 👋")
+
+st.sidebar.success("Select a demo above.")
+
+st.markdown(
+    """
+    Geomstats is an open-source Python package for computations, statistics, and machine learning on nonlinear manifolds. Data from many application fields are elements of manifolds. For instance, the manifold of 3D rotations SO(3) naturally appears when performing statistical learning on articulated objects like the human spine or robotics arms.
+
+    **👈 Select a demo from the sidebar**
+"""
+)
diff --git a/cells/streamlit/cellgeometry/__init__.py b/cells/streamlit/cellgeometry/__init__.py
diff --git a/cells/streamlit/cellgeometry/pages/ Elastic_Metric_for_Cell_Boundary_Analysis.py b/cells/streamlit/cellgeometry/pages/ Elastic_Metric_for_Cell_Boundary_Analysis.py
@@ -0,0 +1,216 @@
+import os
+import subprocess
+import geomstats.backend as gs
+import streamlit as st
+import time
+import numpy as np
+import pandas as pd
+import matplotlib.pyplot as plt
+from scipy import stats
+
+from sklearn.cluster import KMeans
+from sklearn.metrics import confusion_matrix
+from sklearn.decomposition import PCA
+
+from scipy.optimize import linear_sum_assignment as linear_assignment
+from sklearn import manifold
+from joblib import Parallel, delayed
+from numba import jit, njit, prange
+
+from geomstats.geometry.euclidean import Euclidean
+from geomstats.geometry.discrete_curves import R2, DiscreteCurves, ClosedDiscreteCurves
+
+from geomstats.learning.frechet_mean import FrechetMean
+from geomstats.learning.kmeans import RiemannianKMeans
+from geomstats.learning.mdm import RiemannianMinimumDistanceToMean
+from geomstats.learning.pca import TangentPCA
+
+import sys
+sys.path.append("/app/utils") 
+
+# import utils
+from utils import experimental as experimental
+from utils import basic as basic
+
+
+
+
+# Page setup: browser-tab title/icon, then the page heading, the sidebar
+# heading and the introductory paragraph.
+st.set_page_config(page_title="Elastic Metric for Cell Boundary Analysis", page_icon="📈")
+
+st.markdown("# Shape Analysis of Cancer Cells")
+st.sidebar.header("Shape Analysis of Cancer Cells")
+st.write(
+    """This notebook studies Osteosarcoma (bone cancer) cells and the impact of drug treatment on their morphological shapes, by analyzing cell images obtained from fluorescence microscopy.
+
+This analysis relies on the elastic metric between discrete curves from Geomstats. We will study to which extent this metric can detect how the cell shape is associated with the response to treatment."""
+)
+
+dataset_name = "osteosarcoma"
+
+# Lower bound is 3 rather than 0: a closed outline needs at least three points,
+# and the distances computed further down this page are divided by this value.
+n_sampling_points = st.slider('Select the Number of Sampling Points', 3, 250, 100)
+n_cells = 650
+labels_a_name = "lines"
+labels_b_name = "treatments"
+
+quotient = ["rotation"] #["scaling"] #, "rotation"]
+do_not_quotient = False
+
+
+if dataset_name == "osteosarcoma":
+    cells, cell_shapes, labels_a, labels_b = experimental.load_treated_osteosarcoma_cells(
+        n_cells=n_cells, n_sampling_points=n_sampling_points, quotient=quotient
+    )
+else:
+    # Fail visibly here instead of with a NameError on `labels_a` below.
+    st.error(f"Unsupported dataset: {dataset_name!r}")
+    st.stop()
+
+
+# Map every distinct label value to an integer index.
+labels_a_dict = {lab: i_lab for i_lab, lab in enumerate(np.unique(labels_a))}
+labels_b_dict = {lab: i_lab for i_lab, lab in enumerate(np.unique(labels_b))}
+
+# These go to the server's stdout, not to the page.
+print(f"Dictionary associated to label \"{labels_a_name}\":")
+print(labels_a_dict)
+print(f"Dictionary associated to label \"{labels_b_name}\":")
+print(labels_b_dict)
+
+# When set, the rest of the page works on `cells` instead of `cell_shapes`.
+if do_not_quotient:
+    cell_shapes = cells
+
+n_cells_to_plot = 10
+
+# Grid layout: one row per `labels_b` value, and for each `labels_a` value a
+# run of `n_cells_to_plot` randomly drawn cells.
+unique_labels_a = np.unique(labels_a)
+unique_labels_b = np.unique(labels_b)
+n_rows = len(unique_labels_b)
+n_cols = len(unique_labels_a) * n_cells_to_plot
+
+fig = plt.figure(figsize=(16, 6))
+count = 1
+for label_b in unique_labels_b:
+    for i_lab_a, label_a in enumerate(unique_labels_a):
+        cell_data = [
+            cell
+            for cell, lab_a, lab_b in zip(cell_shapes, labels_a, labels_b)
+            if lab_a == label_a and lab_b == label_b
+        ]
+        for i_to_plot in range(n_cells_to_plot):
+            cell = gs.random.choice(a=cell_data)
+            ax = fig.add_subplot(n_rows, n_cols, count)
+            count += 1
+            ax.plot(cell[:, 0], cell[:, 1], color=f"C{i_lab_a}")
+            ax.axis("equal")
+            ax.axis("off")
+            # Only the middle subplot of each run carries the group title.
+            if i_to_plot == n_cells_to_plot // 2:
+                ax.set_title(f"{label_a}   -   {label_b}", fontsize=20)
+st.pyplot(fig)
+
+# Shape spaces and the metrics compared on this page.
+R1 = Euclidean(dim=1)
+CLOSED_CURVES_SPACE = ClosedDiscreteCurves(R2)
+CURVES_SPACE = DiscreteCurves(R2)
+SRV_METRIC = CURVES_SPACE.srv_metric
+L2_METRIC = CURVES_SPACE.l2_curves_metric
+
+# Elastic metrics are keyed by their (a, b) parameter pair.
+AS = [1, 2, 0.75, 0.5, 0.25, 0.01] #, 1.6] #, 1.4, 1.2, 1, 0.5, 0.2, 0.1]
+BS = [0.5, 1, 0.5, 0.5, 0.5, 0.5] #, 2, 2, 2, 2, 2, 2, 2]
+ELASTIC_METRIC = {
+    (a, b): DiscreteCurves(R2, a=a, b=b).elastic_metric for a, b in zip(AS, BS)
+}
+METRICS = {"Linear": L2_METRIC, "SRV": SRV_METRIC}
+
+# One mean per metric; insertion order (Linear, SRV, then the elastic pairs)
+# is the order in which the means are plotted later.
+means = {
+    "Linear": gs.mean(cell_shapes, axis=0),
+    "SRV": FrechetMean(metric=SRV_METRIC, method="default").fit(cell_shapes).estimate_,
+}
+for a, b in zip(AS, BS):
+    elastic_estimator = FrechetMean(metric=ELASTIC_METRIC[a, b], method="default")
+    means[a, b] = elastic_estimator.fit(cell_shapes).estimate_
+
+st.header("Sample Means")
+st.markdown("We compare results when computing the mean cell versus the mean cell shapes with different elastic metrics.")
+fig = plt.figure(figsize=(18, 8))
+
+# Two rows; round the column count up so an odd number of means still fits.
+ncols = (len(means) + 1) // 2
+
+for i, (mean_name, mean) in enumerate(means.items()):
+    ax = fig.add_subplot(2, ncols, i + 1)
+    ax.plot(mean[:, 0], mean[:, 1], "black")
+    ax.set_aspect("equal")
+    ax.axis("off")
+    title = mean_name
+    if mean_name not in ["Linear", "SRV"]:
+        # Every other key is an (a, b) pair of elastic-metric parameters.
+        a, b = mean_name
+        title = f"Elastic {mean_name}\n a / (2b) = {a / (2 * b)}"
+    ax.set_title(title)
+
+st.pyplot(fig)
+
+
+
+fig = plt.figure(figsize=(18, 8))
+
+ncols = (len(means) + 1) // 2  # two rows, rounded up for an odd count
+
+for i, (mean_name, mean) in enumerate(means.items()):
+    ax = fig.add_subplot(2, ncols, i + 1)
+    closed_mean = CLOSED_CURVES_SPACE.projection(mean)
+    ax.plot(closed_mean[:, 0], closed_mean[:, 1], "black")
+    ax.set_aspect("equal")
+    ax.axis("off")
+    title = mean_name
+    if mean_name not in ["Linear", "SRV"]:
+        a, b = mean_name  # elastic means are keyed by their (a, b) pair
+        title = f"Elastic {mean_name}\n a / (2b) = {a / (2 * b)}"
+    ax.set_title(title)
+
+# The remark below refers to this figure, so it has to be rendered first.
+st.pyplot(fig)
+
+st.markdown("__Remark:__ Unfortunately, there are some numerical issues with the projection in the space of closed curves, as shown by the V-shaped results above.")
+
+st.markdown("Since ratios of 1 give the same results as for the SRV metric, we only select AS, BS with a ratio that is not 1 for the elastic metrics.")
+
+st.markdown("We also continue the analysis with the space of open curves, as opposed to the space of closed curves, for the numerical issues observed above.")
+
+
+NEW_AS = [0.75, 0.5, 0.25, 0.01] #, 1.6] #, 1.4, 1.2, 1, 0.5, 0.2, 0.1]
+NEW_BS = [0.5, 0.5, 0.5, 0.5] #, 2, 2, 2, 2, 2, 2, 2]
+
+st.markdown("## Distances to the Mean")
+
+# Each distance is multiplied by 100 for visualization purposes (a change of
+# units) and divided by the number of sampling points.
+dists = {
+    "Linear": [
+        100 * gs.linalg.norm(means["Linear"] - cell) / n_sampling_points
+        for cell in cell_shapes
+    ],
+    "SRV": [
+        100 * SRV_METRIC.dist(means["SRV"], cell) / n_sampling_points
+        for cell in cell_shapes
+    ],
+}
+for a, b in zip(NEW_AS, NEW_BS):
+    dists[a, b] = [
+        100 * ELASTIC_METRIC[a, b].dist(means[a, b], cell) / n_sampling_points
+        for cell in cell_shapes
+    ]
+
+# One row per cell: its two labels followed by its distance under each metric.
+dists_summary = pd.DataFrame(
+    data={
+        labels_a_name: labels_a,
+        labels_b_name: labels_b,
+        "Linear": dists["Linear"],
+        "SRV": dists["SRV"],
+    }
+)
+for a, b in zip(NEW_AS, NEW_BS):
+    dists_summary[f"Elastic({a}, {b})"] = dists[a, b]
+
+st.dataframe(dists_summary)
+# SAVEFIG = True
+# if SAVEFIG:
+#     figs_dir = os.path.join(work_dir, f"cells/saved_figs/{dataset_name}")
+#     if not os.path.exists(figs_dir):
+#         os.makedirs(figs_dir)
+#     print(f"Will save figs to {figs_dir}")
+#     from datetime import datetime
+
+#     now = datetime.now().strftime("%Y%m%d_%H_%M_%S")
+#     print(now)
diff --git a/cells/streamlit/cellgeometry/pages/1-Load_Data.py b/cells/streamlit/cellgeometry/pages/1-Load_Data.py
@@ -0,0 +1,119 @@
+import streamlit as st
+import pandas as pd
+import os
+import time
+import matplotlib.pyplot as plt
+
+import sys
+sys.path.append("/app/utils") 
+
+from utils.data_utils import build_rois, find_all_instances
+
+# Timestamp for this run: year + day-of-year, a dash, then HHMMSS.
+current_time = time.localtime()
+year, day_of_year, time_string = time.strftime("%Y %j %H%M%S", current_time).split()
+current_time_string = f"{year}{day_of_year}-{time_string}"
+
+# Seed the session key the first time the script runs in this session.
+if "cells_list" not in st.session_state:
+    st.session_state["cells_list"] = True
+
+# Page title followed by the static "Getting Started" instructions describing
+# the expected upload (a folder of zipped FIJI/ImageJ ROIs).
+st.write("# Load Your Cell Data 👋")
+
+st.markdown(
+"""
+## Getting Started
+
+We currently support an ROI zip folder created by FIJI/ImageJ. What this means is you may have a folder structure as follows:
+```
+    └── Cropped_Images
+        ├── Bottom_plank_0
+        │   ├── Averaged_ROI
+        │   ├── Data
+        │   ├── Data_Filtered
+        │   ├── Labels
+        │   ├── OG
+        │   ├── Outlines
+        │   └── ROIs  <---- Folder of zipped ROIs
+```
+You can simply upload this ROIs folder and we will load your data for you. We plan on supporting data given in `xy` coordinate format from `JSON` and CSV/TXT files.
+Your chosen data structure __must__ contain `x` and `y` for the program to correctly parse and load your data.
+"""
+)
+
+def get_files_from_folder(folder_path):
+    """
+    Retrieves the regular files located directly inside a folder.
+
+    Sub-directories are skipped and not descended into. Paths are returned
+    sorted by file name, because ``os.listdir`` yields entries in arbitrary
+    order and callers should get the same list on every platform.
+
+    Parameters:
+        folder_path (str): The path to the folder.
+
+    Returns:
+        list: The file paths, each one prefixed with ``folder_path``.
+
+    Raises:
+        OSError: If ``folder_path`` cannot be listed (e.g. it does not exist).
+
+    Example:
+        >>> folder_path = '/path/to/folder'
+        >>> files = get_files_from_folder(folder_path)
+        >>> print(files)
+        ['/path/to/folder/file1.txt', '/path/to/folder/file2.csv', '/path/to/folder/file3.jpg']
+    """
+    candidates = (
+        os.path.join(folder_path, name) for name in sorted(os.listdir(folder_path))
+    )
+    return [path for path in candidates if os.path.isfile(path)]
+
+
+
+# Folder receiving this session's uploads, named after the date and time of the
+# run. Streamlit re-executes the script on every interaction, so the path is
+# kept in the session state; recomputing it from the clock each time would
+# create a brand-new run folder on every rerun.
+if "upload_folder" not in st.session_state:
+    st.session_state["upload_folder"] = f"/app/data/run-{current_time_string}"
+upload_folder = st.session_state["upload_folder"]
+
+# Check if the upload folder exists, and create it if it doesn't
+if not os.path.exists(upload_folder):
+    os.makedirs(upload_folder, exist_ok=True)
+    st.info(f"Upload folder created: {upload_folder}")
+
+# Get the list of files in the upload folder
+files = get_files_from_folder(upload_folder)
+
+
+
+# Display the file uploader
+uploaded_files = st.file_uploader("Upload a file", type=["zip"], accept_multiple_files=True)
+
+
+# Process the uploaded files.
+# With accept_multiple_files=True the uploader returns an empty list (not None)
+# when nothing is selected, so test truthiness to skip the progress bar then.
+if uploaded_files:
+    progress_bar = st.progress(0)
+    total_files = len(uploaded_files)
+
+    for completed_files, uploaded_file in enumerate(uploaded_files, start=1):
+        # The file name is supplied by the client; keep only its last path
+        # component so it cannot point outside the upload folder.
+        safe_name = os.path.basename(uploaded_file.name)
+        file_path = os.path.join(upload_folder, safe_name)
+        with open(file_path, "wb") as f:
+            f.write(uploaded_file.getbuffer())
+        progress = int((completed_files / total_files) * 100)
+        progress_bar.progress(progress)
+
+
+# Build a dictionary of all the ROIs
+dict_rois = build_rois(upload_folder)
+
+# Extract the cells (find_all_instances fills `cells_list` in place)
+cells_list = []
+find_all_instances(dict_rois, 'x', 'y', cells_list)
+st.session_state["cells_list"] = cells_list
+
+# Nothing to preview until at least one cell is loaded: indexing the empty
+# list below would raise IndexError.
+if not cells_list:
+    st.warning("No cells loaded yet. Upload a zip of ROIs to get started.")
+    st.stop()
+
+st.write(f"Successfully Loaded {len(cells_list)} cells.")
+
+# Sanity check visualization; the input is bounded to the valid index range.
+last_index = len(cells_list) - 1
+cell_num = st.number_input(
+    f"Visualize a cell. Pick a number between 0 and {last_index}",
+    min_value=0,
+    max_value=last_index,
+)
+
+
+fig, ax = plt.subplots()
+ax.plot(cells_list[cell_num][:,0], cells_list[cell_num][:,1])
+st.pyplot(fig)