Skip to content

Updates to Pages and Dependencies #2

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into the base branch from the contributor's branch
May 12, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion cells/streamlit/cells/Hello.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
st.markdown(
"""
Geomstats is an open-source Python package for computations, statistics, and machine learning on nonlinear manifolds. Data from many application fields are elements of manifolds. For instance, the manifold of 3D rotations SO(3) naturally appears when performing statistical learning on articulated objects like the human spine or robotics arms.
**👈 Select a demo from the sidebar**
**

👈 Select a demo from the sidebar**
"""
)
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@
NEW_AS = [0.75, 0.5, 0.25, 0.01] #, 1.6] #, 1.4, 1.2, 1, 0.5, 0.2, 0.1]
NEW_BS = [0.5, 0.5, 0.5, 0.5] #, 2, 2, 2, 2, 2, 2, 2]

st.makrdown("## Distances to the Mean")
st.markdown("## Distances to the Mean")

# We multiply the distances by a 100, for visualization purposes. It amounts to a change of units.
dists = {}
Expand Down Expand Up @@ -203,7 +203,7 @@
for a, b in zip(NEW_AS, NEW_BS):
dists_summary[f"Elastic({a}, {b})"] = dists[a, b]

st.dataframe(dists_summary)
st.dataframe(dists_summary)
# SAVEFIG = True
# if SAVEFIG:
# figs_dir = os.path.join(work_dir, f"cells/saved_figs/{dataset_name}")
Expand Down
119 changes: 119 additions & 0 deletions cells/streamlit/cells/pages/1-Load_Data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
import streamlit as st
import pandas as pd
import os
import time
import matplotlib.pyplot as plt

import sys
sys.path.append("/app/utils")

from utils.data_utils import build_rois, find_all_instances

# Timestamp used to name this run's upload folder: YYYYDDD-HHMMSS
# (year, zero-padded day-of-year, then wall-clock time).
current_time = time.localtime()

year = time.strftime("%Y", current_time)
day_of_year = time.strftime("%j", current_time)
time_string = time.strftime("%H%M%S", current_time)

current_time_string = "{}{}-{}".format(year, day_of_year, time_string)

# Seed the session-state slot that the analysis pages read.
# NOTE(review): True is presumably a placeholder until real cell data is
# stored below — confirm the other pages tolerate a non-list value.
if "cells_list" not in st.session_state:
    st.session_state["cells_list"] = True

st.write("# Load Your Cell Data 👋")

# Page intro: explains the expected FIJI/ImageJ ROI folder layout.
st.markdown(
    """
## Getting Started

We currently support an ROI zip folder created by FIJI/ImageJ. What this means is you may have a folder structure as follows:
```
└── Cropped_Images
├── Bottom_plank_0
│ ├── Averaged_ROI
│ ├── Data
│ ├── Data_Filtered
│ ├── Labels
│ ├── OG
│ ├── Outlines
│ └── ROIs <---- Folder of zipped ROIs
```
You can simply upload this ROIs folder and we will load your data for you. We plan on supporting data given in `xy` coordinate format from `JSON` and CSV/TXT files.
Your chosen data structure __must__ contain `x` and `y` for the program to correctly parse and load your data.
"""
)

def get_files_from_folder(folder_path):
    """
    Retrieve the files directly inside a folder (non-recursive).

    Parameters:
        folder_path (str): The path to the folder.

    Returns:
        list: Full paths of the regular files in ``folder_path``;
        subdirectories are skipped. Order follows ``os.listdir``, which is
        arbitrary — sort the result if a stable order is needed.

    Example:
        >>> files = get_files_from_folder('/path/to/folder')
        >>> print(files)
        ['/path/to/folder/file1.txt', '/path/to/folder/file2.csv', '/path/to/folder/file3.jpg']
    """
    # Join each entry once, then keep only regular files — replaces the
    # manual append loop that rebuilt the same path twice per entry.
    candidates = (os.path.join(folder_path, name) for name in os.listdir(folder_path))
    return [path for path in candidates if os.path.isfile(path)]



# Specify the folder path for file uploads; each run is saved under a
# date-and-time folder name so runs never collide.
upload_folder = f"/app/data/run-{current_time_string}"

# Check if the upload folder exists, and create it if it doesn't.
if not os.path.exists(upload_folder):
    os.makedirs(upload_folder)
    st.info(f"Upload folder created: {upload_folder}")

# Get the list of files already in the upload folder.
files = get_files_from_folder(upload_folder)

# Display the file uploader.
uploaded_files = st.file_uploader("Upload a file", type=["zip"], accept_multiple_files=True)

# Process the uploaded files.
# BUGFIX: with accept_multiple_files=True, st.file_uploader returns a list
# (possibly empty), never None, so `is not None` was always true and the
# ROI-building step ran against an empty folder before anything was
# uploaded — crashing later at cells_list[cell_num]. Truthiness is the
# correct emptiness test.
if uploaded_files:
    progress_bar = st.progress(0)
    total_files = len(uploaded_files)
    completed_files = 0

    for uploaded_file in uploaded_files:
        # Persist each upload into this run's folder.
        file_path = os.path.join(upload_folder, uploaded_file.name)
        with open(file_path, "wb") as f:
            f.write(uploaded_file.getbuffer())
        completed_files += 1
        progress = int((completed_files / total_files) * 100)
        progress_bar.progress(progress)
        # st.write(f"File saved: {file_path}")

    # Build a dictionary of all the ROIs from the uploaded zip files.
    dict_rois = build_rois(upload_folder)

    # Extract the cells: every nested dict holding both 'x' and 'y' becomes
    # one (n_points, 2) coordinate array in cells_list.
    cells_list = []
    find_all_instances(dict_rois, 'x', 'y', cells_list)
    st.session_state["cells_list"] = cells_list

    st.write(f"Successfully Loaded {len(cells_list)} cells.")

    # Sanity-check visualization — only offered when at least one cell was
    # parsed; otherwise the number_input bounds and the indexing below
    # would be invalid (len(cells_list)-1 == -1 and an IndexError).
    if cells_list:
        cell_num = st.number_input(
            f"Visualize a cell. Pick a number between 0 and {len(cells_list)-1}",
            min_value=0,
            # Upper bound added so out-of-range indices cannot be entered.
            max_value=len(cells_list) - 1,
        )

        fig, ax = plt.subplots()
        ax.plot(cells_list[cell_num][:, 0], cells_list[cell_num][:, 1])
        st.pyplot(fig)
122 changes: 90 additions & 32 deletions cells/streamlit/cells/pages/Cell_Shear.py
Original file line number Diff line number Diff line change
@@ -1,49 +1,107 @@
import streamlit as st
import pandas as pd
from io import StringIO
import matplotlib.pyplot as plt

import geomstats.backend as gs
from geomstats.geometry.euclidean import Euclidean
from geomstats.geometry.discrete_curves import R2, DiscreteCurves, ClosedDiscreteCurves

from geomstats.learning.frechet_mean import FrechetMean
from geomstats.learning.kmeans import RiemannianKMeans
from geomstats.learning.mdm import RiemannianMinimumDistanceToMean
from geomstats.learning.pca import TangentPCA

from utils import experimental

st.write(st.session_state["cells_list"])

st.write("# Welcome to the Cell Shear Analysis App! 👋")

st.markdown(
"""
## Data Source

> Ehsan Sadeghipour, Miguel A Garcia, William James Nelson, Beth L Pruitt (2018) Shear-induced damped oscillations in an epithelium depend on actomyosin contraction and E-cadherin cell adhesion eLife 7:e39640 https://doi.org/10.7554/eLife.39640

![](https://raw.githubusercontent.com/amilworks/ece594n/728845ba67ef604d307be98f78b872aa4d4052a4/hw_project/PredictingCellShear/figs/Graphical_Abstract_V1%404x.png)
## Step Zero

# Introduction and Motivation
👈 If you have not already uploaded your data, please select the __Load Data__ page and follow the instructions. The format is important, so please read carefully.

Cell-cell shear, or the action of cells sliding past each other, has roles in development, disease, and wound healing. Throughout development cells are moving past each other in every stage of development. These biomechanical cues have influences on differentiation, cell shape, behavior, the proteome, and the transcriptome.
## Analyzing Cell Data

Previous research on shear focused on fluid shear so in this paper they focused on cell-cell shear which has been well characterized. Epithelial cells known as MDCK cells were used on a MEMS device which can be precisely displaced to create consistent cell-cell shear forces. Using new segmentation and machine learning techniques we are reanalyzing the data to use the changes in cell shape to predict cell behavior/migration.

Now we will start analyzing our data. The first step is preprocessing our data, specifically interpolating, removing duplicates, and quotienting.
"""
)

uploaded_file = st.file_uploader("Choose a file")
if uploaded_file is not None:
# To read file as bytes:
bytes_data = uploaded_file.getvalue()
st.write(bytes_data)

# To convert to a string based IO:
stringio = StringIO(uploaded_file.getvalue().decode("utf-8"))
st.write(stringio)

# To read file as string:
string_data = stringio.read()
st.write(string_data)

# Can be used wherever a "file-like" object is accepted:
dataframe = pd.read_csv(uploaded_file)
st.write(dataframe)
# Streamlit page metadata for this demo page.
st.set_page_config(
page_title="Hello",
page_icon="👋",
)
# Cells produced by the Load Data page.
# NOTE(review): raises KeyError if the user opens this page before loading
# data — confirm whether a guard/redirect is intended.
cells_list = st.session_state["cells_list"]

# Resample every cell outline to the same number of points so the discrete
# curves are comparable across cells.
n_sampling_points = st.slider('Select the Number of Sampling Points', 0, 100, 50)
cells, cell_shapes = experimental.nolabel_preprocess(cells_list, len(cells_list), n_sampling_points)


# Geometry spaces for plane curves (open and closed) and the metrics
# compared below.
R1 = Euclidean(dim=1)
CLOSED_CURVES_SPACE = ClosedDiscreteCurves(R2)
CURVES_SPACE = DiscreteCurves(R2)
SRV_METRIC = CURVES_SPACE.srv_metric
L2_METRIC = CURVES_SPACE.l2_curves_metric

# One elastic metric per (a, b) parameter pair, keyed by the tuple (a, b).
ELASTIC_METRIC = {}
AS = [1, 2, 0.75, 0.5, 0.25, 0.01] #, 1.6] #, 1.4, 1.2, 1, 0.5, 0.2, 0.1]
BS = [0.5, 1, 0.5, 0.5, 0.5, 0.5] #, 2, 2, 2, 2, 2, 2, 2]
for a, b in zip(AS, BS):
ELASTIC_METRIC[a, b] = DiscreteCurves(R2, a=a, b=b).elastic_metric
METRICS = {}
METRICS["Linear"] = L2_METRIC
METRICS["SRV"] = SRV_METRIC


# Mean cell shape under each metric: a plain linear average, the SRV
# Frechet mean, and one Frechet mean per elastic (a, b) pair.
means = {}

means["Linear"] = gs.mean(cell_shapes, axis=0)
means["SRV"] = FrechetMean(
metric=SRV_METRIC,
method="default").fit(cell_shapes).estimate_


for a, b in zip(AS, BS):
means[a, b] = FrechetMean(
metric=ELASTIC_METRIC[a, b],
method="default").fit(cell_shapes).estimate_


# Plot every computed mean shape on one figure, laid out on two rows.
fig = plt.figure(figsize=(18, 8))

ncols = len(means) // 2

for i, (mean_name, mean) in enumerate(means.items()):
ax = fig.add_subplot(2, ncols, i+1)
ax.plot(mean[:, 0], mean[:, 1], "black")
ax.set_aspect("equal")
ax.axis("off")
axs_title = mean_name  # NOTE(review): unused assignment — the rebinding of mean_name below is what sets the title
if mean_name not in ["Linear", "SRV"]:
a = mean_name[0]
b = mean_name[1]
ratio = a / (2 * b)
mean_name = f"Elastic {mean_name}\n a / (2b) = {ratio}"
ax.set_title(mean_name)

st.pyplot(fig)


# Same grid of mean shapes, but each mean is first projected onto the
# space of closed curves so the outlines join up.
fig = plt.figure(figsize=(18, 8))

ncols = len(means) // 2

for i, (mean_name, mean) in enumerate(means.items()):
ax = fig.add_subplot(2, ncols, i+1)
mean = CLOSED_CURVES_SPACE.projection(mean)
ax.plot(mean[:, 0], mean[:, 1], "black")
ax.set_aspect("equal")
ax.axis("off")
axs_title = mean_name  # NOTE(review): unused assignment, same as the previous loop
if mean_name not in ["Linear", "SRV"]:
a = mean_name[0]
b = mean_name[1]
ratio = a / (2 * b)
mean_name = f"Elastic {mean_name}\n a / (2b) = {ratio}"
ax.set_title(mean_name)

# NOTE(review): this appears inside a flattened diff view and may be a
# deleted line from the old page interleaved here — confirm against the repo.
st.sidebar.success("Select a demo above.")
st.pyplot(fig)

100 changes: 100 additions & 0 deletions cells/streamlit/cells/utils/data_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
import os
from read_roi import read_roi_zip
import numpy as np


def build_rois(path) -> dict:
    """
    Build a dictionary of region-of-interest (ROI) data from a directory of ROI zip files.

    Parameters:
        path (str): The path to the directory containing ROI files.

    Returns:
        dict: Maps each file's base name (the text before the first '.')
        to the ROI data parsed by ``read_roi_zip``.

    Example:
        >>> roi_directory = '/path/to/roi_directory'
        >>> rois = build_rois(roi_directory)
        >>> print(rois)
        {'roi1': <ROI data>, 'roi2': <ROI data>, ...}
    """
    # Sorted listing gives a deterministic insertion order for the dict.
    return {
        filename.split(".")[0]: read_roi_zip(os.path.join(path, filename))
        for filename in sorted(os.listdir(path))
    }


def find_key(dictionary, target_key):
    """
    Recursively search a nested dictionary for a key.

    Parameters:
        dictionary (dict): The nested dictionary to search.
        target_key (str): The key to find.

    Returns:
        object: The value of the first match — the current level is checked
        before descending depth-first into nested dicts — or None when the
        key is absent everywhere.

    Example:
        >>> data = {
        ...     'key1': {
        ...         'key2': {
        ...             'key3': 'value3',
        ...             'key4': 'value4'
        ...         }
        ...     }
        ... }
        >>> find_key(data, 'key4')
        'value4'
    """
    # EAFP: try the direct lookup first.
    try:
        return dictionary[target_key]
    except KeyError:
        pass

    # Not at this level — descend into each nested dict until one yields a hit.
    nested_dicts = (v for v in dictionary.values() if isinstance(v, dict))
    for sub_dict in nested_dicts:
        hit = find_key(sub_dict, target_key)
        if hit is not None:
            return hit

    return None


def find_all_instances(dictionary, target_key1, target_key2, results_list):
    """
    Recursively collect paired values for two target keys in a nested dictionary.

    Whenever a (sub-)dictionary contains BOTH target keys at the same level,
    their values are stacked column-wise into one array and appended to
    ``results_list``. Deeper matches are appended before shallower ones,
    because nested dicts are visited while their parent is still being scanned.

    Parameters:
        dictionary (dict): The nested dictionary to search.
        target_key1 (hashable): The first target key to find.
        target_key2 (hashable): The second target key to find.
        results_list (list): Output list the paired arrays are appended to.

    Returns:
        None

    Example:
        >>> my_dict = {
        ...     "a": 1,
        ...     "b": {"c": 2, "d": 3},
        ...     "e": {"f": 4, "g": {"a": 5, "c": 6}},
        ...     "i": 7
        ... }
        >>> instances = []
        >>> find_all_instances(my_dict, "a", "c", instances)
        >>> print(instances)
        [5, 6]
    """
    hits = set()
    for key, value in dictionary.items():
        if key in (target_key1, target_key2):
            # A target key found here is recorded but never descended into:
            # its value is treated as data, matching the original elif chain.
            hits.add(key)
        elif isinstance(value, dict):
            find_all_instances(value, target_key1, target_key2, results_list)

    if target_key1 in hits and target_key2 in hits:
        paired = np.array([dictionary[target_key1], dictionary[target_key2]]).T
        results_list.append(paired)
Loading