Skip to content

Updates to Pages and Dependencies #2

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into the base branch from the contributor's branch
May 12, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion cells/streamlit/cells/Hello.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
st.markdown(
"""
Geomstats is an open-source Python package for computations, statistics, and machine learning on nonlinear manifolds. Data from many application fields are elements of manifolds. For instance, the manifold of 3D rotations SO(3) naturally appears when performing statistical learning on articulated objects like the human spine or robotics arms.
**👈 Select a demo from the sidebar**
**

👈 Select a demo from the sidebar**
"""
)
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@
NEW_AS = [0.75, 0.5, 0.25, 0.01] #, 1.6] #, 1.4, 1.2, 1, 0.5, 0.2, 0.1]
NEW_BS = [0.5, 0.5, 0.5, 0.5] #, 2, 2, 2, 2, 2, 2, 2]

st.makrdown("## Distances to the Mean")
st.markdown("## Distances to the Mean")

# We multiply the distances by a 100, for visualization purposes. It amounts to a change of units.
dists = {}
Expand Down Expand Up @@ -203,7 +203,7 @@
for a, b in zip(NEW_AS, NEW_BS):
dists_summary[f"Elastic({a}, {b})"] = dists[a, b]

st.dataframe(dists_summary)
st.dataframe(dists_summary)
# SAVEFIG = True
# if SAVEFIG:
# figs_dir = os.path.join(work_dir, f"cells/saved_figs/{dataset_name}")
Expand Down
119 changes: 119 additions & 0 deletions cells/streamlit/cells/pages/1-Load_Data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
import streamlit as st
import pandas as pd
import os
import time
import matplotlib.pyplot as plt

import sys
sys.path.append("/app/utils")

from utils.data_utils import build_rois, find_all_instances

# Timestamp used to name this run's upload folder: YYYYDDD-HHMMSS
# (year, zero-padded day-of-year, then wall-clock time).
current_time = time.localtime()

year = time.strftime("%Y", current_time)
day_of_year = time.strftime("%j", current_time)
time_string = time.strftime("%H%M%S", current_time)

current_time_string = "{}{}-{}".format(year, day_of_year, time_string)

# Seed the session-state slot that the analysis pages read.
# NOTE(review): True is presumably a placeholder until real cell data is
# stored below — confirm the other pages tolerate a non-list value.
if "cells_list" not in st.session_state:
    st.session_state["cells_list"] = True

st.write("# Load Your Cell Data 👋")

# Page intro: explains the expected FIJI/ImageJ ROI folder layout.
st.markdown(
    """
## Getting Started

We currently support an ROI zip folder created by FIJI/ImageJ. What this means is you may have a folder structure as follows:
```
└── Cropped_Images
├── Bottom_plank_0
│ ├── Averaged_ROI
│ ├── Data
│ ├── Data_Filtered
│ ├── Labels
│ ├── OG
│ ├── Outlines
│ └── ROIs <---- Folder of zipped ROIs
```
You can simply upload this ROIs folder and we will load your data for you. We plan on supporting data given in `xy` coordinate format from `JSON` and CSV/TXT files.
Your chosen data structure __must__ contain `x` and `y` for the program to correctly parse and load your data.
"""
)

def get_files_from_folder(folder_path):
    """
    Retrieve the files directly inside a folder (non-recursive).

    Parameters:
        folder_path (str): The path to the folder.

    Returns:
        list: Full paths of the regular files in ``folder_path``;
        subdirectories are skipped. Order follows ``os.listdir``, which is
        arbitrary — sort the result if a stable order is needed.

    Example:
        >>> files = get_files_from_folder('/path/to/folder')
        >>> print(files)
        ['/path/to/folder/file1.txt', '/path/to/folder/file2.csv', '/path/to/folder/file3.jpg']
    """
    # Join each entry once, then keep only regular files — replaces the
    # manual append loop that rebuilt the same path twice per entry.
    candidates = (os.path.join(folder_path, name) for name in os.listdir(folder_path))
    return [path for path in candidates if os.path.isfile(path)]



# Specify the folder path for file uploads; each run is saved under a
# date-and-time folder name so runs never collide.
upload_folder = f"/app/data/run-{current_time_string}"

# Check if the upload folder exists, and create it if it doesn't.
if not os.path.exists(upload_folder):
    os.makedirs(upload_folder)
    st.info(f"Upload folder created: {upload_folder}")

# Get the list of files already in the upload folder.
files = get_files_from_folder(upload_folder)

# Display the file uploader.
uploaded_files = st.file_uploader("Upload a file", type=["zip"], accept_multiple_files=True)

# Process the uploaded files.
# BUGFIX: with accept_multiple_files=True, st.file_uploader returns a list
# (possibly empty), never None, so `is not None` was always true and the
# ROI-building step ran against an empty folder before anything was
# uploaded — crashing later at cells_list[cell_num]. Truthiness is the
# correct emptiness test.
if uploaded_files:
    progress_bar = st.progress(0)
    total_files = len(uploaded_files)
    completed_files = 0

    for uploaded_file in uploaded_files:
        # Persist each upload into this run's folder.
        file_path = os.path.join(upload_folder, uploaded_file.name)
        with open(file_path, "wb") as f:
            f.write(uploaded_file.getbuffer())
        completed_files += 1
        progress = int((completed_files / total_files) * 100)
        progress_bar.progress(progress)
        # st.write(f"File saved: {file_path}")

    # Build a dictionary of all the ROIs from the uploaded zip files.
    dict_rois = build_rois(upload_folder)

    # Extract the cells: every nested dict holding both 'x' and 'y' becomes
    # one (n_points, 2) coordinate array in cells_list.
    cells_list = []
    find_all_instances(dict_rois, 'x', 'y', cells_list)
    st.session_state["cells_list"] = cells_list

    st.write(f"Successfully Loaded {len(cells_list)} cells.")

    # Sanity-check visualization — only offered when at least one cell was
    # parsed; otherwise the number_input bounds and the indexing below
    # would be invalid (len(cells_list)-1 == -1 and an IndexError).
    if cells_list:
        cell_num = st.number_input(
            f"Visualize a cell. Pick a number between 0 and {len(cells_list)-1}",
            min_value=0,
            # Upper bound added so out-of-range indices cannot be entered.
            max_value=len(cells_list) - 1,
        )

        fig, ax = plt.subplots()
        ax.plot(cells_list[cell_num][:, 0], cells_list[cell_num][:, 1])
        st.pyplot(fig)
122 changes: 90 additions & 32 deletions cells/streamlit/cells/pages/Cell_Shear.py
Original file line number Diff line number Diff line change
@@ -1,49 +1,107 @@
import streamlit as st
import pandas as pd
from io import StringIO
import matplotlib.pyplot as plt

import geomstats.backend as gs
from geomstats.geometry.euclidean import Euclidean
from geomstats.geometry.discrete_curves import R2, DiscreteCurves, ClosedDiscreteCurves

from geomstats.learning.frechet_mean import FrechetMean
from geomstats.learning.kmeans import RiemannianKMeans
from geomstats.learning.mdm import RiemannianMinimumDistanceToMean
from geomstats.learning.pca import TangentPCA

from utils import experimental

st.write(st.session_state["cells_list"])

st.write("# Welcome to the Cell Shear Analysis App! 👋")

st.markdown(
"""
## Data Source

> Ehsan Sadeghipour, Miguel A Garcia, William James Nelson, Beth L Pruitt (2018) Shear-induced damped oscillations in an epithelium depend on actomyosin contraction and E-cadherin cell adhesion eLife 7:e39640 https://doi.org/10.7554/eLife.39640

![](https://raw.githubusercontent.com/amilworks/ece594n/728845ba67ef604d307be98f78b872aa4d4052a4/hw_project/PredictingCellShear/figs/Graphical_Abstract_V1%404x.png)
## Step Zero

# Introduction and Motivation
👈 If you have not already uploaded your data, please select the __Load Data__ page and follow the instructions. The format is important, so please read carefully.

Cell-cell shear, or the action of cells sliding past each other, has roles in development, disease, and wound healing. Throughout development cells are moving past each other in every stage of development. These biomechanical cues have influences on differentiation, cell shape, behavior, the proteome, and the transcriptome.
## Analyzing Cell Data

Previous research on shear focused on fluid shear so in this paper they focused on cell-cell shear which has been well characterized. Epithelial cells known as MDCK cells were used on a MEMS device which can be precisely displaced to create consistent cell-cell shear forces. Using new segmentation and machine learning techniques we are reanalyzing the data to use the changes in cell shape to predict cell behavior/migration.

Now we will start analyzing our data. The first step is preprocessing our data, specifically interpolating, removing duplicates, and quotienting.
"""
)

uploaded_file = st.file_uploader("Choose a file")
if uploaded_file is not None:
# To read file as bytes:
bytes_data = uploaded_file.getvalue()
st.write(bytes_data)

# To convert to a string based IO:
stringio = StringIO(uploaded_file.getvalue().decode("utf-8"))
st.write(stringio)

# To read file as string:
string_data = stringio.read()
st.write(string_data)

# Can be used wherever a "file-like" object is accepted:
dataframe = pd.read_csv(uploaded_file)
st.write(dataframe)
# Streamlit page metadata for this demo page.
st.set_page_config(
page_title="Hello",
page_icon="👋",
)
# Cells produced by the Load Data page.
# NOTE(review): raises KeyError if the user opens this page before loading
# data — confirm whether a guard/redirect is intended.
cells_list = st.session_state["cells_list"]

# Resample every cell outline to the same number of points so the discrete
# curves are comparable across cells.
n_sampling_points = st.slider('Select the Number of Sampling Points', 0, 100, 50)
cells, cell_shapes = experimental.nolabel_preprocess(cells_list, len(cells_list), n_sampling_points)


# Geometry spaces for plane curves (open and closed) and the metrics
# compared below.
R1 = Euclidean(dim=1)
CLOSED_CURVES_SPACE = ClosedDiscreteCurves(R2)
CURVES_SPACE = DiscreteCurves(R2)
SRV_METRIC = CURVES_SPACE.srv_metric
L2_METRIC = CURVES_SPACE.l2_curves_metric

# One elastic metric per (a, b) parameter pair, keyed by the tuple (a, b).
ELASTIC_METRIC = {}
AS = [1, 2, 0.75, 0.5, 0.25, 0.01] #, 1.6] #, 1.4, 1.2, 1, 0.5, 0.2, 0.1]
BS = [0.5, 1, 0.5, 0.5, 0.5, 0.5] #, 2, 2, 2, 2, 2, 2, 2]
for a, b in zip(AS, BS):
ELASTIC_METRIC[a, b] = DiscreteCurves(R2, a=a, b=b).elastic_metric
METRICS = {}
METRICS["Linear"] = L2_METRIC
METRICS["SRV"] = SRV_METRIC


# Mean cell shape under each metric: a plain linear average, the SRV
# Frechet mean, and one Frechet mean per elastic (a, b) pair.
means = {}

means["Linear"] = gs.mean(cell_shapes, axis=0)
means["SRV"] = FrechetMean(
metric=SRV_METRIC,
method="default").fit(cell_shapes).estimate_


for a, b in zip(AS, BS):
means[a, b] = FrechetMean(
metric=ELASTIC_METRIC[a, b],
method="default").fit(cell_shapes).estimate_


# Plot every computed mean shape on one figure, laid out on two rows.
fig = plt.figure(figsize=(18, 8))

ncols = len(means) // 2

for i, (mean_name, mean) in enumerate(means.items()):
ax = fig.add_subplot(2, ncols, i+1)
ax.plot(mean[:, 0], mean[:, 1], "black")
ax.set_aspect("equal")
ax.axis("off")
axs_title = mean_name  # NOTE(review): unused assignment — the rebinding of mean_name below is what sets the title
if mean_name not in ["Linear", "SRV"]:
a = mean_name[0]
b = mean_name[1]
ratio = a / (2 * b)
mean_name = f"Elastic {mean_name}\n a / (2b) = {ratio}"
ax.set_title(mean_name)

st.pyplot(fig)


# Same grid of mean shapes, but each mean is first projected onto the
# space of closed curves so the outlines join up.
fig = plt.figure(figsize=(18, 8))

ncols = len(means) // 2

for i, (mean_name, mean) in enumerate(means.items()):
ax = fig.add_subplot(2, ncols, i+1)
mean = CLOSED_CURVES_SPACE.projection(mean)
ax.plot(mean[:, 0], mean[:, 1], "black")
ax.set_aspect("equal")
ax.axis("off")
axs_title = mean_name  # NOTE(review): unused assignment, same as the previous loop
if mean_name not in ["Linear", "SRV"]:
a = mean_name[0]
b = mean_name[1]
ratio = a / (2 * b)
mean_name = f"Elastic {mean_name}\n a / (2b) = {ratio}"
ax.set_title(mean_name)

# NOTE(review): this appears inside a flattened diff view and may be a
# deleted line from the old page interleaved here — confirm against the repo.
st.sidebar.success("Select a demo above.")
st.pyplot(fig)

100 changes: 100 additions & 0 deletions cells/streamlit/cells/utils/data_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
import os
from read_roi import read_roi_zip
import numpy as np


def build_rois(path) -> dict:
    """
    Build a dictionary of region-of-interest (ROI) data from a directory of ROI zip files.

    Parameters:
        path (str): The path to the directory containing ROI files.

    Returns:
        dict: Maps each file's base name (the text before the first '.')
        to the ROI data parsed by ``read_roi_zip``.

    Example:
        >>> roi_directory = '/path/to/roi_directory'
        >>> rois = build_rois(roi_directory)
        >>> print(rois)
        {'roi1': <ROI data>, 'roi2': <ROI data>, ...}
    """
    # Sorted listing gives a deterministic insertion order for the dict.
    return {
        filename.split(".")[0]: read_roi_zip(os.path.join(path, filename))
        for filename in sorted(os.listdir(path))
    }


def find_key(dictionary, target_key):
    """
    Recursively search a nested dictionary for a key.

    Parameters:
        dictionary (dict): The nested dictionary to search.
        target_key (str): The key to find.

    Returns:
        object: The value of the first match — the current level is checked
        before descending depth-first into nested dicts — or None when the
        key is absent everywhere.

    Example:
        >>> data = {
        ...     'key1': {
        ...         'key2': {
        ...             'key3': 'value3',
        ...             'key4': 'value4'
        ...         }
        ...     }
        ... }
        >>> find_key(data, 'key4')
        'value4'
    """
    # EAFP: try the direct lookup first.
    try:
        return dictionary[target_key]
    except KeyError:
        pass

    # Not at this level — descend into each nested dict until one yields a hit.
    nested_dicts = (v for v in dictionary.values() if isinstance(v, dict))
    for sub_dict in nested_dicts:
        hit = find_key(sub_dict, target_key)
        if hit is not None:
            return hit

    return None


def find_all_instances(dictionary, target_key1, target_key2, results_list):
    """
    Recursively collect paired values for two target keys in a nested dictionary.

    Whenever a (sub-)dictionary contains BOTH target keys at the same level,
    their values are stacked column-wise into one array and appended to
    ``results_list``. Deeper matches are appended before shallower ones,
    because nested dicts are visited while their parent is still being scanned.

    Parameters:
        dictionary (dict): The nested dictionary to search.
        target_key1 (hashable): The first target key to find.
        target_key2 (hashable): The second target key to find.
        results_list (list): Output list the paired arrays are appended to.

    Returns:
        None

    Example:
        >>> my_dict = {
        ...     "a": 1,
        ...     "b": {"c": 2, "d": 3},
        ...     "e": {"f": 4, "g": {"a": 5, "c": 6}},
        ...     "i": 7
        ... }
        >>> instances = []
        >>> find_all_instances(my_dict, "a", "c", instances)
        >>> print(instances)
        [5, 6]
    """
    hits = set()
    for key, value in dictionary.items():
        if key in (target_key1, target_key2):
            # A target key found here is recorded but never descended into:
            # its value is treated as data, matching the original elif chain.
            hits.add(key)
        elif isinstance(value, dict):
            find_all_instances(value, target_key1, target_key2, results_list)

    if target_key1 in hits and target_key2 in hits:
        paired = np.array([dictionary[target_key1], dictionary[target_key2]]).T
        results_list.append(paired)
Loading