Skip to content

🔥 Removed and renamed files for PyPi release #3

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
May 18, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions cells/streamlit/cellgeometry/Hello.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
import streamlit as st




# Landing page of the multipage app: sets the browser-tab metadata, greets
# the user and points them at the sidebar.
st.set_page_config(
    page_title="Welcome",
    page_icon="👋",
)

st.write("# Welcome to the Cell Shape Analysis App! 👋")

st.sidebar.success("Select a demo above.")

# The bold call to action must sit on one line inside its own paragraph:
# Markdown emphasis cannot span a blank line, so a lone "**" separated from
# its text would be rendered as literal asterisks.
st.markdown(
    """
Geomstats is an open-source Python package for computations, statistics, and machine learning on nonlinear manifolds. Data from many application fields are elements of manifolds. For instance, the manifold of 3D rotations SO(3) naturally appears when performing statistical learning on articulated objects like the human spine or robotics arms.

**👈 Select a demo from the sidebar**
"""
)
Empty file.
Original file line number Diff line number Diff line change
@@ -0,0 +1,216 @@
import os
import subprocess
import geomstats.backend as gs
import streamlit as st
import time
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy import stats

from sklearn.cluster import KMeans
from sklearn.metrics import confusion_matrix
from sklearn.decomposition import PCA

from scipy.optimize import linear_sum_assignment as linear_assignment
from sklearn import manifold
from joblib import Parallel, delayed
from numba import jit, njit, prange

from geomstats.geometry.euclidean import Euclidean
from geomstats.geometry.discrete_curves import R2, DiscreteCurves, ClosedDiscreteCurves

from geomstats.learning.frechet_mean import FrechetMean
from geomstats.learning.kmeans import RiemannianKMeans
from geomstats.learning.mdm import RiemannianMinimumDistanceToMean
from geomstats.learning.pca import TangentPCA

import sys
sys.path.append("/app/utils")

# import utils
from utils import experimental as experimental
from utils import basic as basic




# Browser-tab title and icon for this page.
st.set_page_config(
    page_title="Elastic Metric for Cell Boundary Analysis",
    page_icon="📈",
)

# Introductory text shown under the page heading.
intro_text = """This notebook studies Osteosarcoma (bone cancer) cells and the impact of drug treatment on their morphological shapes, by analyzing cell images obtained from fluorescence microscopy.

This analysis relies on the elastic metric between discrete curves from Geomstats. We will study to which extent this metric can detect how the cell shape is associated with the response to treatment."""

# Same heading in the main area and in the sidebar, followed by the intro.
st.markdown("# Shape Analysis of Cancer Cells")
st.sidebar.header("Shape Analysis of Cancer Cells")
st.write(intro_text)

# --- Analysis parameters and data loading -----------------------------------
dataset_name = "osteosarcoma"

# Smallest selectable number of sampling points. The distances computed
# further down are divided by n_sampling_points, so 0 must not be selectable,
# and a closed planar outline needs at least three points.
MIN_SAMPLING_POINTS = 3

# Slider arguments are (label, min, max, default).
n_sampling_points = st.slider(
    'Select the Number of Sampling Points', MIN_SAMPLING_POINTS, 250, 100
)
n_cells = 650
labels_a_name = "lines"
labels_b_name = "treatments"

# Transformations passed to the loader's `quotient` argument.
quotient = ["rotation"]
# When True, the analysis uses `cells` as returned by the loader instead of
# `cell_shapes`.
do_not_quotient = False


if dataset_name == "osteosarcoma":
    cells, cell_shapes, labels_a, labels_b = experimental.load_treated_osteosarcoma_cells(
        n_cells=n_cells, n_sampling_points=n_sampling_points, quotient=quotient
    )
else:
    # Without a loader none of the names used below would exist; fail with an
    # explicit message instead of a NameError a few lines later.
    raise ValueError(f"Unsupported dataset: {dataset_name!r}")


# Map every distinct label to its position in np.unique's sorted output.
labels_a_dict = {lab: i_lab for i_lab, lab in enumerate(np.unique(labels_a))}
labels_b_dict = {lab: i_lab for i_lab, lab in enumerate(np.unique(labels_b))}

# Diagnostic output: print() writes to the process's standard output, not to
# the rendered page.
print(f"Dictionary associated to label \"{labels_a_name}\":")
print(labels_a_dict)
print(f"Dictionary associated to label \"{labels_b_name}\":")
print(labels_b_dict)

if do_not_quotient:
    cell_shapes = cells

# --- Grid of randomly drawn cells: one row per label_b value, and
# n_cells_to_plot columns per label_a value ----------------------------------
n_cells_to_plot = 10

unique_labels_a = np.unique(labels_a)
unique_labels_b = np.unique(labels_b)
grid_rows = len(unique_labels_b)
grid_cols = len(unique_labels_a) * n_cells_to_plot
# Column (within each group) that carries the group's title.
title_slot = n_cells_to_plot // 2

fig = plt.figure(figsize=(16, 6))
count = 1  # 1-based subplot index, advanced once per drawn cell
for label_b in unique_labels_b:
    for i_lab_a, label_a in enumerate(unique_labels_a):
        # All shapes carrying this (label_a, label_b) combination.
        cell_data = [
            shape
            for shape, lab_a, lab_b in zip(cell_shapes, labels_a, labels_b)
            if lab_a == label_a and lab_b == label_b
        ]
        for i_to_plot in range(n_cells_to_plot):
            cell = gs.random.choice(a=cell_data)
            ax = fig.add_subplot(grid_rows, grid_cols, count)
            count += 1
            # One colour per label_a value (matplotlib colour cycle "C<i>").
            ax.plot(cell[:, 0], cell[:, 1], color=f"C{i_lab_a}")
            ax.axis("equal")
            ax.axis("off")
            if i_to_plot == title_slot:
                ax.set_title(f"{label_a} - {label_b}", fontsize=20)
st.pyplot(fig)

# --- Shape spaces and metrics ------------------------------------------------
R1 = Euclidean(dim=1)
CLOSED_CURVES_SPACE = ClosedDiscreteCurves(R2)
CURVES_SPACE = DiscreteCurves(R2)
SRV_METRIC = CURVES_SPACE.srv_metric
L2_METRIC = CURVES_SPACE.l2_curves_metric

# Elastic-metric parameters; AS[i] is paired with BS[i].
AS = [1, 2, 0.75, 0.5, 0.25, 0.01]
BS = [0.5, 1, 0.5, 0.5, 0.5, 0.5]

# One elastic metric per (a, b) pair, keyed by that pair.
ELASTIC_METRIC = {
    (a, b): DiscreteCurves(R2, a=a, b=b).elastic_metric for a, b in zip(AS, BS)
}
METRICS = {"Linear": L2_METRIC, "SRV": SRV_METRIC}


def _frechet_mean(metric):
    """Fit a FrechetMean with ``metric`` on ``cell_shapes`` and return its estimate."""
    return FrechetMean(metric=metric, method="default").fit(cell_shapes).estimate_


# --- Mean shapes -------------------------------------------------------------
# "Linear" is the plain average over the first axis; the other entries are
# Frechet means under the SRV metric and under each elastic metric.
means = {"Linear": gs.mean(cell_shapes, axis=0)}
means["SRV"] = _frechet_mean(SRV_METRIC)

for a, b in zip(AS, BS):
    means[a, b] = _frechet_mean(ELASTIC_METRIC[a, b])

def _plot_means(means_by_name, transform=None):
    """Draw every mean curve on a two-row grid and return the figure.

    Parameters:
        means_by_name (dict): Maps a name to a curve indexed as
            ``curve[:, 0]`` / ``curve[:, 1]``. A name is either "Linear",
            "SRV", or an ``(a, b)`` pair of elastic-metric parameters.
        transform (callable, optional): Applied to each curve before it is
            drawn. ``None`` draws the curves unchanged.

    Returns:
        The matplotlib figure holding one subplot per curve.
    """
    figure = plt.figure(figsize=(18, 8))
    # Round up so that an odd number of curves still fits on two rows.
    n_columns = (len(means_by_name) + 1) // 2

    for position, (name, curve) in enumerate(means_by_name.items(), start=1):
        if transform is not None:
            curve = transform(curve)
        axis = figure.add_subplot(2, n_columns, position)
        axis.plot(curve[:, 0], curve[:, 1], "black")
        axis.set_aspect("equal")
        axis.axis("off")
        if name in ("Linear", "SRV"):
            title = name
        else:
            # Elastic means are keyed by their (a, b) parameters.
            a, b = name
            title = f"Elastic {name}\n a / (2b) = {a / (2 * b)}"
        axis.set_title(title)
    return figure


st.header("Sample Means")
st.markdown("We compare results when computing the mean cell versus the mean cell shapes with different elastic metrics.")

# Means exactly as estimated.
fig = _plot_means(means)
st.pyplot(fig)

# Means after projection with CLOSED_CURVES_SPACE.projection. The remark
# below refers to this figure, so it has to be rendered on the page.
fig = _plot_means(means, transform=CLOSED_CURVES_SPACE.projection)
st.pyplot(fig)

st.markdown("__Remark:__ Unfortunately, there are some numerical issues with the projection in the space of closed curves, as shown by the V-shaped results above.")

st.markdown("Since ratios of 1 give the same results as for the SRV metric, we only select AS, BS with a ratio that is not 1 for the elastic metrics.")

st.markdown("We also continue the analysis with the space of open curves, as opposed to the space of closed curves, for the numerical issues observed above.")


# Elastic parameters kept for the distance analysis; NEW_AS[i] pairs with
# NEW_BS[i].
NEW_AS = [0.75, 0.5, 0.25, 0.01]
NEW_BS = [0.5, 0.5, 0.5, 0.5]

st.markdown("## Distances to the Mean")


def _rescaled(raw_distance):
    """Multiply a distance by 100 and divide it by the number of sampling points.

    The factor 100 is for visualization purposes only; it amounts to a
    change of units.
    """
    return 100 * raw_distance / n_sampling_points


# Distance of every cell shape to the mean computed with the same metric.
dists = {}
dists["Linear"] = [
    _rescaled(gs.linalg.norm(means["Linear"] - shape)) for shape in cell_shapes
]
dists["SRV"] = [
    _rescaled(SRV_METRIC.dist(means["SRV"], shape)) for shape in cell_shapes
]
for a, b in zip(NEW_AS, NEW_BS):
    elastic_metric = ELASTIC_METRIC[a, b]
    elastic_mean = means[a, b]
    dists[a, b] = [
        _rescaled(elastic_metric.dist(elastic_mean, shape)) for shape in cell_shapes
    ]

# One row per cell: its two labels followed by one column per metric.
summary_columns = {
    labels_a_name: labels_a,
    labels_b_name: labels_b,
    "Linear": dists["Linear"],
    "SRV": dists["SRV"],
}
dists_summary = pd.DataFrame(data=summary_columns)

for a, b in zip(NEW_AS, NEW_BS):
    dists_summary[f"Elastic({a}, {b})"] = dists[a, b]

st.dataframe(dists_summary)
# SAVEFIG = True
# if SAVEFIG:
# figs_dir = os.path.join(work_dir, f"cells/saved_figs/{dataset_name}")
# if not os.path.exists(figs_dir):
# os.makedirs(figs_dir)
# print(f"Will save figs to {figs_dir}")
# from datetime import datetime

# now = datetime.now().strftime("%Y%m%d_%H_%M_%S")
# print(now)
119 changes: 119 additions & 0 deletions cells/streamlit/cellgeometry/pages/1-Load_Data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
import streamlit as st
import pandas as pd
import os
import time
import matplotlib.pyplot as plt

import sys
sys.path.append("/app/utils")

from utils.data_utils import build_rois, find_all_instances

# Streamlit re-executes this script from the top on every widget interaction.
# Computing the timestamp unconditionally would therefore yield a new value,
# and hence a new upload folder, on almost every rerun. The identifier is
# generated once per session and cached in session state instead.
if "run_timestamp" not in st.session_state:
    # Format: <year><day of year>-<HHMMSS>, local time.
    st.session_state["run_timestamp"] = time.strftime("%Y%j-%H%M%S", time.localtime())

current_time_string = st.session_state["run_timestamp"]

# Placeholder until data is loaded; replaced further down by the list of
# loaded cells.
if "cells_list" not in st.session_state:
    st.session_state["cells_list"] = True

# Page heading.
st.write("# Load Your Cell Data 👋")

# Getting-started instructions: describes the expected FIJI/ImageJ ROI folder
# layout and the formats planned for later.
getting_started_md = """
## Getting Started

We currently support an ROI zip folder created by FIJI/ImageJ. What this means is you may have a folder structure as follows:
```
└── Cropped_Images
├── Bottom_plank_0
│ ├── Averaged_ROI
│ ├── Data
│ ├── Data_Filtered
│ ├── Labels
│ ├── OG
│ ├── Outlines
│ └── ROIs <---- Folder of zipped ROIs
```
You can simply upload this ROIs folder and we will load your data for you. We plan on supporting data given in `xy` coordinate format from `JSON` and CSV/TXT files.
Your chosen data structure __must__ contain `x` and `y` for the program to correctly parse and load your data.
"""

st.markdown(getting_started_md)

def get_files_from_folder(folder_path):
    """
    Return the paths of the regular files located directly inside a folder.

    Sub-directories are skipped and are not descended into. Every returned
    path is ``folder_path`` joined with the entry name, in the order reported
    by ``os.listdir``.

    Parameters:
        folder_path (str): The path to the folder.

    Returns:
        list: One path per file found; empty when the folder holds no files.
    """
    candidate_paths = (
        os.path.join(folder_path, entry) for entry in os.listdir(folder_path)
    )
    return [path for path in candidate_paths if os.path.isfile(path)]



# Destination folder for this run's uploads, named after the run timestamp.
upload_folder = f"/app/data/run-{current_time_string}"

# Create the folder on first use and tell the user where it is.
folder_missing = not os.path.exists(upload_folder)
if folder_missing:
    os.makedirs(upload_folder)
    st.info(f"Upload folder created: {upload_folder}")

# Files already present in the upload folder (not referenced again in this
# script).
files = get_files_from_folder(upload_folder)

# Uploader widget: zip archives only, several files at once.
uploaded_files = st.file_uploader(
    "Upload a file",
    type=["zip"],
    accept_multiple_files=True,
)


# Process the uploaded files.
# With accept_multiple_files=True the uploader returns an empty list (never
# None) while nothing has been uploaded, so test for emptiness. This skips
# the whole pipeline, including the preview that would otherwise index an
# empty list, until there is something to load.
if uploaded_files:
    progress_bar = st.progress(0)
    total_files = len(uploaded_files)

    for completed_files, uploaded_file in enumerate(uploaded_files, start=1):
        # The file name is supplied by the client: keep only its last path
        # component so it cannot point outside the upload folder.
        safe_name = os.path.basename(uploaded_file.name)
        file_path = os.path.join(upload_folder, safe_name)
        with open(file_path, "wb") as f:
            f.write(uploaded_file.getbuffer())
        # Progress is reported as an integer percentage of files written.
        progress_bar.progress(int((completed_files / total_files) * 100))

    # Build a dictionary of all the ROIs found in the upload folder.
    dict_rois = build_rois(upload_folder)

    # Extract the cells. find_all_instances is given the output list and its
    # return value is not used, so it presumably fills cells_list in place.
    cells_list = []
    find_all_instances(dict_rois, 'x', 'y', cells_list)
    st.session_state["cells_list"] = cells_list

    st.write(f"Successfully Loaded {len(cells_list)} cells.")

    # Sanity check visualization, only offered when at least one cell exists.
    if cells_list:
        cell_num = st.number_input(
            f"Visualize a cell. Pick a number between 0 and {len(cells_list)-1}",
            min_value=0,
            # Upper bound keeps the index inside cells_list.
            max_value=len(cells_list) - 1,
        )

        # Each cell is indexed as cell[:, 0] / cell[:, 1] (assumed to be the
        # x and y coordinates, TODO confirm against find_all_instances).
        fig, ax = plt.subplots()
        ax.plot(cells_list[cell_num][:,0], cells_list[cell_num][:,1])
        st.pyplot(fig)
Loading