@@ -708,7 +708,7 @@ conda activate authentication
708
708
We navigate to the working directory:
709
709
710
710
``` bash
711
- cd metadmg/figures
711
+ cd metadmg/
712
712
```
713
713
714
714
We load R by running ` R ` in your terminal
@@ -738,7 +738,7 @@ We will use the function `get_dmg_decay_fit` to visualise damage pattern (@fig-a
738
738
The function is saved in ` metadmg/script/ ` , so we only need to run the following command to recall it:
739
739
740
740
``` {r eval=F}
741
- source("metadmg/ script/get_dmg_decay_fit.R")
741
+ source("script/get_dmg_decay_fit.R")
742
742
```
743
743
744
744
But if you are curious and want to know how it works, here is the function itself:
@@ -848,10 +848,10 @@ get_dmg_decay_fit <- function(df, orient = "fwd", pos = 30, p_breaks = c(0, 0.1,
848
848
}
849
849
```
850
850
851
- We load our metaDMG output data (tsv file) and generate the damage plots as seen in @fig-authentication-fagusovisdmg using the function ` get-damage ` .
851
+ We load our metaDMG output data (tsv file) and the metadata with information on the age of each sample. We generate the damage plots as seen in @fig-authentication-fagusovisdmg using the function ` get-damage ` .
852
852
853
853
``` {r eval=F}
854
- df <- read.csv("metadmg/ concatenated_metaDMGfinal.tsv", sep = "\t")
854
+ df <- read.csv("concatenated_metaDMGfinal.tsv", sep = "\t")
855
855
856
856
#Rename sample column
857
857
colnames(df)[colnames(df) == 'filename'] <- 'sample'
@@ -864,6 +864,17 @@ df$sample[df$sample == "VM-17_aggregated_results.stat"] <- "VM-17"
864
864
df$sample[df$sample == "VM-19_aggregated_results.stat"] <- "VM-19"
865
865
df$sample[df$sample == "VM-3_aggregated_results.stat"] <- "VM-3"
866
866
867
+ #Import the metadata with dates BP
868
+ depth_data <- read.csv ("figures/depth_data.csv", header = TRUE)
869
+ View (depth_data)
870
+
871
+ #Merge context_data and depth_data with dataframe (adding new column for dates BP)
872
+ df$new <- depth_data$Date_BP[match(df$sample, depth_data$Sample_ID)]
873
+ names(df)[names(df) == 'new'] <- 'Date_BP'
874
+
875
+ # Convert Date_BP columns to factors (categorical variable)
876
+ df$Date_BP <- as.factor(df$Date_BP)
877
+
867
878
#Setting filtering theshold for ancient reads
868
879
minDMG = 0.02 # filter criteria, plot only taxa above set value
869
880
zfit = 2 # minimum significance, the higher the better, 2 would mean that we estimante the damage with 95% confidence.
@@ -881,7 +892,6 @@ X <- tax_g_list
881
892
purrr::map(tax_g_list, function(X, nrank) {
882
893
sel_tax <- dt1 %>%
883
894
rename(label = sample) %>%
884
- #filter(Genome_Id %in% (tax_superg %>% filter(Taxa_Super_Groups == X) %>% pull(Genome_Id))) %>%
885
895
filter(name == X) %>%
886
896
filter(rank == rank) %>%
887
897
select(name, label) %>%
@@ -899,7 +909,7 @@ purrr::map(tax_g_list, function(X, nrank) {
899
909
get_dmg_decay_fit(df = dt1 %>% rename(label = sample) %>% inner_join(sel_tax) %>% filter(rank == rank), orient = "rev", y_max = 0.70) +
900
910
ggtitle(paste0(X, " nreads=", n_readsa, " Reverse"))
901
911
), align = "hv")
902
- ggsave(paste0(X, "-dmg.pdf"), plot = last_plot(), width = 8, height = 4)
912
+ ggsave(paste0("figures/", X, "-dmg.pdf"), plot = last_plot(), width = 8, height = 4)
903
913
}
904
914
})
905
915
```
@@ -912,88 +922,60 @@ We provide an R script to investigate the main statistics.
912
922
Here we visualise the amplitude of damage (A) and its significance (Zfit), for the full dataset but filtering it to a minimum of 100 reads and at the genus level (@fig-authentication-fig8 ).
913
923
914
924
``` {r eval=F}
915
- #Subset dataset at genus level
916
- dt2 <- df %>% filter(grepl("\\bgenus\\b", rank))
917
-
918
- #plotting amplitude of damage vs its significance
919
- p1 <- ggplot(dt2, aes(y=A, x=Zfit)) +
920
- geom_point(aes(size=nreads), col ="dark green") +
921
- theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust =1)) +
922
- scale_size_continuous(labels = function(x) format(x, scientific = FALSE)) +
923
- xlab("significance") + ylab("damage") + theme_minimal()
924
- p1
925
-
926
- #Plotting only animals and plants at genus level
927
- #subset dataset
928
- dt3 <- dt2 %>% filter(nreads > 100, grepl("\\bgenus\\b", rank), grepl("Metazoa", taxa_path) | grepl("Viridiplant", taxa_path))
925
+ #Subset dataset animal and plants at the genus level
926
+ dt2 <- df %>% filter(nreads > 100, grepl("\\bgenus\\b", rank), grepl("Metazoa", taxa_path) | grepl("Viridiplant", taxa_path))
929
927
930
928
#Adding factor column for Kingdom
931
- dt3 <- dt3 %>%
929
+ dt2 <- dt2 %>%
932
930
mutate(Kingdom = # creating our new column
933
931
case_when(grepl("Viridiplant", taxa_path) ~ "Viridiplantae",
934
932
grepl("Metazoa",taxa_path) ~ "Metazoa"))
935
933
936
- #Plotting amplitude of damage vs its significance
937
- p2 <- ggplot(dt3, aes(y=A, x=Zfit)) +
934
+ #Plotting amplitude of damage vs its significance and saving as pdf file
935
+ pdf(file = "figures/p1.pdf", width = 8, height = 6)
936
+ ggplot(dt2, aes(y=A, x=Zfit)) +
938
937
geom_point(aes(size=nreads, col=Kingdom)) +
939
938
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust =1)) +
940
939
scale_color_manual(values = c("#8B1A1A", "#458B00"))+
941
940
scale_size_continuous(labels = function(x) format(x, scientific = FALSE)) +
942
941
xlab("significance") + ylab("damage") + theme_minimal()
943
- p2
944
-
945
- #Save the plots as a PDF file
946
- ggsave("p1.pdf", plot = p1, width = 8, height = 6)
947
- ggsave("p2.pdf", plot = p2, width = 8, height = 6)
942
+ dev.off()
948
943
```
949
944
950
945
![ Amplitude of damage (A) vs significance (Zfit) for animals and plants.] ( assets/images/chapters/authentication/p2.png ) {#fig-authentication-fig8}
951
946
952
947
### Amplitude of damage and mean fragment length through time
953
948
954
- Here we visualise the amplitude of damage (A) and the mean length of the fragments (mean_rlen) by depth and by date (BP) for the full dataset but filtering it to a minimum of 100 reads and at the genus level (@fig-authentication-fig9 ).
949
+ Here we visualise the amplitude of damage (A) and the mean length of the fragments (mean_rlen) by date (BP) for the filtered dataset with a minimum of 100 reads and at the genus level (@fig-authentication-fig9 ).
955
950
956
951
``` {r eval=F}
957
- #Import the metadata
958
- depth_data <- read.csv ("metadmg/figures/depth_data.csv", sep = ",")
959
- View (depth_data)
960
-
961
- #Merge context_data and depth_data with dataframe (adding new column for dates BP)
962
- df$new <- depth_data$Date_BP[match(df$sample, depth_data$Sample_ID)]
963
- names(df)[names(df) == 'new'] <- 'Date_BP'
964
-
965
- # Convert Date_BP columns to factors (categorical variable)
966
- df$Date_BP <- as.factor(df$Date_BP)
967
-
968
952
#Plotting damage (A) by period (dates BP)
969
- p3a <- dt4 %>%
953
+ p2a <- dt2 %>%
970
954
mutate(Date_BP = fct_relevel(Date_BP,
971
955
"6100","5300","4100","3900","3000", "800")) %>%
972
956
ggplot(aes(x=A, y=Date_BP))+
973
957
geom_boxplot(aes(x=A, y=Date_BP, fill = sample))+
974
958
geom_point(aes(fill = sample), size = 3, shape = 21, color = "black", stroke = .5) +
975
959
scale_x_continuous(limits = c(0, 0.20), breaks = seq(0, 0.20, by = 0.05)) +
976
960
theme_minimal() + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
977
- p3a
961
+ p2a
978
962
979
963
#Plotting mean length (mean_rlen) by period (dates BP)
980
- p3b <- dt4 %>%
964
+ p2b <- dt2 %>%
981
965
mutate(Date_BP = fct_relevel(Date_BP,
982
966
"6100","5300","4100","3900","3000", "800")) %>%
983
967
ggplot(aes(x=mean_rlen, y=Date_BP))+
984
968
geom_boxplot(aes(x=mean_rlen, y=Date_BP, fill = sample)) +
985
969
geom_point(aes(fill = sample), size = 3, shape = 21, color = "black", stroke = .5) +
986
970
scale_x_continuous(limits = c(30, 80), breaks = seq(30, 80, by = 10)) +
987
971
theme_minimal() + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
988
- p3b
972
+ p2b
989
973
990
- #Combining the plots
991
- p3 <- grid.arrange(p3a, p3b,
974
+ #Combining the plots and saving as pdf file
975
+ pdf(file = "figures/p2.pdf", width = 8, height = 6)
976
+ p2 <- grid.arrange(p2a, p2b,
992
977
ncol = 2, nrow = 1)
993
- p3
994
-
995
- #Save plot
996
- ggsave("p3.pdf", plot = p4, width = 10, height = 8)
978
+ dev.off()
997
979
```
998
980
999
981
![ Amplitude of damage (A) and mean fragment length (mean_rlen) through time.] ( assets/images/chapters/authentication/p3.png ) {#fig-authentication-fig9}
0 commit comments