Skip to content

Commit 746deea

Browse files
authored
--with-missing-fields: fill missing AD with {DP},0 (#30)
1 parent b25626d commit 746deea

File tree

1 file changed

+22
-3
lines changed

1 file changed

+22
-3
lines changed

src/spVCF.cc

+22-3
Original file line numberDiff line numberDiff line change
@@ -541,7 +541,7 @@ class DecoderImpl : public TranscoderBase {
541541
bool with_missing_fields_;
542542
string format_;
543543
vector<string> format_split_;
544-
int iAD_ = -1, iPL_ = -1;
544+
int iGT_ = -1, iDP_ = -1, iAD_ = -1, iPL_ = -1;
545545
// temp buffers used in add_missing_fields (to reduce allocations)
546546
OStringStream format_buffer_;
547547
string entry_copy_;
@@ -623,7 +623,11 @@ const char *DecoderImpl::ProcessLine(char *input_line) {
623623
split(format_copy, ':', back_inserter(format_split));
624624
for (int j = 0; j < format_split.size(); j++) {
625625
char *s = format_split[j];
626-
if (!strcmp(s, "AD")) {
626+
if (!strcmp(s, "GT")) {
627+
iGT_ = j;
628+
} else if (!strcmp(s, "DP")) {
629+
iDP_ = j;
630+
} else if (!strcmp(s, "AD")) {
627631
iAD_ = j;
628632
} else if (!strcmp(s, "PL")) {
629633
iPL_ = j;
@@ -711,6 +715,7 @@ const char *DecoderImpl::ProcessLine(char *input_line) {
711715
// Most missing fields are just represented by . except for AD and PL, which we pad with . to the
712716
// correct vector length. (In principle we should do that for any Number={A,G,R} field, but this
713717
// suffices for our practical need for this feature.)
718+
// For non-variant genotypes we also fill AD={DP},0 if it'd otherwise be missing.
714719
void DecoderImpl::add_missing_fields(const char *entry, int n_alt, string &ans) {
715720
format_buffer_.Clear();
716721
entry_copy_ = entry;
@@ -725,7 +730,21 @@ void DecoderImpl::add_missing_fields(const char *entry, int n_alt, string &ans)
725730
if (i == iAD_ && (!present || !strcmp(field, "."))) {
726731
// FIXME: handle multiallelic. Meaning we need to run this on sparse entries too.
727732
// const int nAD = n_alt + 1;
728-
format_buffer_ << ".,.";
733+
const char *gt = nullptr;
734+
if (iGT_ >= 0 && iGT_ < entry_fields_.size()) {
735+
gt = entry_fields_[iGT_];
736+
}
737+
const char *dp = nullptr;
738+
if (iDP_ >= 0 && iDP_ < entry_fields_.size()) {
739+
dp = entry_fields_[iDP_];
740+
}
741+
if (gt && dp &&
742+
(strcmp(gt, "0/0") || strcmp(gt, "0|0") || strcmp(gt, "./.") ||
743+
strcmp(gt, ".|."))) {
744+
format_buffer_ << dp << ",0";
745+
} else {
746+
format_buffer_ << ".,.";
747+
}
729748
} else if (i == iPL_ && (!present || !strcmp(field, "."))) {
730749
// const int nPL = (n_alt + 1) * (n_alt + 2) / 2;
731750
format_buffer_ << ".,.,.";

0 commit comments

Comments
 (0)