Allowing slope to use the true marginal importance weight for mips #172

Merged
merged 2 commits into from
Jun 15, 2022
Changes from all commits
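
Summary of the change: the optional p_e_a argument (the true conditional distribution of action embeddings given each action) is now threaded from estimate_policy_value into both the exact and the greedy SLOPE pruning routines, so that data-driven embedding-dimension selection can run on the true marginal importance weights rather than on weights estimated from the logged data. Below is a minimal, hypothetical usage sketch; only the estimate_policy_value keyword arguments are taken from this diff, while the class import, constructor arguments, and synthetic array shapes are assumptions about obp's MIPS estimator and should be treated as illustrative.

# Hypothetical usage sketch, not part of this PR. Only the keyword arguments of
# estimate_policy_value are taken from the diff; the class name, constructor
# arguments, and array shapes are assumptions about obp's MIPS estimator.
import numpy as np
from obp.ope import MarginalizedInverseProbabilityWeighting as MIPS

rng = np.random.default_rng(0)
n_rounds, n_actions, n_cat_per_dim, n_cat_dim = 1000, 5, 3, 4

# logged data (synthetic placeholders)
context = rng.normal(size=(n_rounds, 10))
action = rng.integers(n_actions, size=n_rounds)
reward = rng.binomial(1, 0.5, size=n_rounds).astype(float)
position = np.zeros(n_rounds, dtype=int)

# assumed true conditional distribution p(e|a), factorized per embedding dimension;
# assumed shape: (n_actions, n_cat_per_dim, n_cat_dim)
p_e_a = rng.dirichlet(np.ones(n_cat_per_dim), size=(n_actions, n_cat_dim)).transpose(0, 2, 1)
# categorical action embeddings sampled dimension-wise from p_e_a
action_embed = np.stack(
    [
        np.array([rng.choice(n_cat_per_dim, p=p_e_a[a, :, d]) for d in range(n_cat_dim)])
        for a in action
    ]
)

# uniform logging and evaluation policies as placeholders
pi_b = np.full((n_rounds, n_actions, 1), 1.0 / n_actions)
action_dist = np.full((n_rounds, n_actions, 1), 1.0 / n_actions)

mips = MIPS(n_actions=n_actions, embedding_selection_method="exact")
value = mips.estimate_policy_value(
    context=context,
    reward=reward,
    action=action,
    action_embed=action_embed,
    position=position,
    pi_b=pi_b,
    action_dist=action_dist,
    p_e_a=p_e_a,  # after this PR, SLOPE pruning also receives the true p(e|a)
)
print(value)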
103 changes: 73 additions & 30 deletions obp/ope/estimators_embed.py
@@ -303,6 +303,7 @@ def estimate_policy_value(
                     position=position,
                     pi_b=pi_b,
                     action_dist=action_dist,
+                    p_e_a=p_e_a,
                 )
             elif self.embedding_selection_method == "greedy":
                 return self._estimate_with_greedy_pruning(
@@ -313,6 +314,7 @@
                     position=position,
                     pi_b=pi_b,
                     action_dist=action_dist,
+                    p_e_a=p_e_a,
                 )
         else:
             return self._estimate_round_rewards(
@@ -335,6 +337,7 @@ def _estimate_with_exact_pruning(
         pi_b: np.ndarray,
         action_dist: np.ndarray,
         position: np.ndarray,
+        p_e_a: Optional[np.ndarray] = None,
     ) -> float:
         """Apply an exact version of data-driven action embedding selection."""
         n_emb_dim = action_embed.shape[1]
@@ -344,16 +347,29 @@
             comb_list = list(itertools.combinations(feat_list, i))
             theta_list_, cnf_list_ = [], []
             for comb in comb_list:
-                theta, cnf = self._estimate_round_rewards(
-                    context=context,
-                    reward=reward,
-                    action=action,
-                    action_embed=action_embed[:, comb],
-                    pi_b=pi_b,
-                    action_dist=action_dist,
-                    position=position,
-                    with_dev=True,
-                )
+                if p_e_a is None:
+                    theta, cnf = self._estimate_round_rewards(
+                        context=context,
+                        reward=reward,
+                        action=action,
+                        action_embed=action_embed[:, comb],
+                        pi_b=pi_b,
+                        action_dist=action_dist,
+                        position=position,
+                        with_dev=True,
+                    )
+                else:
+                    theta, cnf = self._estimate_round_rewards(
+                        context=context,
+                        reward=reward,
+                        action=action,
+                        action_embed=action_embed[:, comb],
+                        pi_b=pi_b,
+                        action_dist=action_dist,
+                        position=position,
+                        p_e_a=p_e_a[:, :, comb],
+                        with_dev=True,
+                    )
                 if len(theta_list) > 0:
                     theta_list_.append(theta), cnf_list_.append(cnf)
                 else:
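
A note on cost, visible from the loops above and in the greedy routine that follows: the exact variant enumerates every non-empty subset of embedding dimensions via itertools.combinations (2^d - 1 candidates for d dimensions), whereas the greedy variant drops one dimension per round, evaluating at most d(d+1)/2 candidates. A small, self-contained count of that gap is sketched below (illustrative only; the real greedy loop may stop early based on its confidence-interval criterion, which is outside this diff).

# Count of candidate subsets evaluated by the exact vs. greedy strategies,
# based on the loops visible in this diff. Illustrative only; the actual
# greedy loop may terminate early, so its count here is a worst case.
import itertools

def n_exact_candidates(d: int) -> int:
    feat_list = range(d)
    return sum(len(list(itertools.combinations(feat_list, i))) for i in range(d, 0, -1))

def n_greedy_candidates(d: int) -> int:
    total, current = 1, d          # one evaluation with all dimensions first
    while current > 1:
        total += current           # try removing each remaining dimension
        current -= 1
    return total

for d in (4, 10, 20):
    print(d, n_exact_candidates(d), n_greedy_candidates(d))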
@@ -380,23 +396,37 @@ def _estimate_with_greedy_pruning(
         pi_b: np.ndarray,
         action_dist: np.ndarray,
         position: np.ndarray,
+        p_e_a: Optional[np.ndarray] = None,
     ) -> float:
         """Apply a greedy version of data-driven action embedding selection."""
         n_emb_dim = action_embed.shape[1]
         theta_list, cnf_list = [], []
         current_feat, C = np.arange(n_emb_dim), np.sqrt(6) - 1

         # init
-        theta, cnf = self._estimate_round_rewards(
-            context=context,
-            reward=reward,
-            action=action,
-            action_embed=action_embed[:, current_feat],
-            pi_b=pi_b,
-            action_dist=action_dist,
-            position=position,
-            with_dev=True,
-        )
+        if p_e_a is None:
+            theta, cnf = self._estimate_round_rewards(
+                context=context,
+                reward=reward,
+                action=action,
+                action_embed=action_embed[:, current_feat],
+                pi_b=pi_b,
+                action_dist=action_dist,
+                position=position,
+                with_dev=True,
+            )
+        else:
+            theta, cnf = self._estimate_round_rewards(
+                context=context,
+                reward=reward,
+                action=action,
+                action_embed=action_embed[:, current_feat],
+                pi_b=pi_b,
+                action_dist=action_dist,
+                position=position,
+                p_e_a=p_e_a[:, :, current_feat],
+                with_dev=True,
+            )
         theta_list.append(theta), cnf_list.append(cnf)

         # iterate
@@ -405,16 +435,29 @@
             for d in current_feat:
                 idx_without_d = np.where(current_feat != d, True, False)
                 candidate_feat = current_feat[idx_without_d]
-                theta, cnf = self._estimate_round_rewards(
-                    context=context,
-                    reward=reward,
-                    action=action,
-                    action_embed=action_embed[:, candidate_feat],
-                    pi_b=pi_b,
-                    action_dist=action_dist,
-                    position=position,
-                    with_dev=True,
-                )
+                if p_e_a is None:
+                    theta, cnf = self._estimate_round_rewards(
+                        context=context,
+                        reward=reward,
+                        action=action,
+                        action_embed=action_embed[:, candidate_feat],
+                        pi_b=pi_b,
+                        action_dist=action_dist,
+                        position=position,
+                        with_dev=True,
+                    )
+                else:
+                    theta, cnf = self._estimate_round_rewards(
+                        context=context,
+                        reward=reward,
+                        action=action,
+                        action_embed=action_embed[:, candidate_feat],
+                        pi_b=pi_b,
+                        action_dist=action_dist,
+                        position=position,
+                        p_e_a=p_e_a[:, :, candidate_feat],
+                        with_dev=True,
+                    )
                 d_list_.append(d)
                 theta_list_.append(theta), cnf_list_.append(cnf)

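
For intuition about what the new p_e_a[:, :, comb] argument provides: when p(e|a) is known, the marginal importance weight of a logged embedding e under context x can be computed exactly as w(x, e) = sum_a pi_e(a|x) p(e|a) / sum_a pi_b(a|x) p(e|a), and restricting the estimator to a subset of embedding dimensions amounts to dropping the corresponding factors of p(e|a). The numpy sketch below illustrates this; the per-dimension factorization of p(e|a) is an assumption inferred from the p_e_a[:, :, comb] slicing in the diff, and this is not obp's internal code path.

# Illustrative numpy sketch of the "true marginal importance weight" idea; the
# per-dimension factorization of p(e|a) is an assumption inferred from the
# p_e_a[:, :, comb] slicing in the diff, not something obp is guaranteed to do.
import numpy as np

rng = np.random.default_rng(1)
n_actions, n_cat_per_dim, n_cat_dim = 5, 3, 4

# p_e_a[a, c, d]: probability that embedding dimension d takes category c under action a
p_e_a = rng.dirichlet(np.ones(n_cat_per_dim), size=(n_actions, n_cat_dim)).transpose(0, 2, 1)

# behavior and evaluation policies for a single context x (probability vectors over actions)
pi_b_x = rng.dirichlet(np.ones(n_actions))
pi_e_x = rng.dirichlet(np.ones(n_actions))

def marginal_importance_weight(embed: np.ndarray, dims: np.ndarray) -> float:
    """w(x, e) restricted to the embedding dimensions listed in `dims`."""
    # p(e_dims | a) = product over the selected dimensions of p(e_d | a)
    p_e_given_a = np.prod(p_e_a[:, embed[dims], dims], axis=1)  # shape (n_actions,)
    return float(pi_e_x @ p_e_given_a) / float(pi_b_x @ p_e_given_a)

embed = np.array([0, 2, 1, 0])  # one logged embedding: one category per dimension
print(marginal_importance_weight(embed, np.arange(n_cat_dim)))  # all dimensions kept
print(marginal_importance_weight(embed, np.array([0, 2])))      # dimensions 1 and 3 pruned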