Skip to content
This repository was archived by the owner on Sep 27, 2023. It is now read-only.

Tkaitchuck #79

Open
wants to merge 39 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
0f7137f
Clear cached rda files and regenerate test-data each run.
Jan 8, 2016
2809f35
Make sure we are using a matrix
Jan 8, 2016
b05be2e
Add new test options as examples.
Jan 8, 2016
24430fa
Assign cohort randomly.
Jan 8, 2016
785804b
Making parameters a mandatory argument.
Jan 8, 2016
d4b37a0
Merge branch 'master' of https://github.com/google/rappor
Jan 8, 2016
7133e45
Change python to use secure random numbers for the IRR
Feb 3, 2016
004af04
Merge commit '421d583' into tkaitchuck
Feb 3, 2016
68c946f
Merge commit 'c7c8629' into tkaitchuck
Feb 3, 2016
f3ede35
Merge commit '0419357' into tkaitchuck
Feb 3, 2016
56ab5da
Merge commit '96b7c27' into tkaitchuck
Feb 3, 2016
a8ebe48
Merge commit '6c3637d' into tkaitchuck
Feb 3, 2016
36bed99
Merge commit '54b94e3' into tkaitchuck
Feb 3, 2016
27b15bf
Merge commit '3c5377c' into tkaitchuck
Feb 3, 2016
3333f9f
Merge commit '7bdd711' into tkaitchuck
Feb 3, 2016
2a94a05
Merge commit '853c78b' into tkaitchuck
Feb 3, 2016
c99a3bd
Merge commit '9254e2d' into tkaitchuck
Feb 3, 2016
4e6fbbd
Merge commit '8a110fb' into tkaitchuck
Feb 3, 2016
bb169fd
Merge commit '6eb6ea1' into tkaitchuck
Feb 3, 2016
1de6f39
Added code to visualize the effects of different parameters.
Feb 4, 2016
3c06b15
Add better labels to paramerters
Feb 4, 2016
5259f63
Clear cached rda files and regenerate test-data each run.
Jan 8, 2016
74f1bf9
Make sure we are using a matrix
Jan 8, 2016
7857c78
Add new test options as examples.
Jan 8, 2016
65b02c3
Assign cohort randomly.
Jan 8, 2016
ac8d800
Making parameters a mandatory argument.
Jan 8, 2016
7fd35ca
Added code to visualize the effects of different parameters.
Feb 4, 2016
bfc6b2a
Add better labels to paramerters
Feb 4, 2016
fde6163
Merge branch 'tkaitchuck' of github.com:google/rappor into tkaitchuck
Feb 23, 2016
8b417c4
Remove unneeded changes introduced in merge
Feb 23, 2016
822495f
Switch logs to be base2.
Feb 24, 2016
8b7a3c7
Undo change to read_input.R
Feb 24, 2016
6d4ff16
Remove whitespace
tkaitchuck Feb 24, 2016
4c6a8f1
Remove whitespace
tkaitchuck Feb 24, 2016
8cffc74
Cleanup parameter_viz.
Feb 24, 2016
d587048
Merge branch 'tkaitchuck' of github.com:google/rappor into tkaitchuck
Feb 24, 2016
b02e667
Add code to print optimal P and Q values for a given h, f, k set.
Feb 25, 2016
87d5309
Better visualization of optimal parameters.
Feb 26, 2016
1266562
Compute optimal points and graph them.
Feb 26, 2016
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions analysis/R/decode.R
Original file line number Diff line number Diff line change
Expand Up @@ -345,6 +345,7 @@ Decode <- function(counts, map, params, alpha = 0.05,
stop(error_msg)
}

counts = as.matrix(counts) # Make sure we are using a matrix
k <- params$k
p <- params$p
q <- params$q
Expand Down
7 changes: 6 additions & 1 deletion bin/decode_dist.R
Original file line number Diff line number Diff line change
Expand Up @@ -89,12 +89,17 @@ main <- function(opts) {
counts <- ReadCountsFile(opts$counts, params, adjust_counts = opts$adjust_counts_hack)
counts <- AdjustCounts(counts, params)


# The left-most column has totals.
num_reports <- sum(counts[, 1])

map <- LoadMapFile(opts$map, params)

val <- ValidateInput(params, counts, map$map) # NOTE: using global map
if (val != "valid") {
Log("ERROR: Invalid input: %s", val)
quit(status = 1)
}

Log("Decoding %d reports", num_reports)
res <- Decode(counts, map$map, params, correction = opts$correction,
alpha = opts$alpha)
Expand Down
159 changes: 159 additions & 0 deletions parameter_viz.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
from numpy import arange
from numpy import log
from numpy import linspace
from numpy import floor
from numpy import ceil
import math

from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt


def pid(x, f, p, q):
    """Mixture of "x-th power" agreement terms weighted by f/2 and 1 - f/2.

    With weight .5*f the term is q**x + (1-q)**x; with weight 1-.5*f the
    term is (1-p)**x + p**x.

    NOTE(review): presumably this is the probability that x reports of one
    bit coming from the same client all agree -- confirm with the author.

    Args:
        x: exponent applied to each probability (number of reports, presumably).
        f, p, q: numeric parameters (RAPPOR f/p/q, presumably).

    Returns:
        The weighted sum described above.
    """
    half_f = .5*f
    q_term = q**x + (1-q)**x
    p_term = (1-p)**x + p**x
    return half_f*q_term + (1-half_f)*p_term

def pother(x, f, p, q):
    """Counterpart of pid() where the remaining x-1 factors use the blended rate b.

    b = .5*f*q + (1-.5*f)*p is the blended probability; each mixture branch
    multiplies its own single-report probability by b**(x-1) (or the
    complementary (1-b)**(x-1)).

    Fix: the first (.5*f) branch previously multiplied by b and (1-b) without
    the (x-1) exponent, unlike the second branch.  The two forms agree only
    when x == 2, so calls with larger x (e.g. x=10000) were wrong.  With the
    exponent applied to both branches the result is algebraically
    b**x + (1-b)**x.

    Args:
        x: total number of factors (number of reports, presumably); x >= 1.
        f, p, q: numeric parameters (RAPPOR f/p/q, presumably).

    Returns:
        The weighted sum described above.
    """
    b = .5 * f * q + (1-.5*f) * p
    # Contribution of the other x-1 factors, shared by both mixture branches.
    rest_ones = b**(x-1)
    rest_zeros = (1-b)**(x-1)
    return ( .5*f*(q*rest_ones + (1-q)*rest_zeros)
             + (1-.5*f)*(p*rest_ones + (1-p)*rest_zeros) )


def delta(x, f, p, q):
    """Return pid(x, f, p, q) minus pother(x, f, p, q)."""
    same_source = pid(x, f, p, q)
    other_source = pother(x, f, p, q)
    return same_source - other_source

def predictN(prob, delta):
    """Return 4 * prob * (1 - prob) / delta**2, floored at 1.

    NOTE(review): presumably a sample-size estimate for detecting a
    difference of `delta` around probability `prob` -- confirm.

    Args:
        prob: a probability.
        delta: the difference to detect; 0 yields infinity.

    Returns:
        float('inf') when delta == 0, otherwise max(1, 4*prob*(1-prob)/delta**2).
    """
    if delta != 0:
        scaled_variance = prob * (1-prob) * 4
        return max(1, scaled_variance / (delta**2))
    return float('inf')

def valueIFPQ(i,f,p,q):
    """Feed pid/pother for exponent i into predictN.

    Calls predictN with the larger of pid(i, ...) and pother(i, ...) as the
    probability and their absolute difference as the delta.
    """
    pair = (pid(i, f, p, q), pother(i, f, p, q))
    gap = abs(pair[0] - pair[1])
    return predictN(max(pair), gap)

def detetectionThreshold(f, p, q, h, k):
    """Return predictN(...) for the h-th powers of two blended bit rates.

    Args:
        f, p, q: numeric parameters (RAPPOR f/p/q, presumably).
        h: exponent applied to the rates (number of hashes, presumably).
        k: divisor for the collision probability h/k (number of bits,
           presumably); k <= 1 yields infinity.

    Returns:
        float("inf") when k <= 1 or when the two rates coincide, otherwise
        the predictN() value for whichever side (rates or their complements)
        the comparison below selects.
    """
    # Guard first: with k <= 1 the collision probability below is meaningless.
    if k <= 1:
        return float("inf")
    half_f = .5 * f
    # Probability of a bit being 1 from a true value of 0 in the irr.
    pStar = half_f * q + (1-half_f) * p
    # Mirror image of pStar with p and q swapped.
    # NOTE(review): presumably the probability of a bit being 1 from a true
    # value of 1; the earlier comment said "being 0" -- confirm.
    qStar = (1-half_f) * q + half_f * p
    probCollision = (1.0 * h) / k
    # Blend qStar toward pStar by the collision probability.
    qPrime = qStar*(1-probCollision) + (probCollision*pStar)
    if pStar == qPrime:
        return float("inf")
    if pStar < qPrime:
        return predictN(pStar**h, qPrime**h - pStar**h)
    return predictN((1-pStar)**h, (1-qPrime)**h - (1-pStar)**h)

def printDelta(f, p=None, q=None):
    """Print pid, pother, their difference and predictN for i = 2..9.

    Python has no overloading: the former one-argument `printDelta(x)` was
    silently replaced by the three-argument definition that followed it, so
    calling it with a single sequence raised TypeError.  Both call shapes are
    now served by this one function.

    Args:
        f: the f parameter, or -- when p and q are omitted -- a sequence
           laid out as (p, f, q), matching the old one-argument form which
           forwarded x[1], x[0], x[2].
        p, q: the p and q parameters; omit both to use the sequence form.
    """
    if p is None and q is None:
        packed = f
        f, p, q = packed[1], packed[0], packed[2]
    for i in range(2,10):
        p_id = pid(i,f,p,q)
        p_other = pother(i,f,p,q)
        print(i, p_id, ' vs ', p_other, ' delta ', p_id-p_other, ' for a sum of ', predictN(max(p_id,p_other), p_id-p_other))

def toPow2(x):
    """Round x down to a power of two; values <= 1 map to 0.

    Returns 2**floor(log2(x)) for x > 1 (log2 computed as log(x)/log(2)),
    and 0 for x <= 1.
    """
    return 0 if x <= 1 else 2**floor(log(x)/log(2))

def eInf(f, h):
    """Return 2*h*log2(ratio), where ratio compares 1 - f/2 with f/2.

    For f <= 1.0 the ratio is (1-.5*f)/(.5*f); for larger f it is inverted so
    the logarithm stays non-negative.  f == 0 divides by zero.

    NOTE(review): presumably the epsilon of the privacy bound (that is how
    the plot labels this value) -- confirm.
    """
    half_f = .5*f
    if f <= 1.0:
        ratio = (1-half_f)/half_f
    else:
        ratio = half_f/(1-half_f)
    return 2 * h * log( ratio ) / log(2)

def getData():
    """Yield one tuple per (h, f, p, q) grid point that passes the filter.

    Each tuple is
        (f, p, q, h, maxk, e, detThres, val2, val10000, tradeoff)
    where maxk = toPow2(valueIFPQ(2, ...)), e = eInf(f, h), detThres is the
    detection threshold at k = maxk, val2/val10000 are valueIFPQ at 2 and
    10000, and tradeoff = e * detThres.  Points with an infinite detThres or
    with e >= 10 are dropped.
    """
    fGrid = (.125,.2,.25,.3,.4,.5,.75,1,1.25,1.5,1.75)
    pGrid = (.0,.1,.2,.3,.4,.5,.6,.7,.8,.9)
    qGrid = (.15,.25,.35,.45,.55,.65,.75,.85,1)
    for h in (1, 2):
        for f in fGrid:
            # e depends only on (f, h), so compute it once per pair.
            e = eInf(f,h)
            for p in pGrid:
                for q in qGrid:
                    val2 = valueIFPQ(2,f,p,q)
                    maxk = toPow2(val2)
                    detThres = detetectionThreshold(f, p, q, h, maxk)
                    if math.isinf(detThres) or not e < 10:
                        continue
                    yield (f, p, q, h, maxk, e, detThres, val2, valueIFPQ(10000,f,p,q), e * detThres)

def toColor(color):
    """Convert an intensity in [0, 1] to a 6-digit grey hex string "rrggbb".

    The intensity is scaled by 256, rounded, and clamped to 1..255; the same
    channel value is used for red, green and blue.

    Fix: the previous hex(x*256*256 + x*256 + x)[2:] dropped the leading zero
    whenever the channel value was below 16, producing a 5-digit string.
    Each channel is now zero-padded to two digits.  Results for channel
    values >= 16 are unchanged.

    Args:
        color: numeric intensity; values outside [0, 1] are clamped.

    Returns:
        A lowercase 6-character hex string without a leading '#' or '0x'.
    """
    channel = max(1, min(255, int(round(color * 256.0))))
    return '{0:02x}{0:02x}{0:02x}'.format(channel)

def makePlot(pointGenerator):
    """Draw a 3-D scatter of the points produced by pointGenerator and show it.

    Args:
        pointGenerator: zero-argument callable yielding 10-tuples
            (f, p, q, h, maxk, e, detThres, val2, val10000, tradeoff).

    Each point is placed at x = e, y = log2(val10000), z = log2(ceil(detThres)),
    sized by h*h*20 and coloured with the triple (0.5*f, p, q).  Blocks in
    plt.show() until the window is closed.
    """
    figure = plt.figure()
    axes = figure.add_subplot(111, projection='3d')
    for point in pointGenerator():
        f, p, q, h, maxk, e, detThres, val2, val10000, tradeoff = point
        yCoord = log(val10000)/log(2)
        zCoord = log(ceil(detThres))/log(2)
        axes.scatter(e, yCoord, zCoord, s=h*h*20, c=(0.5*f,p,q), marker='o')
    axes.view_init(elev=20.,azim=45)
    # Flip z so that smaller thresholds are drawn toward the top.
    axes.invert_zaxis()
    axes.set_xlabel('e \n Epsilon of privacy bound')
    axes.set_ylabel('log(val10000) \n Log of number of bits of K needed to form a identifier that could distinguish two users')
    axes.set_zlabel('Detectability theashold \n The log base 2 of the number of repports needed to detect a value')
    axes.text(1,10,1,"Good")
    axes.text(9,-2,12,"Bad")
    plt.show()


def value(f, p, q, h, k):
    """Detection threshold for (f, p, q, h, k), or infinity if k is too large.

    k is rejected (float("inf") is returned) when floor(valueIFPQ(2, f, p, q))
    is smaller than k; otherwise the result of detetectionThreshold() is
    returned.
    """
    kLimit = floor(valueIFPQ(2,f,p,q))
    return float("inf") if kLimit < k else detetectionThreshold(f, p, q, h, k)


def getOptimalPQ():
    """Yield near-optimal (p, q) grid points for each (f, h, k) combination.

    Epsilons are visited in increasing order.  For every k, a first pass over
    a 101x101 (p, q) grid finds the lowest detection threshold; a second pass
    yields every finite point within 1% of that lowest value whose tradeoff
    (e * detThres) beats the best tradeoff recorded for earlier epsilons.

    Yields:
        Tuples (f, p, q, h, k, e, detThres, val2, val10000, tradeoff).

    Fix: dict.iterkeys() exists only on Python 2; sorted(dict) iterates the
    keys on both Python 2 and 3.

    NOTE(review): the nesting below was reconstructed from a copy of the
    file whose indentation was lost; in particular the final update of
    smallestTradeoff is placed once per epsilon -- confirm against the
    original.
    """
    # Map epsilon -> (f, h).  Later pairs overwrite earlier ones that happen
    # to produce the same epsilon.
    epislons = {}
    for f in (.75,.5,.4,.333,.25,.2,.15,.125):
        for h in (1,2):
            e = eInf(f,h)
            epislons[e] = (f,h)

    grid = linspace(0.0,1.0,101)
    smallestTradeoff = float("inf")
    for e in sorted(epislons):
        f, h = epislons[e]
        smallestTradeoffForE = float("inf")
        if h>1 and f<.4:
            continue
        # NOTE(review): 126 looks like a typo for 128 -- confirm before changing.
        for k in (8,32,64,126,256):
            # Pass 1: lowest detection threshold over the whole grid.
            lowest = float("inf")
            for p in grid:
                for q in grid:
                    detThres = value(f, p, q, h, k)
                    if detThres < lowest:
                        lowest = detThres
            # Pass 2: report every point within 1% of that lowest value.
            for p in grid:
                for q in grid:
                    p=round(p,4)
                    q=round(q,4)
                    detThres = value(f, p, q, h, k)
                    if not math.isinf(detThres) and detThres < lowest * 1.01:
                        tradeoff = e * detThres
                        if tradeoff < smallestTradeoff:
                            yield ( f, p, q, h, k, e, detThres, valueIFPQ(2,f,p,q), valueIFPQ(10000,f,p,q), tradeoff )
                        if tradeoff < smallestTradeoffForE:
                            smallestTradeoffForE = tradeoff
        # Only after all k for this epsilon does its best tradeoff become the
        # bar that later (larger) epsilons must beat.
        if smallestTradeoffForE < smallestTradeoff:
            smallestTradeoff = smallestTradeoffForE

def printOptimalPQ():
    """Print one formatted line per point yielded by getOptimalPQ().

    Each line shows h, k, f, p, q, epsilon rounded to 4 places, and the
    detection threshold rounded up.
    """
    print("Optimal choices for P and Q for varrious values:")
    lineFormat = 'h={}, k={:3}, f={:4}, p={:4}, q={:4}, epislon={:5}, detThres={}'
    for point in getOptimalPQ():
        f, p, q, h, k, e, detThres, val2, val10000, tradeoff = point
        print( lineFormat.format(h,k,f,p,q, round(e,4), ceil(detThres)) )

# Script body: plot a sample of the parameter space, print the optimal
# (p, q) choices, plot them, then print a summary of the observations.
#
# A bare `print` only emits a blank line on Python 2; on Python 3 it is an
# expression with no effect.  print("") produces the blank line on both.
print("")
print("Showing a plot shoing various points in the space. (Not nessicarly optimal ones)")
makePlot(getData)
print("")
print("Computing points on the optimal frontier")
printOptimalPQ()
print("")
print("Plotting the optimal set")
makePlot(getOptimalPQ)

print("As you can see the optimal points have a few properties in common:")
print("h is always 1")
print("f is never below .2 (Though the exact lower bound of this threashold will require more experimentation)")
print("P and Q are always some extreme (one of them is either 1 or 0)")
print("Higher values of K corilate with higher values of F")
2 changes: 1 addition & 1 deletion tests/gen_counts.R
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,7 @@ main <- function(argv) {

params <- ReadParameterFile(params_file)

true_map <- ReadMapFile(true_map_file)
true_map <- ReadMapFile(true_map_file, params)

num_unique_values <- length(true_map$strs)

Expand Down