diff --git a/analysis/R/decode.R b/analysis/R/decode.R index 3311a121..02e2e7d9 100644 --- a/analysis/R/decode.R +++ b/analysis/R/decode.R @@ -345,6 +345,7 @@ Decode <- function(counts, map, params, alpha = 0.05, stop(error_msg) } + counts = as.matrix(counts) # Make sure we are using a matrix k <- params$k p <- params$p q <- params$q diff --git a/bin/decode_dist.R b/bin/decode_dist.R index 5c83f741..5907b1ff 100755 --- a/bin/decode_dist.R +++ b/bin/decode_dist.R @@ -89,12 +89,17 @@ main <- function(opts) { counts <- ReadCountsFile(opts$counts, params, adjust_counts = opts$adjust_counts_hack) counts <- AdjustCounts(counts, params) - # The left-most column has totals. num_reports <- sum(counts[, 1]) map <- LoadMapFile(opts$map, params) + val <- ValidateInput(params, counts, map$map) # NOTE: using global map + if (val != "valid") { + Log("ERROR: Invalid input: %s", val) + quit(status = 1) + } + Log("Decoding %d reports", num_reports) res <- Decode(counts, map$map, params, correction = opts$correction, alpha = opts$alpha) diff --git a/parameter_viz.py b/parameter_viz.py new file mode 100644 index 00000000..18b123c8 --- /dev/null +++ b/parameter_viz.py @@ -0,0 +1,159 @@ +from numpy import arange +from numpy import log +from numpy import linspace +from numpy import floor +from numpy import ceil +import math + +from mpl_toolkits.mplot3d import Axes3D +import matplotlib.pyplot as plt + + +def pid(x, f, p, q): + return .5*f*(q**x + (1-q)**x) + (1-.5*f)*((1-p)**x + p**x) + +def pother(x, f, p, q): + b = .5 * f * q + (1-.5*f) * p + return ( .5*f*(q*b + (1-q)*(1-b)) + (1-.5*f)*(p*b**(x-1) + (1-p)*(1-b)**(x-1)) ) + + +def delta(x, f, p, q): + return pid(x,f,p,q) - pother(x,f,p,q) + +def predictN(prob, delta): + if delta == 0: + return float('inf') + return max(1, prob * (1-prob) * 4 / (delta**2)) + +def valueIFPQ(i,f,p,q): + p_id = pid(i, f, p, q) + p_other = pother(i,f,p,q) + return predictN(max(p_id, p_other), abs(p_id - p_other)) + +def detetectionThreshold(f, p, q, h, k): + pStar = .5 * f * q + (1-.5*f) * p # Probability of a bit being 1 from a true value of 0 in the irr + qStar = (1-.5*f) * q + .5 * f * p # Probability of a bit being 0 from a true value of 1 in the irr + if k <= 1: + return float("inf") + probCollision = (1.0 * h) / k + qPrime = qStar*(1-probCollision) + (probCollision*pStar) + if pStar == qPrime: + return float("inf") + elif pStar < qPrime: + return predictN(pStar**h, qPrime**h - pStar**h) + else: + return predictN((1-pStar)**h, (1-qPrime)**h - (1-pStar)**h) + +def printDelta(x): + printDelta(x[1], x[0], x[2]) + +def printDelta(f, p, q): + for i in range(2,10): + p_id = pid(i,f,p,q) + p_other = pother(i,f,p,q) + print(i, p_id, ' vs ', p_other, ' delta ', p_id-p_other, ' for a sum of ', predictN(max(p_id,p_other), p_id-p_other)) + +def toPow2(x): + if x<=1: + return 0 + return 2**floor(log(x)/log(2)) + +def eInf(f, h): + if f <= 1.0: + return 2 * h * log( (1-.5*f)/(.5*f) ) / log(2) + else: + return 2 * h * log( (.5*f)/(1-.5*f) ) / log(2) + +def getData(): + for h in (1, 2): + for f in (.125,.2,.25,.3,.4,.5,.75,1,1.25,1.5,1.75) : + for p in (.0,.1,.2,.3,.4,.5,.6,.7,.8,.9) : + for q in (.15,.25,.35,.45,.55,.65,.75,.85,1) : + maxk = toPow2(valueIFPQ(2,f,p,q)) + detThres = detetectionThreshold(f, p, q, h, maxk) + e = eInf(f,h) + tradeoff = eInf(f,h) * detThres + if not math.isinf(detThres) and e < 10: + yield (f, p, q, h, maxk, e, detThres, valueIFPQ(2,f,p,q), valueIFPQ(10000,f,p,q), tradeoff) + +def toColor(color): + x = max(1, min(255, int(round(color * 256.0)))) + return hex(x*256*256 + x*256 + x)[2:] + +def makePlot(pointGenerator): + fig = plt.figure() + ax = fig.add_subplot(111, projection='3d') + for f, p, q, h, maxk, e, detThres, val2, val10000, tradeoff in pointGenerator(): + ax.scatter(e, log(val10000)/log(2), log(ceil(detThres))/log(2), s=h*h*20, c=(0.5*f,p,q), marker='o') + ax.view_init(elev=20.,azim=45) + ax.invert_zaxis() + ax.set_xlabel('e \n Epsilon of privacy bound') + ax.set_ylabel('log(val10000) \n Log of number of bits of K needed to form a identifier that could distinguish two users') + ax.set_zlabel('Detectability theashold \n The log base 2 of the number of repports needed to detect a value') + ax.text(1,10,1,"Good") + ax.text(9,-2,12,"Bad") + plt.show() + + +def value(f, p, q, h, k): + maxk = floor(valueIFPQ(2,f,p,q)) + if maxk < k: + return float("inf") + return detetectionThreshold(f, p, q, h, k) + + +def getOptimalPQ(): + epislons = {} + for f in (.75,.5,.4,.333,.25,.2,.15,.125): + for h in (1,2): + e = eInf(f,h) + epislons[e] = (f,h) + + smallestTradeoff = float("inf") + for e in sorted(epislons.iterkeys()): + f = epislons[e][0] + h = epislons[e][1] + smallestTradeoffForE = float("inf") + if h>1 and f<.4: + continue + for k in (8,32,64,126,256): + lowest = float("inf") + for p in linspace(0.0,1.0,101): + for q in linspace(0.0,1.0,101): + detThres = value(f, p, q, h, k) + if detThres < lowest: + lowest = detThres + for p in linspace(0.0,1.0,101): + for q in linspace(0.0,1.0,101): + p=round(p,4) + q=round(q,4) + detThres = value(f, p, q, h, k) + if not math.isinf(detThres) and detThres < lowest * 1.01: + tradeoff = e * detThres + if tradeoff < smallestTradeoff: + yield ( f, p, q, h, k, e, detThres, valueIFPQ(2,f,p,q), valueIFPQ(10000,f,p,q), tradeoff ) + if tradeoff < smallestTradeoffForE: + smallestTradeoffForE = tradeoff + if smallestTradeoffForE < smallestTradeoff: + smallestTradeoff = smallestTradeoffForE + +def printOptimalPQ(): + print("Optimal choices for P and Q for varrious values:") + for f, p, q, h, k, e, detThres, val2, val10000, tradeoff in getOptimalPQ(): + print( 'h={}, k={:3}, f={:4}, p={:4}, q={:4}, epislon={:5}, detThres={}'.format(h,k,f,p,q, round(e,4), ceil(detThres)) ) + +print +print("Showing a plot shoing various points in the space. (Not nessicarly optimal ones)") +makePlot(getData) +print +print("Computing points on the optimal frontier") +printOptimalPQ() +print +print("Plotting the optimal set") +makePlot(getOptimalPQ) + +print("As you can see the optimal points have a few properties in common:") +print("h is always 1") +print("f is never below .2 (Though the exact lower bound of this threashold will require more experimentation)") +print("P and Q are always some extreme (one of them is either 1 or 0)") +print("Higher values of K corilate with higher values of F") diff --git a/tests/gen_counts.R b/tests/gen_counts.R index 576cc1c6..769677c4 100755 --- a/tests/gen_counts.R +++ b/tests/gen_counts.R @@ -178,7 +178,7 @@ main <- function(argv) { params <- ReadParameterFile(params_file) - true_map <- ReadMapFile(true_map_file) + true_map <- ReadMapFile(true_map_file, params) num_unique_values <- length(true_map$strs)