import java.io.FileInputStream;
import java.io.FileOutputStream;
import com.yahoo.memory.Memory;
import com.yahoo.sketches.cpc.CpcSketch;
import com.yahoo.sketches.cpc.CpcUnion;
//simplified file operations and no error handling for clarity
public class CpcExample {
public static void main(String[] args) throws Exception {
final int lgK = 10;
// this section generates two sketches with some overlap and serializes them into files
{
// 100000 distinct keys
CpcSketch sketch1 = new CpcSketch(lgK);
for (int key = 0; key < 100000; key++) sketch1.update(key);
FileOutputStream out1 = new FileOutputStream("CpcSketch1.bin");
out1.write(sketch1.toByteArray());
out1.close();
// 100000 distinct keys
CpcSketch sketch2 = new CpcSketch(lgK);
for (int key = 50000; key < 150000; key++) sketch2.update(key);
FileOutputStream out2 = new FileOutputStream("CpcSketch2.bin");
out2.write(sketch2.toByteArray());
out2.close();
}
// this section deserializes the sketches, produces a union and prints the result
{
FileInputStream in1 = new FileInputStream("CpcSketch1.bin");
byte[] bytes1 = new byte[in1.available()];
in1.read(bytes1);
in1.close();
CpcSketch sketch1 = CpcSketch.heapify(Memory.wrap(bytes1));
FileInputStream in2 = new FileInputStream("CpcSketch2.bin");
byte[] bytes2 = new byte[in2.available()];
in2.read(bytes2);
in2.close();
CpcSketch sketch2 = CpcSketch.heapify(Memory.wrap(bytes2));
CpcUnion union = new CpcUnion(lgK);
union.update(sketch1);
union.update(sketch2);
CpcSketch result = union.getResult();
// debug summary of the union result sketch
System.out.println(result.toString());
System.out.println("Distinct count estimate: " + result.getEstimate());
System.out.println("Distinct count lower bound 95% confidence: " + result.getLowerBound(2));
System.out.println("Distinct count upper bound 95% confidence: " + result.getUpperBound(2));
}
}
}
Output:
### CPD SKETCH - PREAMBLE:
Flavor : SLIDING
LgK : 10
Merge Flag : true
Error Const : 0.6931471805599453
RSE : 0.02166084939249829
Seed Hash : 93cc | 37836
Num Coupons : 7706
Num Pairs (SV) : 27
First Inter Col: 4
Valid Window : true
Valid PairTable: true
Window Offset : 5
KxP : 1024.0
HIP Accum : 0.0
### END CPC SKETCH
Distinct count estimate: 149796.50599220005
Distinct count lower bound 95% confidence: 143416.2744812169
Distinct count upper bound 95% confidence: 156397.0