// simplified file operations and no error handling for clarity
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.nio.file.Files;
import java.nio.file.Paths;
import org.apache.datasketches.memory.Memory;
import org.apache.datasketches.theta.Intersection;
import org.apache.datasketches.theta.SetOperation;
import org.apache.datasketches.theta.Sketch;
import org.apache.datasketches.theta.Sketches;
import org.apache.datasketches.theta.UpdateSketch;
import org.apache.datasketches.theta.Union;
// this section generates two sketches with some overlap
// and serializes them into files in compact (not updatable) form
{
  // Builds two update sketches and writes each one to its own file in compact
  // (not updatable) form. Each stream is opened in try-with-resources so it is
  // closed even when the write throws.

  // sketch1: 100000 unique keys, 0..99999
  UpdateSketch sketch1 = UpdateSketch.builder().build();
  for (int key = 0; key < 100000; key++) {
    sketch1.update(key);
  }
  try (FileOutputStream out1 = new FileOutputStream("ThetaSketch1.bin")) {
    out1.write(sketch1.compact().toByteArray());
  }

  // sketch2: 100000 unique keys, 50000..149999
  // the first 50000 of them (50000..99999) overlap with sketch1
  UpdateSketch sketch2 = UpdateSketch.builder().build();
  for (int key = 50000; key < 150000; key++) {
    sketch2.update(key);
  }
  try (FileOutputStream out2 = new FileOutputStream("ThetaSketch2.bin")) {
    out2.write(sketch2.compact().toByteArray());
  }
}
// this section deserializes the sketches, produces union and intersection and prints the results
{
  // Reads the two serialized sketches back, wraps the bytes as read-only
  // sketches, then prints the union and the intersection of the two.

  // Files.readAllBytes reads the whole file and closes it afterwards. Sizing a
  // buffer from InputStream.available() and calling read() once is not
  // guaranteed to return the complete file contents.
  byte[] bytes1 = Files.readAllBytes(Paths.get("ThetaSketch1.bin"));
  Sketch sketch1 = Sketches.wrapSketch(Memory.wrap(bytes1));

  byte[] bytes2 = Files.readAllBytes(Paths.get("ThetaSketch2.bin"));
  Sketch sketch2 = Sketches.wrapSketch(Memory.wrap(bytes2));

  Union union = SetOperation.builder().buildUnion();
  union.union(sketch1);
  union.union(sketch2);
  Sketch unionResult = union.getResult();

  // debug summary of the union result sketch
  System.out.println(unionResult.toString());

  // bounds are requested at 2 standard deviations, reported below as 95% confidence
  System.out.println("Union unique count estimate: " + unionResult.getEstimate());
  System.out.println("Union unique count lower bound 95% confidence: " + unionResult.getLowerBound(2));
  System.out.println("Union unique count upper bound 95% confidence: " + unionResult.getUpperBound(2));

  // intersect(Sketch) is the stateful intersection call, the counterpart of Union.union(Sketch)
  Intersection intersection = SetOperation.builder().buildIntersection();
  intersection.intersect(sketch1);
  intersection.intersect(sketch2);
  Sketch intersectionResult = intersection.getResult();

  // debug summary of the intersection result sketch
  System.out.println(intersectionResult.toString());

  System.out.println("Intersection unique count estimate: " + intersectionResult.getEstimate());
  System.out.println("Intersection unique count lower bound 95% confidence: " + intersectionResult.getLowerBound(2));
  System.out.println("Intersection unique count upper bound 95% confidence: " + intersectionResult.getUpperBound(2));
}
Output:
### HeapCompactOrderedSketch SUMMARY:
Estimate : 149586.73149344584
Upper Bound, 95% conf : 154287.5017892762
Lower Bound, 95% conf : 145028.6046846571
Theta (double) : 0.027382107751846067
Theta (long) : 252555366948521403
Theta (long) hex : 038141c4a515c5bb
EstMode? : true
Empty? : false
Array Size Entries : 4096
Retained Entries : 4096
Seed Hash : 93cc
### END SKETCH SUMMARY
Union unique count estimate: 149586.73149344584
Union unique count lower bound 95% confidence: 145028.6046846571
Union unique count upper bound 95% confidence: 154287.5017892762
### HeapCompactOrderedSketch SUMMARY:
Estimate : 48249.113729035394
Upper Bound, 95% conf : 50358.736970106176
Lower Bound, 95% conf : 46227.35737896924
Theta (double) : 0.04377282475820978
Theta (long) : 403733047849016500
Theta (long) hex : 059a591165205cb4
EstMode? : true
Empty? : false
Array Size Entries : 2112
Retained Entries : 2112
Seed Hash : 93cc
### END SKETCH SUMMARY
Intersection unique count estimate: 48249.113729035394
Intersection unique count lower bound 95% confidence: 46227.35737896924
Intersection unique count upper bound 95% confidence: 50358.736970106176