// simplified file operations and no error handling for clarity
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import org.apache.datasketches.memory.Memory;
import org.apache.datasketches.ArrayOfLongsSerDe;
import org.apache.datasketches.sampling.ReservoirItemsSketch;
import org.apache.datasketches.sampling.ReservoirItemsUnion;
// this section generates two sketches with some overlap
// and serializes them into files in compact (not updatable) form
{
int k = 8192;
// 100000 unique keys
ReservoirItemsSketch<Long> sketch1 = ReservoirItemsSketch.newInstance(k);
for (long key = 0; key < 100000; key++) { sketch1.update(key); }
FileOutputStream out1 = new FileOutputStream(new File("Reservoir1.bin"));
out1.write(sketch1.toByteArray(new ArrayOfLongsSerDe()));
out1.close();
// 100000 unique keys
// the first 50000 unique keys overlap with sketch1
ReservoirItemsSketch<Long> sketch2 = ReservoirItemsSketch.newInstance(k);
for (long key = 50000; key < 150000; key++) { sketch2.update(key); }
FileOutputStream out2 = new FileOutputStream(new File("Reservoir2.bin"));
out2.write(sketch2.toByteArray(new ArrayOfLongsSerDe()));
out2.close();
}
// this section deserializes the sketches, produces their union, and prints the results
{
FileInputStream in1 = new FileInputStream(new File("Reservoir1.bin"));
byte[] bytes1 = new byte[in1.available()];
in1.read(bytes1);
in1.close();
ReservoirItemsSketch<Long> sketch1 = ReservoirItemsSketch.heapify(Memory.wrap(bytes1),
new ArrayOfLongsSerDe());
FileInputStream in2 = new FileInputStream(new File("Reservoir2.bin"));
byte[] bytes2 = new byte[in2.available()];
in2.read(bytes2);
in2.close();
ReservoirItemsSketch<Long> sketch2 = ReservoirItemsSketch.heapify(Memory.wrap(bytes2),
new ArrayOfLongsSerDe());
int k = sketch1.getK();
ReservoirItemsUnion<Long> union = ReservoirItemsUnion.newInstance(k);
union.update(sketch1);
union.update(sketch2);
ReservoirItemsSketch<Long> unionResult = union.getResult();
// debug summary of the union result sketch
System.out.println(unionResult.toString());
System.out.println("First 10 results in union:");
Long[] samples = unionResult.getSamples();
for (int i = 0; i < 10; i++) {
System.out.println(i + ": " + samples[i]);
}
}
Output:
### ReservoirItemsSketch SUMMARY:
k : 8192
n : 200000
Current size : 8192
Resize factor: X8
### END SKETCH SUMMARY
First 10 results in union:
0: 6843
1: 1
2: 18592
3: 13470
4: 24367
5: 12686
6: 73476
7: 15003
8: 68131
9: 18649