#include <iostream>
#include <fstream>
#include <hll.hpp>
//simplified file operations and no error handling for clarity
// Example: build two overlapping HLL sketches, round-trip them through files,
// then union the deserialized sketches and print the distinct-count estimate.
// Always returns 0; stream and file errors are intentionally not checked.
int main(int argc, char **argv) {
  const int lg_k = 11;
  const auto type = datasketches::HLL_4; // this is the default, but explicit here for illustration

  // this section generates two sketches with some overlap and serializes them into files
  {
    // 100000 distinct keys: [0, 100000)
    datasketches::hll_sketch sketch1(lg_k, type); // type is optional, defaults to HLL_4
    for (int key = 0; key < 100000; key++) sketch1.update(key);
    // Binary mode: the serialized image is raw bytes, so the stream must not
    // apply text-mode newline translation (which would corrupt it on
    // platforms that distinguish text and binary files).
    std::ofstream os1("hll_sketch1.bin", std::ios::binary);
    sketch1.serialize_compact(os1);

    // 100000 distinct keys: [50000, 150000), half of which overlap with sketch1
    datasketches::hll_sketch sketch2(lg_k, type); // type is optional, defaults to HLL_4
    for (int key = 50000; key < 150000; key++) sketch2.update(key);
    std::ofstream os2("hll_sketch2.bin", std::ios::binary);
    sketch2.serialize_compact(os2);
  } // both output streams are flushed and closed here, before the files are read back

  // this section deserializes the sketches, produces union and prints the result
  {
    // Read back in binary mode to match how the files were written.
    std::ifstream is1("hll_sketch1.bin", std::ios::binary);
    auto sketch1 = datasketches::hll_sketch::deserialize(is1);
    std::ifstream is2("hll_sketch2.bin", std::ios::binary);
    auto sketch2 = datasketches::hll_sketch::deserialize(is2);

    datasketches::hll_union u(lg_k);
    u.update(sketch1);
    u.update(sketch2);
    auto sketch = u.get_result(type); // type is optional, defaults to HLL_4

    // debug summary of the union result sketch
    std::cout << sketch.to_string();
    // The argument 2 is passed to the bound functions for the bounds labeled
    // "95% confidence" below.
    std::cout << "Distinct count estimate: " << sketch.get_estimate() << std::endl;
    std::cout << "Distinct count lower bound 95% confidence: " << sketch.get_lower_bound(2) << std::endl;
    std::cout << "Distinct count upper bound 95% confidence: " << sketch.get_upper_bound(2) << std::endl;
  }

  return 0;
}
### HLL SKETCH SUMMARY:
Log Config K : 11
Hll Target : HLL_4
Current Mode : HLL
LB : 148634
Estimate : 152041
UB : 155614
OutOfOrder flag: true
CurMin : 4
NumAtCurMin : 21
HipAccum : 147291
KxQ0 : 19.889
KxQ1 : 0
Distinct count estimate: 152041
Distinct count lower bound 95% confidence: 145234
Distinct count upper bound 95% confidence: 159184