#!/usr/bin/env python3 from os import devnull from sys import stdout, stderr, exit from contextlib import redirect_stdout from argparse import ArgumentParser from warnings import catch_warnings from multiprocessing import Pool from shutil import rmtree from gc import collect from json import loads, dumps from pathlib import Path from typing import TypeVar from cmap import Colormap from hilbert import decode from zlib import compress, crc32 from struct import pack from numpy.typing import NDArray import numpy as np ip_bytes = 4 ip_bits = ip_bytes * 8 num_ips = 1 << ip_bits num_ips_sqrt = 1 << (ip_bits >> 1) def make_coord_range(start: int, end: int): return decode(np.arange(start, end, dtype = np.uint32), num_dims = 2, num_bits = 16).astype(np.uint16) default_batches = 64 default_processes = 4 def make_coords(output_path: Path, batches = default_batches, processes = default_processes): if not 1 <= batches <= 0x10000: raise ValueError(f"batches must be between 1 and {0x10000}") if not 1 <= processes <= 256: raise ValueError(f"processes must be between 1 and 256") ips_per_batch, leftover_batch_ips = divmod(num_ips, batches) if leftover_batch_ips > 0: raise ValueError("the total number of ips must evenly divide into the number of batches") ips_per_process, leftover_process_ips = divmod(ips_per_batch, processes) if leftover_process_ips > 0: raise ValueError("the number of ips within each batch must evenly divide into the number of processes") if output_path.is_dir(): raise ValueError("output path must not be a directory") output_path.write_bytes(b'') with Pool(processes) as p: for batch in range(batches): print(f"starting batch {batch}...") arrs = p.starmap(make_coord_range, ((offset * ips_per_process, offset * ips_per_process + ips_per_process) for offset in range(batch * processes, batch * processes + processes))) print(f"finished batch, writing arrays to file...") with output_path.open("ab") as f: for arr in arrs: f.write(arr.tobytes()) print(f"finished writing to file") def convert(input_path: Path, output_path: Path): print(f"reading csv '{input_path}' into array...", end = " ", flush = True) arr = np.loadtxt(input_path, dtype = np.uint32, delimiter = ",", skiprows = 1) print("done") print("filtering out unsuccessful values...", end = " ", flush = True) arr = arr[arr[:, -1] == 1] print("done") print("removing success column...", end = " ", flush = True) arr = arr[:, :-1] print("done") print("removing duplicate IP addresses...", end = " ", flush = True) arr = arr[np.unique(arr[:, 0], return_index = True)[1]] print("done") print("converting IP addresses from big-endian to little-endian...", end = " ", flush = True) arr[:, 0].byteswap(inplace = True) print("done") print(f"writing array to '{output_path}'") output_path.write_bytes(arr.tobytes()) print("done") default_tile_size = 1 << ip_bits // 4 default_variant_names = ["density", "rtt"] default_colormap_names = ["viridis"] default_quantile = 0.995 default_compression_level = -1 def make_tiles(coords_path: Path, input_path: Path, tiles_dir: Path, *, tile_size = default_tile_size, alpha = False, negative_zoom = False, variant_names: list[str] = default_variant_names, colormap_names: list[str] = default_colormap_names, raws_path: Path | None = None, quantile = default_quantile, num_rows: int | None = None, skip_iters: int | None = None, compression_level : int = default_compression_level, json_path: Path | None = None): if not 64 <= tile_size <= num_ips_sqrt or tile_size & (tile_size - 1) != 0: raise ValueError(f"tile size must be a power of 2 between 64 and {num_ips_sqrt}") if len(variant_names) == 0: raise ValueError("must specify at least one variant") if len(colormap_names) == 0 and raws_path is None: raise ValueError("must specify at least one colormap or a path to save raws to") if not 0 <= quantile <= 1: raise ValueError(f"quantile must be between 0 and 1") if not -1 <= compression_level <= 9: raise ValueError("compression level must be between 0-9 or -1 for automatic") T = TypeVar("T") def dedup_preserving_order(vals: list[T]) -> list[T]: seen = set() result = [] for item in vals: if item not in seen: seen.add(item) result.append(item) return result colormaps = [(colormap_name, Colormap(colormap_name)) for colormap_name in dedup_preserving_order(colormap_names)] channels = 4 if alpha else 3 empty_color = np.zeros(channels, dtype = np.uint8) num_colors = (1 << 16) - 1 final_size = 1 if negative_zoom else tile_size should_generate_density = False should_generate_rtt = False for variant_name in variant_names: if variant_name == "density": should_generate_density = True elif variant_name == "rtt": should_generate_rtt = True else: raise ValueError(f"unknown variant '{variant_name}'") if skip_iters is not None: num_iters = num_ips_sqrt.bit_length() if negative_zoom else (num_ips_sqrt // tile_size).bit_length() if not 0 <= skip_iters < num_iters: raise ValueError("must skip zero or more but not all iterations") if json_path is not None: if json_path.is_dir(): raise ValueError("json path must not be a directory") try: tiles_dir_parts = tiles_dir.relative_to(json_path.parent).parts except ValueError: raise ValueError("tiles path must be relative to the json path") else: tiles_dir_parts = None def get_chunk(tag: bytes, data = b""): return b"".join((pack("!I", len(data)), tag, data, pack("!I", crc32(data, crc32(tag)) & (2 ** 32 - 1)))) signature = b"\x89PNG\r\n\x1a\n" end_chunk = get_chunk(b"IEND") def create_tiles(path: Path, data: np.ndarray, colors: NDArray[np.uint8] | None = None): if data.shape[0] > tile_size: tiles_per_side = data.shape[0] // tile_size z = tiles_per_side.bit_length() - 1 z_path = path / f"{z}" z_path.mkdir(exist_ok = True, parents = True) def tile_generator(): for y in range(tiles_per_side): y_path = z_path / f"{y}" y_path.mkdir(exist_ok = True) for x in range(tiles_per_side): yield (y_path / f"{x}", data[ y * tile_size : y * tile_size + tile_size, x * tile_size : x * tile_size + tile_size, ]) else: tiles_per_side = 1 z = -((tile_size // data.shape[0]).bit_length() - 1) z_path = path / f"{z}" z_path.mkdir(exist_ok = True, parents = True) def tile_generator(): y_path = z_path / "0" y_path.mkdir(exist_ok = True) yield (y_path / "0", data) print(f"writing {tiles_per_side * tiles_per_side} ({tiles_per_side}x{tiles_per_side}) tiles to '{z_path}'...", end = " ", flush = True) if colors is None: for x_path, tile in tile_generator(): x_path.with_suffix(".bin").write_bytes(compress(tile.tobytes(), level = compression_level)) else: img_size = tile_size if data.shape[0] > tile_size else data.shape[0] ihdr_chunk = get_chunk(b"IHDR", pack("!2I5B", img_size, img_size, 8, 2 if colors.shape[1] == 3 else 6, 0, 0, 0)) preamble = signature + ihdr_chunk for x_path, tile in tile_generator(): idat_chunk = get_chunk(b"IDAT", compress(np.insert(colors[tile].reshape(img_size, -1), 0, 0, axis = 1).tobytes(), level = compression_level)) x_path.with_suffix(".png").write_bytes(b"".join((preamble, idat_chunk, end_chunk))) print("done") def get_colors(colormap: Colormap, num_colors: int = num_colors): return np.concatenate(([empty_color], ((colormap([0.0]) if num_colors == 1 else colormap.lut(num_colors))[:, 0:channels] * 255).astype(np.uint8))) def get_scan_data() -> tuple[NDArray[np.uint32], NDArray[np.uint32]]: print(f"reading scan data from file '{input_path}'...", end = " ", flush = True) data = np.fromfile(input_path, count = num_rows * 2 if num_rows else -1, dtype = np.uint32).reshape(-1, 2) ip_arr = np.copy(data.T[0]) rtt_arr = np.copy(data.T[1]) print("done") return (ip_arr, rtt_arr) def get_all_data() -> tuple[tuple[NDArray[np.uint16], NDArray[np.uint16]], NDArray[np.uint32]]: ip_arr, rtt_arr = get_scan_data() print(f"reading coordinates from file '{coords_path}'...", end = " ", flush = True) ip_coords = np.fromfile(coords_path, dtype = np.uint16).reshape(-1, 2) print("done") print(f"converting ip addresses to coordinates...", end = " ", flush = True) xs, ys = ip_coords[ip_arr].T print("done") return ((ys, xs), rtt_arr) coords, rtt_arr = get_all_data() def normalize_data(data: NDArray[np.uint32], max_value: float) -> NDArray[np.uint16]: divisor = (max_value - 1) / num_colors print("normalizing data: getting non-zero...", end = " ", flush = True) non_zero = data != 0 print("casting to floats...", end = " ", flush = True) data_f = data.astype(np.float32) print("decrementing...", end = " ", flush = True) data_f[non_zero] -= 1 print("dividing...", end = " ", flush = True) data_f /= divisor print("incrementing...", end = " ", flush = True) data_f[non_zero] += 1 del non_zero collect() print("clipping...", end = " ", flush = True) data_f.clip(0, num_colors, out = data_f) print("casting to ints...", end = " ", flush = True) with catch_warnings(action = "ignore"): data_norm = data_f.astype(np.uint16) print("done") return data_norm def generate_density(): possible_overlaps = 1 variant_name = "density" print(f"allocating empty {num_ips_sqrt}x{num_ips_sqrt} array of density data...", end = " ", flush = True) density_data = np.zeros((num_ips_sqrt, num_ips_sqrt), dtype = np.uint32) print("done") print(f"assigning values to density data array...", end = " ", flush = True) density_data[coords] = possible_overlaps print("done") def squish(): nonlocal density_data nonlocal possible_overlaps density_data = np.swapaxes(density_data.reshape(density_data.shape[0] >> 1, 2, density_data.shape[1] >> 1, 2), 1, 2) print("calculating density sum...", end = " ", flush = True) density_data[:, :, 0, 0] += density_data[:, :, 0, 1] density_data[:, :, 0, 0] += density_data[:, :, 1, 0] density_data[:, :, 0, 0] += density_data[:, :, 1, 1] print("done") print(f"shrinking density data from {density_data.shape[0] * 2}x{density_data.shape[1] * 2} to {density_data.shape[0]}x{density_data.shape[1]}...", end = " ", flush = True) density_data = np.copy(density_data[:, :, 0, 0]) print("done") possible_overlaps *= 4 if skip_iters is not None: for _ in range(skip_iters): squish() def write_all_colormaps(): if raws_path is not None: create_tiles(raws_path / variant_name, density_data.view(np.uint8).reshape(density_data.shape[0], density_data.shape[1], 4)) # skip normalizing step and just generate color stop for each possible value if not too many possibilities if possible_overlaps <= num_colors + 1: for colormap_name, colormap in colormaps: create_tiles(tiles_dir / variant_name / colormap_name, density_data, get_colors(colormap, possible_overlaps)) else: density_data_norm = normalize_data(density_data, possible_overlaps) for colormap_name, colormap in colormaps: create_tiles(tiles_dir / variant_name / colormap_name, density_data_norm, get_colors(colormap)) write_all_colormaps() while density_data.shape[0] > final_size: squish() write_all_colormaps() def generate_rtt(): nonlocal rtt_arr variant_name = "rtt" print(f"retrieving {quantile:.1%} quantile for rtt data...", end = " ", flush = True) rtt_quantile = int(np.quantile(rtt_arr, quantile)) print("done") print("clipping rtt data between 0 and quantile...", end = " ", flush = True) rtt_arr.clip(0, rtt_quantile, out = rtt_arr) print("done") print(f"allocating empty {num_ips_sqrt}x{num_ips_sqrt} array for rtt data...", end = " ", flush = True) rtt_data = np.zeros((num_ips_sqrt, num_ips_sqrt), dtype = np.uint32) print("done") print(f"assigning values to rtt data array...", end = " ", flush = True) rtt_data[coords] = rtt_arr print("done") del rtt_arr collect() def squish(): nonlocal rtt_data print("sorting rtt values for median calculation...", end = " ", flush = True) rtt_data = np.swapaxes(rtt_data.reshape(rtt_data.shape[0] >> 1, 2, rtt_data.shape[1] >> 1, 2), 1, 2) mask = np.empty((rtt_data.shape[0], rtt_data.shape[1]), dtype = np.bool_) np.less(rtt_data[:, :, 0, 0], rtt_data[:, :, 0, 1], out = mask) # sort first row rtt_data[mask, 0] = rtt_data[mask, 0, ::-1] np.less(rtt_data[:, :, 1, 0], rtt_data[:, :, 1, 1], out = mask) # sort second row rtt_data[mask, 1] = rtt_data[mask, 1, ::-1] np.less(rtt_data[:, :, 0, 1], rtt_data[:, :, 1, 1], out = mask) # sort second column rtt_data[mask, :, 1] = rtt_data[mask, ::-1, 1] np.greater(rtt_data[:, :, 0, 0], rtt_data[:, :, 1, 0], out = mask) # sort first column in reverse order rtt_data[mask, :, 0] = rtt_data[mask, ::-1, 0] np.greater(rtt_data[:, :, 0, 0], rtt_data[:, :, 0, 1], out = mask) # sort first row in reverse order rtt_data[mask, 0] = rtt_data[mask, 0, ::-1] rtt_data[:, :, :, 0] = rtt_data[:, :, ::-1, 0] # swap first column print("done") print("calculating median rtt values...", end = " ", flush = True) mask2 = np.empty((rtt_data.shape[0], rtt_data.shape[1]), dtype = np.bool_) # need second mask for binary ops np.not_equal(rtt_data[:, :, 1, 1], 0, out = mask) # four nums populated rtt_data[mask, 0, 0] = rtt_data[mask, 0, 1] rtt_data[mask, 0, 0] -= rtt_data[mask, 1, 0] rtt_data[mask, 0, 0] >>= 1 rtt_data[mask, 0, 0] += rtt_data[mask, 1, 0] # take average of middle two nums np.logical_and(np.not_equal(rtt_data[:, :, 1, 0], 0, out = mask), np.equal(rtt_data[:, :, 1, 1], 0, out = mask2), out = mask) # three nums populated rtt_data[mask, 0, 0] = rtt_data[mask, 0, 1] # take middle of three nums np.logical_and(np.not_equal(rtt_data[:, :, 0, 1], 0, out = mask), np.equal(rtt_data[:, :, 1, 0], 0, out = mask2), out = mask) # two nums populated rtt_data[mask, 0, 0] -= rtt_data[mask, 0, 1] rtt_data[mask, 0, 0] >>= 1 rtt_data[mask, 0, 0] += rtt_data[mask, 0, 1] # take average of first two nums # everything else (1 or 0 nums populated) don't need any modifications print("done") print(f"shrinking rtt data from {rtt_data.shape[0] * 2}x{rtt_data.shape[1] * 2} to {rtt_data.shape[0]}x{rtt_data.shape[1]}...", end = " ", flush = True) rtt_data = np.copy(rtt_data[:, :, 0, 0]) print("done") if skip_iters is not None: for _ in range(skip_iters): squish() def write_all_colormaps(): if raws_path is not None: create_tiles(raws_path / variant_name, rtt_data.view(np.uint8).reshape(rtt_data.shape[0], rtt_data.shape[1], 4)) rtt_data_norm = normalize_data(rtt_data, rtt_quantile) for colormap_name, colormap in colormaps: create_tiles(tiles_dir / variant_name / colormap_name, rtt_data_norm, get_colors(colormap)) write_all_colormaps() while rtt_data.shape[0] > final_size: squish() write_all_colormaps() if should_generate_rtt: generate_rtt() else: del rtt_arr collect() if should_generate_density: generate_density() if json_path is not None and tiles_dir_parts is not None: try: text = json_path.read_text(encoding = "UTF-8") except: print("json file not found at provided path, so it will be created instead") tile_metadata = {} else: try: tile_metadata: dict = loads(text) except: print("invalid json found at provided path, so re-creating file") tile_metadata = {} tile_metadata_cur = tile_metadata for part in tiles_dir_parts: if not part in tile_metadata_cur: tile_metadata_cur[part] = {} tile_metadata_cur = tile_metadata_cur[part] for variant_name in variant_names: if not variant_name in tile_metadata_cur: tile_metadata_cur[variant_name] = dedup_preserving_order(colormap_names) else: tile_metadata_cur[variant_name] = dedup_preserving_order(tile_metadata_cur[variant_name] + colormap_names) print(f"writing metadata to json file at '{json_path}'...", end = " ", flush = True) json_path.write_text(dumps(tile_metadata, indent=2), encoding = "UTF-8") print("done") def remove_tiles(tiles_dir: Path, *, json_path: Path | None = None): if not tiles_dir.is_dir(): raise ValueError(f"'{tiles_dir}' is not an existing directory") if json_path: if json_path.is_dir(): raise ValueError("json path must not be a directory") try: *tiles_dir_parts, tiles_dir_final = tiles_dir.relative_to(json_path.parent).parts except ValueError: raise ValueError("tiles path must be relative to but not containing the json path") try: text = json_path.read_text(encoding = "UTF-8") except: raise ValueError("json file not found at provided path") try: tile_metadata = loads(text) except: raise ValueError("invalid json found at provided path") tile_metadata_cur = tile_metadata try: for part in tiles_dir_parts: tile_metadata_cur = tile_metadata_cur[part] if isinstance(tile_metadata_cur, list): tile_metadata_cur = tile_metadata_cur.remove(tiles_dir_final) else: del tile_metadata_cur[tiles_dir_final] except: raise ValueError(f"unable to find path '{'/'.join([*tiles_dir_parts, tiles_dir_final])}' within json file") print(f"writing metadata to json file at '{json_path}'...", end = " ", flush = True) json_path.write_text(dumps(tile_metadata, indent=2), encoding = "UTF-8") print("done") print(f"removing files from '{tiles_dir}'...", end = " ", flush = True) rmtree(tiles_dir) print("done") def parse_list_arg(arg: str): return [x.strip().lower() for x in arg.split(",") if x.strip()] def main(): parser = ArgumentParser("ipmap") parser.add_argument("-q", "--quiet", action = "store_true", help = "decrease output verbosity") subparsers = parser.add_subparsers(dest = "command", required = True, help = "the command to run") mkcoords_parser = subparsers.add_parser("mkcoords", help = "generate coordinates corresponding to each IP address") mkcoords_parser.add_argument("-b", "--batches", default = default_batches, type = int, help = "the number of batches to split the task into (default: %(default)s)") mkcoords_parser.add_argument("-p", "--processes", default = default_processes, type = int, help = "the number of processes to split the task across (default: %(default)s)") mkcoords_parser.add_argument("output", help = "the output path to save the generated coordinates to") convert_parser = subparsers.add_parser("convert", help = "convert scan data from csv to parquet format") convert_parser.add_argument("input", help = "the input path of the csv file to read the scan data from") convert_parser.add_argument("output", help = "the output path of the parquet file to save the converted scan data to") mktiles_parser = subparsers.add_parser("mktiles", help = "generate tile images from scan data in parquet format") mktiles_parser.add_argument("-t", "--tile-size", default = default_tile_size, type = int, help = "the tile size to use (default: %(default)s)") mktiles_parser.add_argument("-a", "--alpha", action = "store_true", help = "use alpha channel instead of black") mktiles_parser.add_argument("-z", "--negative-zoom", action = "store_true", help = "generate negative zoom levels for tiles") mktiles_parser.add_argument("-v", "--variants", default = ",".join(default_variant_names), help = "a comma separated list of variants to generate (default: %(default)s)") mktiles_parser.add_argument("-c", "--colormaps", default = ",".join(default_colormap_names), help = "a comma separated list of colormaps to generate (default: %(default)s)") mktiles_parser.add_argument("-r", "--raws", help = "generate images containing the raw data for each selected variant and save them to the provided path (default: none)") mktiles_parser.add_argument("-q", "--quantile", type = float, default = default_quantile, help = "the quantile to use for scaling data such as rtt (default: %(default)s)") mktiles_parser.add_argument("-n", "--num-rows", type = int, help = "how many rows to read from the scan data (default: all)") mktiles_parser.add_argument("-s", "--skip-iters", type = int, help = "how many iterations to skip generating images for (default: none)") mktiles_parser.add_argument("-C", "--compression-level", default = default_compression_level, type = int, help = "the level of compression to use for the tile images (default: %(default)s)") mktiles_parser.add_argument("-j", "--json", help = "the path for the json file to store metadata about the tile images (default: none)") mktiles_parser.add_argument("coords", help = "the path of the binary file containing the coords to map IP addresses to") mktiles_parser.add_argument("input", help = "the input path of the parquet file to read the scan data from") mktiles_parser.add_argument("output", help = "the output path to save the generated tile images to") rmtiles_parser = subparsers.add_parser("rmtiles", help = "remove tile images") rmtiles_parser.add_argument("-j", "--json", help = "the path for the json file to store metadata about the tile images (default: none)") rmtiles_parser.add_argument("input", help = "the path containing tile images to remove") args = parser.parse_args() try: with redirect_stdout(open(devnull, "w") if args.quiet else stdout): match args.command: case "mkcoords": make_coords(output_path = Path(args.output), batches = args.batches, processes = args.processes) case "convert": convert(input_path = Path(args.input), output_path = Path(args.output)) case "mktiles": make_tiles(coords_path = Path(args.coords), input_path = Path(args.input), tiles_dir = Path(args.output), tile_size = args.tile_size, alpha = args.alpha, negative_zoom = args.negative_zoom, variant_names = parse_list_arg(args.variants), colormap_names = parse_list_arg(args.colormaps), raws_path = Path(args.raws) if args.raws else None, quantile = args.quantile, num_rows = args.num_rows, skip_iters = args.skip_iters, compression_level = args.compression_level, json_path = Path(args.json) if args.json else None) case "rmtiles": remove_tiles(tiles_dir = Path(args.input), json_path = Path(args.json) if args.json else None) case _: raise ValueError("invalid command") except ValueError as e: print(f"error: {e}", file = stderr) exit(1) if __name__ == "__main__": main()