|
|
|
@ -1,103 +1,22 @@
|
|
|
|
|
#!/usr/bin/env python3
|
|
|
|
|
|
|
|
|
|
from os import devnull
|
|
|
|
|
from sys import stdout, stderr, exit
|
|
|
|
|
from contextlib import redirect_stdout
|
|
|
|
|
from argparse import ArgumentParser
|
|
|
|
|
from warnings import catch_warnings
|
|
|
|
|
from multiprocessing import Pool
|
|
|
|
|
from shutil import rmtree
|
|
|
|
|
from gc import collect
|
|
|
|
|
from json import loads, dumps
|
|
|
|
|
import sys
|
|
|
|
|
import math
|
|
|
|
|
import functools
|
|
|
|
|
import argparse
|
|
|
|
|
import json
|
|
|
|
|
import shutil
|
|
|
|
|
from pathlib import Path
|
|
|
|
|
from dataclasses import dataclass
|
|
|
|
|
from typing import Literal, TypeVar
|
|
|
|
|
from png import Writer
|
|
|
|
|
from cmap import Colormap
|
|
|
|
|
from hilbert import decode
|
|
|
|
|
from numpy.typing import NDArray
|
|
|
|
|
import png
|
|
|
|
|
import hilbert
|
|
|
|
|
import numpy as np
|
|
|
|
|
import polars as pl
|
|
|
|
|
from cmap import Colormap
|
|
|
|
|
from multiprocessing import Pool
|
|
|
|
|
from dataclasses import dataclass
|
|
|
|
|
from typing import Literal
|
|
|
|
|
|
|
|
|
|
# Width of one IP address as used throughout this file, in bytes and in bits.
ip_bytes = 4
ip_bits = 8 * ip_bytes

# Total number of distinct addresses of that width.
num_ips = 2 ** ip_bits

# Side length of the square grid that holds one cell per address
# (half of the address bits are spent on each axis).
num_ips_sqrt = 2 ** (ip_bits // 2)
|
|
|
|
|
|
|
|
|
|
def make_coord_range(start: int, end: int):
    """Decode the Hilbert indices in ``[start, end)`` into 2-D coordinates.

    Each integer in the half-open range is handed to ``decode`` as a
    two-dimensional Hilbert index with 16 bits per axis, and the decoded
    array is narrowed to ``uint16`` before being returned.
    """
    indices = np.arange(start, end, dtype = np.uint32)
    decoded = decode(indices, num_dims = 2, num_bits = 16)
    return decoded.astype(np.uint16)
|
|
|
|
|
|
|
|
|
|
default_batches = 64
|
|
|
|
|
default_processes = 4
|
|
|
|
|
|
|
|
|
|
def make_coords(output_path: Path, batches = default_batches, processes = default_processes):
    """Compute the coordinates of every address and write them to a file.

    The ``num_ips`` address space is cut into ``batches`` equal batches, and
    each batch is cut into ``processes`` equal ranges that a worker pool
    decodes with ``make_coord_range``. The resulting arrays are appended to
    ``output_path`` in address order, one batch at a time.

    Raises:
        ValueError: if ``batches`` is outside 1..65536, if ``processes`` is
            outside 1..256, if the address space does not divide evenly into
            batches and then into per-process ranges, or if ``output_path``
            is a directory.
    """
    if not 1 <= batches <= 0x10000:
        raise ValueError(f"batches must be between 1 and {0x10000}")
    if not 1 <= processes <= 256:
        raise ValueError("processes must be between 1 and 256")

    batch_span, batch_remainder = divmod(num_ips, batches)
    if batch_remainder > 0:
        raise ValueError("the total number of ips must evenly divide into the number of batches")

    chunk_span, chunk_remainder = divmod(batch_span, processes)
    if chunk_remainder > 0:
        raise ValueError("the number of ips within each batch must evenly divide into the number of processes")

    if output_path.is_dir():
        raise ValueError("output path must not be a directory")

    # Start from an empty file; each batch below is appended to it.
    output_path.write_bytes(b'')

    with Pool(processes) as pool:
        for batch in range(batches):
            print(f"starting batch {batch}...")
            # Chunks are numbered globally, so batch N owns chunk indices
            # [N * processes, (N + 1) * processes).
            first_chunk = batch * processes
            ranges = [
                (index * chunk_span, (index + 1) * chunk_span)
                for index in range(first_chunk, first_chunk + processes)
            ]
            results = pool.starmap(make_coord_range, ranges)
            print("finished batch, writing arrays to file...")
            with output_path.open("ab") as out:
                for result in results:
                    out.write(result.tobytes())
            print("finished writing to file")
|
|
|
|
|
|
|
|
|
|
def convert(input_path: Path, output_path: Path):
    """Convert a scan CSV into a packed binary file of ``uint32`` pairs.

    The CSV at ``input_path`` is read with its first line skipped as a header
    and every value parsed as ``uint32``. Rows whose last column is not ``1``
    are dropped, the last column is removed, and only the first row for each
    distinct value of column 0 is kept (rows end up ordered by that value).
    Column 0 is then byte-swapped in place and the array's raw bytes are
    written to ``output_path``.

    Args:
        input_path: path of the comma-separated input file.
        output_path: path of the binary file to write.
    """
    print(f"reading csv '{input_path}' into array...", end = " ", flush = True)
    # ndmin = 2 keeps the array two-dimensional even when the file holds a
    # single data row; without it loadtxt returns a 1-D array and the column
    # indexing below raises IndexError.
    arr = np.loadtxt(input_path, dtype = np.uint32, delimiter = ",", skiprows = 1, ndmin = 2)
    print("done")

    print("filtering out unsuccessful values...", end = " ", flush = True)
    arr = arr[arr[:, -1] == 1]
    print("done")

    print("removing success column...", end = " ", flush = True)
    arr = arr[:, :-1]
    print("done")

    print("removing duplicate IP addresses...", end = " ", flush = True)
    # return_index gives the position of the first occurrence of each
    # distinct address, so indexing with it keeps one row per address.
    arr = arr[np.unique(arr[:, 0], return_index = True)[1]]
    print("done")

    print("converting IP addresses from big-endian to little-endian...", end = " ", flush = True)
    # The fancy-indexed result above is a fresh array, so swapping this
    # column view in place modifies arr itself.
    arr[:, 0].byteswap(inplace = True)
    print("done")

    print(f"writing array to '{output_path}'")
    output_path.write_bytes(arr.tobytes())
    print("done")
|
|
|
|
|
|
|
|
|
|
default_tile_size = 1 << ip_bits // 4
|
|
|
|
|
default_variant_names = ["density", "rtt"]
|
|
|
|
|
default_colormap_names = ["viridis"]
|
|
|
|
|
default_quantile = 0.995
|
|
|
|
|
|
|
|
|
|
def make_tiles(coords_path: Path, input_path: Path, tiles_dir: Path, *,
|
|
|
|
|
tile_size = default_tile_size, alpha = False,
|
|
|
|
|
variant_names: list[str] = default_variant_names,
|
|
|
|
|
colormap_names: list[str] = default_colormap_names,
|
|
|
|
|
quantile = default_quantile, num_rows: int | None = None,
|
|
|
|
|
skip_iters: int | None = None, json_path: Path | None = None):
|
|
|
|
|
|
|
|
|
|
if not 64 <= tile_size <= num_ips_sqrt or tile_size & (tile_size - 1) != 0:
|
|
|
|
|
raise ValueError(f"tile size must be a power of 2 between 64 and {num_ips_sqrt}")
|
|
|
|
|
if len(variant_names) == 0:
|
|
|
|
|
raise ValueError("must specify at least one variant")
|
|
|
|
|
if len(colormap_names) == 0:
|
|
|
|
|
raise ValueError("must specify at least one colormap")
|
|
|
|
|
if not 0 <= quantile <= 1:
|
|
|
|
|
raise ValueError(f"quantile must be between 0 and 1")
|
|
|
|
|
|
|
|
|
|
T = TypeVar("T")
|
|
|
|
|
def dedup_preserving_order(vals: list[T]) -> list[T]:
|
|
|
|
|
def dedup_preserving_order(vals: list) -> list:
|
|
|
|
|
seen = set()
|
|
|
|
|
result = []
|
|
|
|
|
for item in vals:
|
|
|
|
@ -106,23 +25,74 @@ def make_tiles(coords_path: Path, input_path: Path, tiles_dir: Path, *,
|
|
|
|
|
result.append(item)
|
|
|
|
|
return result
|
|
|
|
|
|
|
|
|
|
colormaps = [(colormap_name, Colormap(colormap_name)) for colormap_name in dedup_preserving_order(colormap_names)]
|
|
|
|
|
channels = 4 if alpha else 3
|
|
|
|
|
empty_color = np.zeros(channels, dtype = np.uint8)
|
|
|
|
|
def convert_to_parquet(csv_path: Path, parquet_path: Path, *, quiet = False):
    """Stream a scan CSV into a parquet file with per-address coordinates.

    The CSV is scanned lazily with the columns ``saddr`` (string),
    ``rtt_us`` and ``success``. Only rows with ``success == 1`` are kept and
    the ``success`` column is dropped; ``rtt_us`` is clipped to the ``uint32``
    range and cast; the dotted ``saddr`` string is packed into one ``uint32``
    with the first octet most significant; rows are de-duplicated on
    ``saddr``; and ``hilbert.decode`` (2 dimensions, 16 bits each) supplies
    the ``x`` and ``y`` columns. The result is sunk to ``parquet_path``.

    Args:
        csv_path: path of the CSV file to scan.
        parquet_path: path of the parquet file to write.
        quiet: when true, the progress messages are not printed.
    """
    if not quiet:
        print(f"scanning csv '{csv_path}' into parquet '{parquet_path}'...", end = " ", flush = True)

    schema = {
        "saddr": pl.String,
        "rtt_us": pl.UInt64,
        "success": pl.UInt8
    }

    def octet(name: str):
        # One named field of the split address struct, widened to uint32.
        return pl.col("saddr").struct.field(name).cast(pl.UInt32)

    # Expressions are lazy, so they can be built before the pipeline below
    # establishes the columns they refer to.
    split_address = pl.col("saddr").str.split_exact(".", 3).struct.rename_fields(["a", "b", "c", "d"])
    packed_address = octet("a") * 0x1000000 + octet("b") * 0x10000 + octet("c") * 0x100 + octet("d")
    to_coords = functools.partial(hilbert.decode, num_dims = 2, num_bits = 16)

    lf = (
        pl.scan_csv(csv_path, schema = schema)
        .filter(pl.col("success") == 1)
        .drop("success")
        .with_columns(rtt_us = pl.col("rtt_us").clip(0, 0xFFFFFFFF).cast(pl.UInt32))
        .with_columns(saddr = split_address)
        .with_columns(saddr = packed_address)
        .unique("saddr")
        .with_columns(coords = pl.col("saddr").map_batches(to_coords, pl.Array(pl.UInt16, 2), is_elementwise = True))
        .with_columns(x = pl.col("coords").arr.get(0), y = pl.col("coords").arr.get(1))
        .drop("coords")
    )
    lf.sink_parquet(parquet_path)

    if not quiet:
        print("done")
|
|
|
|
|
|
|
|
|
|
should_generate_density = False
|
|
|
|
|
should_generate_rtt = False
|
|
|
|
|
for variant_name in variant_names:
|
|
|
|
|
if variant_name == "density":
|
|
|
|
|
should_generate_density = True
|
|
|
|
|
elif variant_name == "rtt":
|
|
|
|
|
should_generate_rtt = True
|
|
|
|
|
def write_tile(path: Path, rows: np.ndarray, *, alpha = False):
    """Write one tile image to ``path`` as a non-greyscale PNG.

    The parent directory of ``path`` is created if it does not exist. The
    image width and height are taken from ``rows.shape[1]`` and
    ``rows.shape[0]`` respectively, and ``rows`` is passed unchanged to
    ``png.Writer.write_packed``.

    Args:
        path: destination file for the PNG.
        rows: two-dimensional array holding the packed pixel rows.
        alpha: whether the PNG is written with an alpha channel.
    """
    path.parent.mkdir(exist_ok = True, parents = True)
    writer = png.Writer(rows.shape[1], rows.shape[0], greyscale = False, alpha = alpha)
    # Close the file deterministically: this function is called once per
    # tile, and an unclosed handle per call leaks descriptors and can leave
    # data unflushed until garbage collection.
    with path.open("wb") as f:
        writer.write_packed(f, rows)
|
|
|
|
|
|
|
|
|
|
default_tile_size = 256
|
|
|
|
|
default_variants = ["density", "rtt"]
|
|
|
|
|
default_colormaps = ["viridis"]
|
|
|
|
|
default_quantile = 0.995
|
|
|
|
|
default_processes = 16
|
|
|
|
|
|
|
|
|
|
def generate_tiles(parquet_path: Path, tiles_dir: Path, *, tile_size = default_tile_size, alpha = False,
|
|
|
|
|
variants: list[str] = default_variants, colormaps: list[str] = default_colormaps,
|
|
|
|
|
quantile = default_quantile, processes = default_processes, num_rows: int | None = None,
|
|
|
|
|
skip_iters: int | None = None, json_path: Path | None = None, quiet = False):
|
|
|
|
|
|
|
|
|
|
if not 1 <= tile_size <= 0x10000 or tile_size & (tile_size - 1) != 0:
|
|
|
|
|
raise ValueError(f"tile size must be a power of 2 between 1 and {0x10000}")
|
|
|
|
|
tiles_per_side = int(math.sqrt(0x100000000)) // tile_size
|
|
|
|
|
|
|
|
|
|
if len(variants) == 0:
|
|
|
|
|
raise ValueError("must specify at least one variant")
|
|
|
|
|
|
|
|
|
|
if len(colormaps) == 0:
|
|
|
|
|
raise ValueError("must specify at least one colormap")
|
|
|
|
|
|
|
|
|
|
if not 0 <= quantile <= 1:
|
|
|
|
|
raise ValueError(f"quantile must be between 0 and 1")
|
|
|
|
|
|
|
|
|
|
colormaps = dedup_preserving_order(colormaps)
|
|
|
|
|
channels = 4 if alpha else 3
|
|
|
|
|
colormaps_by_name = { colormap: [bytes(c) for c in (Colormap(colormap).lut()[:,0:channels] * (256.0 - np.finfo(np.float32).eps)).astype(np.uint8)] for colormap in colormaps }
|
|
|
|
|
|
|
|
|
|
generate_density = False
|
|
|
|
|
generate_rtt = False
|
|
|
|
|
for variant in variants:
|
|
|
|
|
if variant == "density":
|
|
|
|
|
generate_density = True
|
|
|
|
|
elif variant == "rtt":
|
|
|
|
|
generate_rtt = True
|
|
|
|
|
else:
|
|
|
|
|
raise ValueError(f"unknown variant '{variant_name}'")
|
|
|
|
|
raise ValueError(f"unknown variant '{variant}'")
|
|
|
|
|
|
|
|
|
|
if skip_iters is not None:
|
|
|
|
|
if not 0 <= skip_iters < (num_ips_sqrt // tile_size).bit_length():
|
|
|
|
|
raise ValueError("must skip zero or more but not all iterations")
|
|
|
|
|
if skip_iters <= 0:
|
|
|
|
|
raise ValueError("cannot skip negative iterations")
|
|
|
|
|
elif skip_iters >= tiles_per_side.bit_length():
|
|
|
|
|
raise ValueError("cannot skip all iterations")
|
|
|
|
|
|
|
|
|
|
if json_path is not None:
|
|
|
|
|
if json_path.is_dir():
|
|
|
|
@ -134,193 +104,89 @@ def make_tiles(coords_path: Path, input_path: Path, tiles_dir: Path, *,
|
|
|
|
|
else:
|
|
|
|
|
tiles_dir_parts = None
|
|
|
|
|
|
|
|
|
|
def create_images(data: np.ndarray, colormap: Colormap, num_colors: int, path: Path):
|
|
|
|
|
print(f"creating {num_colors} color stop(s) of {colormap.name} colormap...", end = " ", flush = True)
|
|
|
|
|
colors = np.concatenate(([empty_color], ((colormap([0.0]) if num_colors == 1 else colormap.lut(num_colors))[:, 0:channels] * 255).astype(np.uint8)))
|
|
|
|
|
if not quiet:
|
|
|
|
|
print(f"reading parquet '{parquet_path}'...", end = " ", flush = True)
|
|
|
|
|
df = pl.read_parquet(parquet_path, columns = ["x", "y", "rtt_us"], n_rows=num_rows)
|
|
|
|
|
if not quiet:
|
|
|
|
|
print("done")
|
|
|
|
|
print(f"creating {data.shape[1]}x{data.shape[0]} pixel image for {colormap.name} colormap...", end = " ", flush = True)
|
|
|
|
|
image_data = colors[data]
|
|
|
|
|
|
|
|
|
|
possible_overlaps = 1
|
|
|
|
|
rtt_quantile = df.get_column("rtt_us").quantile(quantile) or 1.0
|
|
|
|
|
df = df.with_columns(count = pl.lit(possible_overlaps, pl.UInt32), rtt_us = pl.col("rtt_us").clip(0, rtt_quantile))
|
|
|
|
|
|
|
|
|
|
write_tile_p = functools.partial(write_tile, alpha = alpha)
|
|
|
|
|
|
|
|
|
|
def generate_images(colormap: str, type_name: str, series: pl.Series):
|
|
|
|
|
nonlocal df
|
|
|
|
|
|
|
|
|
|
if not quiet:
|
|
|
|
|
print(f"creating {type_name} image data with {colormap} colormap...", end = " ", flush = True)
|
|
|
|
|
image_data = np.zeros((tiles_per_side * tile_size, tiles_per_side * tile_size), dtype = f"S{channels}")
|
|
|
|
|
image_data[(df.get_column("y"), df.get_column("x"))] = (series * 255.9999).clip(0, 255).cast(pl.UInt8).replace(pl.int_range(256), colormaps_by_name[colormap], return_dtype = pl.Binary)
|
|
|
|
|
if not quiet:
|
|
|
|
|
print("done")
|
|
|
|
|
tiles_per_side = image_data.shape[0] // tile_size
|
|
|
|
|
|
|
|
|
|
if not quiet:
|
|
|
|
|
print(f"writing {tiles_per_side}x{tiles_per_side}={tiles_per_side * tiles_per_side} {type_name} images with {colormap} colormap...", end = " ", flush = True)
|
|
|
|
|
with Pool(processes) as pool:
|
|
|
|
|
z = tiles_per_side.bit_length() - 1
|
|
|
|
|
z_path = path / f"{z}"
|
|
|
|
|
z_path = tiles_dir / type_name / colormap / f"{z}"
|
|
|
|
|
z_path.mkdir(exist_ok = True, parents = True)
|
|
|
|
|
print(f"writing {tiles_per_side * tiles_per_side} ({tiles_per_side}x{tiles_per_side}) images to '{path}'...", end = " ", flush = True)
|
|
|
|
|
for y in range(tiles_per_side):
|
|
|
|
|
y_path = z_path / f"{y}"
|
|
|
|
|
y_path.mkdir(exist_ok = True)
|
|
|
|
|
for x in range(tiles_per_side):
|
|
|
|
|
path = y_path / f"{x}.png"
|
|
|
|
|
rows = image_data[
|
|
|
|
|
pool.starmap(write_tile_p, [
|
|
|
|
|
(z_path / f"{y}" / f"{x}.png", image_data[
|
|
|
|
|
y * tile_size : y * tile_size + tile_size,
|
|
|
|
|
x * tile_size : x * tile_size + tile_size,
|
|
|
|
|
]
|
|
|
|
|
Writer(tile_size, tile_size, greyscale = False, alpha = alpha).write_packed(path.open("wb"), rows)
|
|
|
|
|
])
|
|
|
|
|
for y in range(tiles_per_side)
|
|
|
|
|
for x in range(tiles_per_side)
|
|
|
|
|
])
|
|
|
|
|
if not quiet:
|
|
|
|
|
print("done")
|
|
|
|
|
del image_data
|
|
|
|
|
|
|
|
|
|
def get_scan_data() -> tuple[NDArray[np.uint32], NDArray[np.uint32]]:
|
|
|
|
|
print(f"reading scan data from file '{input_path}'...", end = " ", flush = True)
|
|
|
|
|
data = np.fromfile(input_path, dtype = np.uint32).reshape(-1, 2)
|
|
|
|
|
ip_arr = np.copy(data.T[0])
|
|
|
|
|
rtt_arr = np.copy(data.T[1])
|
|
|
|
|
print("done")
|
|
|
|
|
return (ip_arr, rtt_arr)
|
|
|
|
|
|
|
|
|
|
def get_all_data() -> tuple[tuple[NDArray[np.uint16], NDArray[np.uint16]], NDArray[np.uint32]]:
|
|
|
|
|
ip_arr, rtt_arr = get_scan_data()
|
|
|
|
|
print(f"reading coordinates from file '{coords_path}'...", end = " ", flush = True)
|
|
|
|
|
ip_coords = np.fromfile(coords_path, dtype = np.uint16).reshape(-1, 2)
|
|
|
|
|
print("done")
|
|
|
|
|
print(f"converting ip addresses to coordinates...", end = " ", flush = True)
|
|
|
|
|
xs, ys = ip_coords[ip_arr].T
|
|
|
|
|
print("done")
|
|
|
|
|
return ((ys, xs), rtt_arr)
|
|
|
|
|
|
|
|
|
|
coords, rtt_arr = get_all_data()
|
|
|
|
|
|
|
|
|
|
def generate_density():
|
|
|
|
|
possible_overlaps = 1
|
|
|
|
|
|
|
|
|
|
print(f"allocating empty {num_ips_sqrt}x{num_ips_sqrt} array of density data...", end = " ", flush = True)
|
|
|
|
|
density_data = np.zeros((num_ips_sqrt, num_ips_sqrt), dtype = np.uint32)
|
|
|
|
|
print("done")
|
|
|
|
|
print(f"assigning values to density data array...", end = " ", flush = True)
|
|
|
|
|
density_data[coords] = possible_overlaps
|
|
|
|
|
print("done")
|
|
|
|
|
|
|
|
|
|
def squish():
|
|
|
|
|
nonlocal density_data
|
|
|
|
|
def scale_down_coords(scale = 2):
|
|
|
|
|
nonlocal df
|
|
|
|
|
nonlocal tiles_per_side
|
|
|
|
|
nonlocal possible_overlaps
|
|
|
|
|
density_data = np.swapaxes(density_data.reshape(density_data.shape[0] // 2, 2, density_data.shape[1] // 2, 2), 1, 2)
|
|
|
|
|
print("calculating density sum...", end = " ", flush = True)
|
|
|
|
|
density_data[:, :, 0, 0] += density_data[:, :, 0, 1]
|
|
|
|
|
density_data[:, :, 0, 0] += density_data[:, :, 1, 0]
|
|
|
|
|
density_data[:, :, 0, 0] += density_data[:, :, 1, 1]
|
|
|
|
|
print(f"done (shrunk density data from {density_data.shape[0] * 2}x{density_data.shape[1] * 2} -> {density_data.shape[0]}x{density_data.shape[1]})")
|
|
|
|
|
density_data = density_data[:, :, 0, 0]
|
|
|
|
|
possible_overlaps *= 4
|
|
|
|
|
|
|
|
|
|
if skip_iters is not None:
|
|
|
|
|
for _ in range(skip_iters):
|
|
|
|
|
squish()
|
|
|
|
|
prev_tiles_per_side = tiles_per_side
|
|
|
|
|
tiles_per_side //= scale
|
|
|
|
|
possible_overlaps *= scale * scale
|
|
|
|
|
|
|
|
|
|
def write_all_colormaps():
|
|
|
|
|
for colormap_name, colormap in colormaps:
|
|
|
|
|
create_images(density_data, colormap, possible_overlaps, tiles_dir / "density" / colormap_name)
|
|
|
|
|
if not quiet:
|
|
|
|
|
print(f"scaling {len(df)} coords down from {prev_tiles_per_side}x{prev_tiles_per_side} tiles to {tiles_per_side}x{tiles_per_side} tiles...", end = " ", flush = True)
|
|
|
|
|
df = df.with_columns(x = pl.col("x") // scale, y = pl.col("y") // scale).group_by(["x", "y"]).agg(count = pl.sum("count"), rtt_us = pl.median("rtt_us"))
|
|
|
|
|
if not quiet:
|
|
|
|
|
print(f"done with {len(df)} coords remaining")
|
|
|
|
|
|
|
|
|
|
write_all_colormaps()
|
|
|
|
|
while density_data.shape[0] > tile_size:
|
|
|
|
|
squish()
|
|
|
|
|
write_all_colormaps()
|
|
|
|
|
if skip_iters is not None and skip_iters > 0:
|
|
|
|
|
scale_down_coords(1 << skip_iters)
|
|
|
|
|
|
|
|
|
|
def generate_rtt():
|
|
|
|
|
num_colors = (1 << 16) - 1
|
|
|
|
|
multiplier = num_colors - 1
|
|
|
|
|
|
|
|
|
|
def get_rtt_data():
|
|
|
|
|
nonlocal rtt_arr
|
|
|
|
|
print(f"retrieving {quantile:.1%} quantile for rtt data...", end = " ", flush = True)
|
|
|
|
|
rtt_quantile = np.quantile(rtt_arr, quantile)
|
|
|
|
|
print("done")
|
|
|
|
|
print(f"scaling rtt data using rtt quantile...", end = " ", flush = True)
|
|
|
|
|
rtt_arr_f = rtt_arr / rtt_quantile
|
|
|
|
|
print("done")
|
|
|
|
|
del rtt_arr
|
|
|
|
|
collect()
|
|
|
|
|
print("clipping rtt data between 0 and 1...", end = " ", flush = True)
|
|
|
|
|
rtt_arr_f.clip(0, 1, out = rtt_arr_f)
|
|
|
|
|
print("done")
|
|
|
|
|
|
|
|
|
|
print(f"allocating empty {num_ips_sqrt}x{num_ips_sqrt} array for rtt data...", end = " ", flush = True)
|
|
|
|
|
rtt_data = np.full((num_ips_sqrt, num_ips_sqrt), np.nan, dtype = np.float32)
|
|
|
|
|
print("done")
|
|
|
|
|
print(f"assigning values to rtt data array...", end = " ", flush = True)
|
|
|
|
|
rtt_data[coords] = rtt_arr_f
|
|
|
|
|
print("done")
|
|
|
|
|
return rtt_data
|
|
|
|
|
|
|
|
|
|
rtt_data = get_rtt_data()
|
|
|
|
|
|
|
|
|
|
def squish():
|
|
|
|
|
nonlocal rtt_data
|
|
|
|
|
print(f"sorting rtt values for median calculation...", end = " ", flush = True)
|
|
|
|
|
rtt_data = np.swapaxes(rtt_data.reshape(rtt_data.shape[0] // 2, 2, rtt_data.shape[1] // 2, 2), 1, 2)
|
|
|
|
|
rtt_data[np.isnan(rtt_data)] = np.inf # convert NaNs to Inf so comparisons work correctly
|
|
|
|
|
mask = np.empty((rtt_data.shape[0], rtt_data.shape[1]), dtype = np.bool_)
|
|
|
|
|
np.greater(rtt_data[:, :, 0, 0], rtt_data[:, :, 0, 1], out = mask) # sort first row
|
|
|
|
|
rtt_data[mask, 0] = rtt_data[mask, 0, ::-1]
|
|
|
|
|
np.greater(rtt_data[:, :, 1, 0], rtt_data[:, :, 1, 1], out = mask) # sort second row
|
|
|
|
|
rtt_data[mask, 1] = rtt_data[mask, 1, ::-1]
|
|
|
|
|
np.greater(rtt_data[:, :, 0, 0], rtt_data[:, :, 1, 0], out = mask) # sort first column
|
|
|
|
|
rtt_data[mask, :, 0] = rtt_data[mask, ::-1, 0]
|
|
|
|
|
np.less(rtt_data[:, :, 0, 1], rtt_data[:, :, 1, 1], out = mask) # sort second column in reverse order
|
|
|
|
|
rtt_data[mask, :, 1] = rtt_data[mask, ::-1, 1]
|
|
|
|
|
np.less(rtt_data[:, :, 1, 0], rtt_data[:, :, 1, 1], out = mask) # sort second row in reverse order
|
|
|
|
|
rtt_data[mask, 1] = rtt_data[mask, 1, ::-1]
|
|
|
|
|
# rtt_data[:, :, :, 1] = rtt_data[:, :, ::-1, 1] # swap second column (not entirely necessary, just makes indices below nicer)
|
|
|
|
|
rtt_data[np.isinf(rtt_data)] = np.nan # restore NaNs
|
|
|
|
|
print("done")
|
|
|
|
|
print("calculating median rtt values...", end = " ", flush = True)
|
|
|
|
|
mask2 = np.empty((rtt_data.shape[0], rtt_data.shape[1]), dtype = np.bool_) # need second mask for binary ops
|
|
|
|
|
np.invert(np.isnan(rtt_data[:, :, 0, 1], out = mask), out = mask) # four nums populated
|
|
|
|
|
rtt_data[mask, 0, 0] = rtt_data[mask, 1, 1] # take average of index 1 and 2
|
|
|
|
|
rtt_data[mask, 0, 0] += rtt_data[mask, 1, 0]
|
|
|
|
|
rtt_data[mask, 0, 0] /= 2
|
|
|
|
|
np.logical_and(np.invert(np.isnan(rtt_data[:, :, 1, 0], out = mask), out = mask), np.isnan(rtt_data[:, :, 0, 1], out = mask2), out = mask) # three nums populated
|
|
|
|
|
rtt_data[mask, 0, 0] = rtt_data[mask, 1, 1] # take index 1
|
|
|
|
|
np.logical_and(np.invert(np.isnan(rtt_data[:, :, 1, 1], out = mask), out = mask), np.isnan(rtt_data[:, :, 1, 0], out = mask2), out = mask) # two nums populated
|
|
|
|
|
rtt_data[mask, 0, 0] = rtt_data[mask, 0, 0] # take average of index 0 and 1
|
|
|
|
|
rtt_data[mask, 0, 0] += rtt_data[mask, 1, 1]
|
|
|
|
|
rtt_data[mask, 0, 0] /= 2
|
|
|
|
|
# everything else (1 or 0 nums populated) don't need any modifications
|
|
|
|
|
print(f"done (shrunk rtt data from {rtt_data.shape[0] * 2}x{rtt_data.shape[1] * 2} -> {rtt_data.shape[0]}x{rtt_data.shape[1]})")
|
|
|
|
|
rtt_data = rtt_data[:, :, 0, 0]
|
|
|
|
|
|
|
|
|
|
if skip_iters is not None:
|
|
|
|
|
for _ in range(skip_iters):
|
|
|
|
|
squish()
|
|
|
|
|
|
|
|
|
|
def get_normalized_data():
|
|
|
|
|
print(f"normalizing rtt data: multiplying...", end = " ", flush = True)
|
|
|
|
|
rtt_data_f = rtt_data * multiplier
|
|
|
|
|
print(f"incrementing...", end = " ", flush = True)
|
|
|
|
|
rtt_data_f += 1
|
|
|
|
|
# print(f"replacing NaNs...", end = " ", flush = True)
|
|
|
|
|
# rtt_data_f[np.isnan(rtt_data_f)] = 0.0
|
|
|
|
|
print(f"converting to ints...", end = " ", flush = True)
|
|
|
|
|
with catch_warnings():
|
|
|
|
|
rtt_data_norm = rtt_data_f.astype(np.uint16)
|
|
|
|
|
print("done")
|
|
|
|
|
return rtt_data_norm
|
|
|
|
|
|
|
|
|
|
def write_all_colormaps():
|
|
|
|
|
rtt_data_norm = get_normalized_data()
|
|
|
|
|
for colormap_name, colormap in colormaps:
|
|
|
|
|
create_images(rtt_data_norm, colormap, num_colors, tiles_dir / "rtt" / colormap_name)
|
|
|
|
|
|
|
|
|
|
write_all_colormaps()
|
|
|
|
|
while rtt_data.shape[0] > tile_size:
|
|
|
|
|
squish()
|
|
|
|
|
write_all_colormaps()
|
|
|
|
|
|
|
|
|
|
if should_generate_rtt:
|
|
|
|
|
generate_rtt()
|
|
|
|
|
else:
|
|
|
|
|
del rtt_arr
|
|
|
|
|
collect()
|
|
|
|
|
if should_generate_density:
|
|
|
|
|
generate_density()
|
|
|
|
|
while True:
|
|
|
|
|
for colormap in colormaps:
|
|
|
|
|
if generate_density:
|
|
|
|
|
divisor = 256 if possible_overlaps == 1 else possible_overlaps
|
|
|
|
|
series = df.get_column("count") / divisor
|
|
|
|
|
generate_images(colormap, "density", series)
|
|
|
|
|
if generate_rtt:
|
|
|
|
|
series = df.get_column("rtt_us") / rtt_quantile
|
|
|
|
|
generate_images(colormap, "rtt", series)
|
|
|
|
|
if tiles_per_side == 1:
|
|
|
|
|
break
|
|
|
|
|
scale_down_coords()
|
|
|
|
|
|
|
|
|
|
if json_path is not None and tiles_dir_parts is not None:
|
|
|
|
|
try:
|
|
|
|
|
text = json_path.read_text(encoding = "UTF-8")
|
|
|
|
|
except:
|
|
|
|
|
if not quiet:
|
|
|
|
|
print("json file not found at provided path, so it will be created instead")
|
|
|
|
|
tile_metadata = {}
|
|
|
|
|
else:
|
|
|
|
|
try:
|
|
|
|
|
tile_metadata: dict = loads(text)
|
|
|
|
|
tile_metadata: dict = json.loads(text)
|
|
|
|
|
except:
|
|
|
|
|
if not quiet:
|
|
|
|
|
print("invalid json found at provided path, so re-creating file")
|
|
|
|
|
tile_metadata = {}
|
|
|
|
|
tile_metadata_cur = tile_metadata
|
|
|
|
@ -328,16 +194,18 @@ def make_tiles(coords_path: Path, input_path: Path, tiles_dir: Path, *,
|
|
|
|
|
if not part in tile_metadata_cur:
|
|
|
|
|
tile_metadata_cur[part] = {}
|
|
|
|
|
tile_metadata_cur = tile_metadata_cur[part]
|
|
|
|
|
for variant_name in variant_names:
|
|
|
|
|
if not variant_name in tile_metadata_cur:
|
|
|
|
|
tile_metadata_cur[variant_name] = dedup_preserving_order(colormap_names)
|
|
|
|
|
for variant in variants:
|
|
|
|
|
if not variant in tile_metadata_cur:
|
|
|
|
|
tile_metadata_cur[variant] = colormaps
|
|
|
|
|
else:
|
|
|
|
|
tile_metadata_cur[variant_name] = dedup_preserving_order(tile_metadata_cur[variant_name] + colormap_names)
|
|
|
|
|
tile_metadata_cur[variant] = dedup_preserving_order(tile_metadata_cur[variant] + colormaps)
|
|
|
|
|
if not quiet:
|
|
|
|
|
print(f"writing metadata to json file at '{json_path}'...", end = " ", flush = True)
|
|
|
|
|
json_path.write_text(dumps(tile_metadata, indent=2), encoding = "UTF-8")
|
|
|
|
|
json_path.write_text(json.dumps(tile_metadata, indent=2), encoding = "UTF-8")
|
|
|
|
|
if not quiet:
|
|
|
|
|
print("done")
|
|
|
|
|
|
|
|
|
|
def remove_tiles(tiles_dir: Path, *, json_path: Path | None = None):
|
|
|
|
|
def remove_tiles(tiles_dir: Path, *, json_path: Path | None = None, quiet = False):
|
|
|
|
|
if not tiles_dir.is_dir():
|
|
|
|
|
raise ValueError(f"'{tiles_dir}' is not an existing directory")
|
|
|
|
|
|
|
|
|
@ -353,7 +221,7 @@ def remove_tiles(tiles_dir: Path, *, json_path: Path | None = None):
|
|
|
|
|
except:
|
|
|
|
|
raise ValueError("json file not found at provided path")
|
|
|
|
|
try:
|
|
|
|
|
tile_metadata = loads(text)
|
|
|
|
|
tile_metadata = json.loads(text)
|
|
|
|
|
except:
|
|
|
|
|
raise ValueError("invalid json found at provided path")
|
|
|
|
|
tile_metadata_cur = tile_metadata
|
|
|
|
@ -366,28 +234,30 @@ def remove_tiles(tiles_dir: Path, *, json_path: Path | None = None):
|
|
|
|
|
del tile_metadata_cur[tiles_dir_final]
|
|
|
|
|
except:
|
|
|
|
|
raise ValueError(f"unable to find path '{'/'.join([*tiles_dir_parts, tiles_dir_final])}' within json file")
|
|
|
|
|
if not quiet:
|
|
|
|
|
print(f"writing metadata to json file at '{json_path}'...", end = " ", flush = True)
|
|
|
|
|
json_path.write_text(dumps(tile_metadata, indent=2), encoding = "UTF-8")
|
|
|
|
|
json_path.write_text(json.dumps(tile_metadata, indent=2), encoding = "UTF-8")
|
|
|
|
|
if not quiet:
|
|
|
|
|
print("done")
|
|
|
|
|
|
|
|
|
|
if not quiet:
|
|
|
|
|
print(f"removing files from '{tiles_dir}'...", end = " ", flush = True)
|
|
|
|
|
rmtree(tiles_dir)
|
|
|
|
|
shutil.rmtree(tiles_dir)
|
|
|
|
|
if not quiet:
|
|
|
|
|
print("done")
|
|
|
|
|
|
|
|
|
|
@dataclass
|
|
|
|
|
class IpMapArgs:
|
|
|
|
|
command: Literal["mkcoords", "convert", "mktiles", "rmtiles"]
|
|
|
|
|
command: Literal["convert", "generate", "remove"]
|
|
|
|
|
quiet: bool
|
|
|
|
|
coords: str
|
|
|
|
|
input: str
|
|
|
|
|
output: str
|
|
|
|
|
batches: int
|
|
|
|
|
processes: int
|
|
|
|
|
tile_size: int
|
|
|
|
|
alpha: bool
|
|
|
|
|
variants: str
|
|
|
|
|
colormaps: str
|
|
|
|
|
quantile: float
|
|
|
|
|
processes: int
|
|
|
|
|
num_rows: int | None
|
|
|
|
|
skip_iters: int | None
|
|
|
|
|
json: str | None
|
|
|
|
@ -396,52 +266,45 @@ def parse_list_arg(arg: str):
|
|
|
|
|
return [x.strip().lower() for x in arg.split(",") if x.strip()]
|
|
|
|
|
|
|
|
|
|
def main():
|
|
|
|
|
parser = ArgumentParser("ipmap")
|
|
|
|
|
parser = argparse.ArgumentParser("ipmap")
|
|
|
|
|
parser.add_argument("-q", "--quiet", action = "store_true", help = "decrease output verbosity")
|
|
|
|
|
subparsers = parser.add_subparsers(dest = "command", required = True, help = "the command to run")
|
|
|
|
|
mkcoords_parser = subparsers.add_parser("mkcoords", help = "generate coordinates corresponding to each IP address")
|
|
|
|
|
mkcoords_parser.add_argument("-b", "--batches", default = default_batches, type = int, help = "the number of batches to split the task into (default: %(default)s)")
|
|
|
|
|
mkcoords_parser.add_argument("-p", "--processes", default = default_processes, type = int, help = "the number of processes to split the task across (default: %(default)s)")
|
|
|
|
|
mkcoords_parser.add_argument("output", help = "the output path to save the generated coordinates to")
|
|
|
|
|
convert_parser = subparsers.add_parser("convert", help = "convert scan data from csv to parquet format")
|
|
|
|
|
convert_parser.add_argument("input", help = "the input path of the csv file to read the scan data from")
|
|
|
|
|
convert_parser.add_argument("output", help = "the output path of the parquet file to save the converted scan data to")
|
|
|
|
|
mktiles_parser = subparsers.add_parser("mktiles", help = "generate tile images from scan data in parquet format")
|
|
|
|
|
mktiles_parser.add_argument("-t", "--tile-size", default = default_tile_size, type = int, help = "the tile size to use (default: %(default)s)")
|
|
|
|
|
mktiles_parser.add_argument("-a", "--alpha", action = "store_true", help = "use alpha channel instead of black")
|
|
|
|
|
mktiles_parser.add_argument("-v", "--variants", default = ",".join(default_variant_names), help = "a comma separated list of variants to generate (default: %(default)s)")
|
|
|
|
|
mktiles_parser.add_argument("-c", "--colormaps", default = ",".join(default_colormap_names), help = "a comma separated list of colormaps to generate (default: %(default)s)")
|
|
|
|
|
mktiles_parser.add_argument("-q", "--quantile", type = float, default = default_quantile, help = "the quantile to use for scaling data such as rtt (default: %(default)s)")
|
|
|
|
|
mktiles_parser.add_argument("-n", "--num-rows", type = int, help = "how many rows to read from the scan data (default: all)")
|
|
|
|
|
mktiles_parser.add_argument("-s", "--skip-iters", type = int, help = "how many iterations to skip generating images for (default: none)")
|
|
|
|
|
mktiles_parser.add_argument("-j", "--json", help = "the path for the json file to store metadata about the tile images (default: none)")
|
|
|
|
|
mktiles_parser.add_argument("coords", help = "the path of the binary file containing the coords to map IP addresses to")
|
|
|
|
|
mktiles_parser.add_argument("input", help = "the input path of the parquet file to read the scan data from")
|
|
|
|
|
mktiles_parser.add_argument("output", help = "the output path to save the generated tile images to")
|
|
|
|
|
rmtiles_parser = subparsers.add_parser("rmtiles", help = "remove tile images")
|
|
|
|
|
rmtiles_parser.add_argument("-j", "--json", help = "the path for the json file to store metadata about the tile images (default: none)")
|
|
|
|
|
rmtiles_parser.add_argument("input", help = "the path containing tile images to remove")
|
|
|
|
|
generate_parser = subparsers.add_parser("generate", help = "generate tile images from scan data in parquet format")
|
|
|
|
|
generate_parser.add_argument("-t", "--tile-size", default = default_tile_size, type = int, help = "the tile size to use (default: %(default)s)")
|
|
|
|
|
generate_parser.add_argument("-a", "--alpha", action = "store_true", help = "use alpha channel instead of black")
|
|
|
|
|
generate_parser.add_argument("-v", "--variants", default = ",".join(default_variants), help = "a comma separated list of variants to generate (default: %(default)s)")
|
|
|
|
|
generate_parser.add_argument("-c", "--colormaps", default = ",".join(default_colormaps), help = "a comma separated list of colormaps to generate (default: %(default)s)")
|
|
|
|
|
generate_parser.add_argument("-q", "--quantile", type = float, default = default_quantile, help = "the quantile to use for scaling data such as rtt (default: %(default)s)")
|
|
|
|
|
generate_parser.add_argument("-p", "--processes", default = default_processes, type = int, help = "how many processes to spawn for saving images (default: %(default)s)")
|
|
|
|
|
generate_parser.add_argument("-n", "--num-rows", type = int, help = "how many rows to read from the scan data (default: all)")
|
|
|
|
|
generate_parser.add_argument("-s", "--skip-iters", type = int, help = "how many iterations to skip generating images for (default: none)")
|
|
|
|
|
generate_parser.add_argument("-j", "--json", help = "the path for the json file to store metadata about the tile images (default: none)")
|
|
|
|
|
generate_parser.add_argument("input", help = "the input path of the parquet file to read the scan data from")
|
|
|
|
|
generate_parser.add_argument("output", help = "the output path to save the generated tile images to")
|
|
|
|
|
remove_parser = subparsers.add_parser("remove", help = "remove tile images")
|
|
|
|
|
remove_parser.add_argument("-j", "--json", help = "the path for the json file to store metadata about the tile images (default: none)")
|
|
|
|
|
remove_parser.add_argument("input", help = "the path containing tile images to remove")
|
|
|
|
|
args = parser.parse_args(namespace = IpMapArgs)
|
|
|
|
|
|
|
|
|
|
try:
|
|
|
|
|
with redirect_stdout(open(devnull, "w") if args.quiet else stdout):
|
|
|
|
|
match args.command:
|
|
|
|
|
case "mkcoords":
|
|
|
|
|
make_coords(output_path = Path(args.output), batches = args.batches, processes = args.processes)
|
|
|
|
|
case "convert":
|
|
|
|
|
convert(input_path = Path(args.input), output_path = Path(args.output))
|
|
|
|
|
case "mktiles":
|
|
|
|
|
make_tiles(coords_path = Path(args.coords), input_path = Path(args.input), tiles_dir = Path(args.output),
|
|
|
|
|
tile_size = args.tile_size, alpha = args.alpha, variant_names = parse_list_arg(args.variants),
|
|
|
|
|
colormap_names = parse_list_arg(args.colormaps), quantile = args.quantile,
|
|
|
|
|
num_rows = args.num_rows, skip_iters = args.skip_iters, json_path = Path(args.json) if args.json else None)
|
|
|
|
|
case "rmtiles":
|
|
|
|
|
remove_tiles(tiles_dir = Path(args.input), json_path = Path(args.json) if args.json else None)
|
|
|
|
|
case _:
|
|
|
|
|
if args.command == "convert":
|
|
|
|
|
convert_to_parquet(csv_path = Path(args.input), parquet_path = Path(args.output), quiet = args.quiet)
|
|
|
|
|
elif args.command == "generate":
|
|
|
|
|
generate_tiles(parquet_path = Path(args.input), tiles_dir = Path(args.output),
|
|
|
|
|
tile_size = args.tile_size, alpha = args.alpha, variants = parse_list_arg(args.variants),
|
|
|
|
|
colormaps = parse_list_arg(args.colormaps), quantile = args.quantile,
|
|
|
|
|
processes = args.processes, num_rows = args.num_rows, skip_iters = args.skip_iters,
|
|
|
|
|
json_path = Path(args.json) if args.json else None, quiet = args.quiet)
|
|
|
|
|
elif args.command == "remove":
|
|
|
|
|
remove_tiles(tiles_dir = Path(args.input), json_path = Path(args.json) if args.json else None, quiet = args.quiet)
|
|
|
|
|
else:
|
|
|
|
|
raise ValueError("invalid command")
|
|
|
|
|
except ValueError as e:
|
|
|
|
|
print(f"error: {e}", file = stderr)
|
|
|
|
|
exit(1)
|
|
|
|
|
print(f"error: {e}", file = sys.stderr)
|
|
|
|
|
sys.exit(1)
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
|
|
|
|
|
main()
|
|
|
|
|