Refactor stuff

LilyRose2798 2024-04-03 00:43:54 +11:00
parent 2756281ddb
commit 4540c36cdd
5 changed files with 267 additions and 152 deletions


@@ -34,65 +34,58 @@
 <body>
 <div id="map"></div>
 <script>
-function hilbert_c2i({ x, y }) {
-    var b, d;
-    var rotation = 0;
-    let reflection = 0;
-    let index = 0;
-    for (b = 16; b--;) {
-        let bits = reflection;
-        reflection = (y >>> b) & 1;
-        reflection |= ((x >>> b) & 1) << 1;
-        bits = bits ^ reflection;
-        bits = ((bits >>> rotation) | (bits << (2 - rotation))) & 3;
-        index |= bits << (b << 1);
-        reflection ^= (1 << rotation);
-        bits = bits & (-bits) & 1;
-        while (bits) { ++rotation; bits >>>= 1; }
-        if (++rotation >= 2) rotation -= 2;
-    }
-    index ^= 0x2aaaaaaa;
-    for (d = 1; d < 32; d *= 2) {
-        let t = index >>> d;
-        if (!t) break;
-        index ^= t;
-    }
-    return index;
-}
-
-function hilbert_i2c(index) {
-    var b, d, t;
-    var rotation = 0;
-    let reflection = 0;
-    let coord = { x: 0, y: 0 };
-    index ^= (index >>> 1) ^ 0x2aaaaaaa;
-    for (b = 16; b--; ) {
-        var bits = index >>> (2 * b) & 3;
-        reflection ^= ((bits >>> (2 - rotation)) | (bits << rotation)) & 3;
-        coord.x |= (reflection & 1) << b;
-        coord.y |= ((reflection >>> 1) & 1) << b;
-        reflection ^= (1 << rotation);
-        bits &= (-bits) & 1;
-        while (bits) {
-            bits >>>= 1;
-            ++rotation;
-        }
-        if (++rotation >= 2)
-            rotation -= 2;
-    }
-    return coord;
-}
+const hilbert_c2i = ({ x, y }) => {
+    let rotation = 0
+    let reflection = 0
+    let index = 0
+    for (let b = 15; b >= 0; b--) {
+        let bits = reflection
+        reflection = (y >>> b) & 1
+        reflection |= ((x >>> b) & 1) << 1
+        bits = bits ^ reflection
+        bits = ((bits >>> rotation) | (bits << (2 - rotation))) & 3
+        index |= bits << (b << 1)
+        reflection ^= (1 << rotation)
+        bits = bits & (-bits) & 1
+        while (bits) {
+            ++rotation
+            bits >>>= 1
+        }
+        if (++rotation >= 2)
+            rotation -= 2
+    }
+    index ^= 0x2aaaaaaa
+    for (let d = 1; d < 32; d *= 2) {
+        let t = index >>> d
+        if (!t) break
+        index ^= t
+    }
+    return index
+}
+
+const hilbert_i2c = index => {
+    let rotation = 0
+    let reflection = 0
+    let coord = { x: 0, y: 0 }
+    index ^= (index >>> 1) ^ 0x2aaaaaaa
+    for (let b = 15; b >= 0; b--) {
+        let bits = index >>> (2 * b) & 3
+        reflection ^= ((bits >>> (2 - rotation)) | (bits << rotation)) & 3
+        coord.x |= (reflection & 1) << b
+        coord.y |= ((reflection >>> 1) & 1) << b
+        reflection ^= (1 << rotation)
+        bits &= (-bits) & 1
+        while (bits) {
+            bits >>>= 1
+            ++rotation
+        }
+        if (++rotation >= 2)
+            rotation -= 2
+    }
+    return coord
+}
+
+const dateDir = (date = new Date()) => `${date.getFullYear()}-${(date.getMonth() + 1).toString().padStart(2, "0")}-${date.getDate().toString().padStart(2, "0")}`
 const map = new maplibregl.Map({
     container: "map",
@@ -101,24 +94,24 @@
     doubleClickZoom: false,
     dragRotate: false,
     style: {
-        "version": 8,
-        "sources": {
-            "raster-tiles": {
-                "type": "raster",
-                "tiles": [
-                    "tiles/{z}/{y}/{x}.png"
-                ],
-                "minzoom": 0,
-                "maxzoom": 8,
-                "tileSize": 256
-            }
-        },
-        "layers": [
-            {
-                "id": "simple-tiles",
-                "type": "raster",
-                "source": "raster-tiles",
-                "paint": {
+        version: 8,
+        sources: {
+            "ipmap-tiles": {
+                type: "raster",
+                tiles: [
+                    "tiles/2024-03-30/density/jet/{z}/{y}/{x}.png" // change to using remote json with list of tilemaps
+                ],
+                minzoom: 0,
+                maxzoom: 8,
+                tileSize: 256
+            }
+        },
+        layers: [
+            {
+                id: "ipmap-tiles-layer",
+                type: "raster",
+                source: "ipmap-tiles",
+                paint: {
                     "raster-resampling": "nearest"
                 }
             }
@@ -144,6 +137,7 @@
         .setLngLat(e.lngLat)
         .addTo(map)
 })
+const setTileUrl = (date, variant, colormap) => map.getSource("ipmap-tiles").setTiles([`tiles/${dateDir(date)}/${variant}/${colormap}/{z}/{y}/{x}.png`])
 </script>
 </body>
 </html>
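Note: the rewritten hilbert_c2i / hilbert_i2c are intended to match the curve that ipmap.py computes via hilbert.decode(..., num_dims=2, num_bits=16). A minimal round-trip sketch for sanity-checking that correspondence from the Python side (the sample index and dtype are illustrative, not part of the commit):

import numpy as np
import hilbert  # numpy-hilbert-curve, already a project dependency

idx = np.array([123456789], dtype=np.uint64)
xy = hilbert.decode(idx, num_dims=2, num_bits=16)   # index -> [[x, y]], the i2c direction
back = hilbert.encode(xy, num_dims=2, num_bits=16)  # [[x, y]] -> index, the c2i direction
assert back[0] == idx[0]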

ipmap.py Normal file → Executable file

@@ -1,18 +1,22 @@
+#!/usr/bin/env python3
+import sys
 import math
 import functools
+import argparse
 from pathlib import Path
 import png
 import hilbert
 import numpy as np
 import polars as pl
+from cmap import Colormap
 from multiprocessing import Pool
+from dataclasses import dataclass
+from typing import Literal
 
-hilbert_coords = functools.partial(hilbert.decode, num_dims=2, num_bits=16)
-
-def convert_to_parquet(csv_path: Path, parquet_path: Path):
-    if not csv_path.exists():
-        print(f"no csv found at \"{csv_path}\"")
-        return
+def convert_to_parquet(csv_path: Path, parquet_path: Path, quiet=False):
+    if not quiet:
+        print(f"scanning csv '{csv_path}' into parquet '{parquet_path}'...", end=" ", flush=True)
     lf = pl.scan_csv(csv_path, schema={
         "saddr": pl.String,
         "rtt_us": pl.UInt64,
@@ -23,82 +27,164 @@ def convert_to_parquet(csv_path: Path, parquet_path: Path):
     lf = lf.with_columns(rtt_us = pl.col("rtt_us").clip(0, 0xFFFFFFFF).cast(pl.UInt32))
     lf = lf.with_columns(saddr = pl.col("saddr").str.split_exact(".", 3).struct.rename_fields(["a", "b", "c", "d"]))
     lf = lf.with_columns(saddr = pl.col("saddr").struct.field("a").cast(pl.UInt32) * 0x1000000 + pl.col("saddr").struct.field("b").cast(pl.UInt32) * 0x10000 + pl.col("saddr").struct.field("c").cast(pl.UInt32) * 0x100 + pl.col("saddr").struct.field("d").cast(pl.UInt32))
-    lf = lf.with_columns(coords=pl.col("saddr").map_batches(hilbert_coords, pl.Array(pl.UInt16, 2), is_elementwise=True))
+    lf = lf.unique("saddr")
+    lf = lf.with_columns(coords = pl.col("saddr").map_batches(functools.partial(hilbert.decode, num_dims=2, num_bits=16), pl.Array(pl.UInt16, 2), is_elementwise = True))
     lf = lf.with_columns(x = pl.col("coords").arr.get(0), y = pl.col("coords").arr.get(1))
     lf = lf.drop("coords")
-    print(f"scanning csv \"{csv_path}\" into parquet \"{parquet_path}\"...", end=" ", flush=True)
     lf.sink_parquet(parquet_path)
-    print("done.")
+    if not quiet:
+        print("done")
 
 def write_tile(path: Path, rows: np.ndarray):
     path.parent.mkdir(exist_ok=True, parents=True)
-    png.Writer(rows.shape[0], rows.shape[0], greyscale=False, alpha=False).write(path.open("wb"), rows)
+    png.Writer(rows.shape[0], rows.shape[1], greyscale=False, alpha=False).write_packed(path.open("wb"), rows)
 
-def generate_tiles(parquet_path: Path, tiles_dir: Path, tile_size=256, processes=16):
-    print(f"reading parquet \"{parquet_path}\"...", end=" ", flush=True)
-    gdf = pl.read_parquet(parquet_path, columns=["x", "y", "rtt_us"])
-    print("done.")
-    channels = 3
-    tile_size_by_channels = tile_size * channels
+default_tile_size = 256
+default_colormaps = ["viridis"]
+default_variants = ["density", "rtt"]
+default_processes = 16
+
+def generate_tiles(parquet_path: Path, tiles_dir: Path, tile_size=default_tile_size,
+        variants: list[str] = default_variants, colormaps: list[str] = default_colormaps,
+        processes=default_processes, num_rows: int | None = None,
+        skip_iters: int | None = None, quiet=False):
+    if tile_size < 1 or tile_size > 0x10000 or tile_size & (tile_size - 1) != 0:
+        raise ValueError(f"tile size must be a power of 2 between 1 and {0x10000}")
+    if len(variants) == 0:
+        raise ValueError("must specify at least one variant")
+    if len(colormaps) == 0:
+        raise ValueError("must specify at least one colormap")
+    colormaps_by_name = { colormap: [bytes(c) for c in (Colormap(colormap).lut()[:,0:3] * (256.0 - np.finfo(np.float32).eps)).astype(np.uint8)] for colormap in colormaps }
+    generate_density = False
+    generate_rtt = False
+    for variant in variants:
+        if variant == "density":
+            generate_density = True
+        elif variant == "rtt":
+            generate_rtt = True
+        else:
+            raise ValueError(f"unknown variant '{variant}'")
+    if not quiet:
+        print(f"reading parquet '{parquet_path}'...", end=" ", flush=True)
+    df = pl.read_parquet(parquet_path, columns = ["x", "y", "rtt_us"], n_rows=num_rows).with_columns(count = pl.lit(1, pl.UInt32))
+    if not quiet:
+        print("done")
     tiles_per_side = int(math.sqrt(0x100000000)) // tile_size
+    possible_overlaps = 1
+
+    def generate_images(colormap: str, type_name: str, col_name: str, divisor: int):
+        nonlocal df
+        if not quiet:
+            print(f"creating {type_name} image data with {colormap} colormap...", end=" ", flush=True)
+        image_data = np.zeros((tiles_per_side * tile_size, tiles_per_side * tile_size), dtype = "S3")
+        image_data[(df.get_column("y"), df.get_column("x"))] = (255 * df.get_column(col_name) // divisor).clip(0, 255).cast(pl.UInt8).replace(pl.int_range(256), colormaps_by_name[colormap], return_dtype=pl.Binary)
+        if not quiet:
+            print("done")
+        if not quiet:
+            print(f"writing {tiles_per_side}x{tiles_per_side}={tiles_per_side * tiles_per_side} {type_name} images with {colormap} colormap...", end=" ", flush=True)
+        with Pool(processes) as pool:
+            z = tiles_per_side.bit_length() - 1
+            z_path = tiles_dir / type_name / colormap / f"{z}"
+            z_path.mkdir(exist_ok=True, parents=True)
+            pool.starmap(write_tile, [
+                (z_path / f"{y}" / f"{x}.png", image_data[
+                    y * tile_size : y * tile_size + tile_size,
+                    x * tile_size : x * tile_size + tile_size,
+                ])
+                for y in range(tiles_per_side)
+                for x in range(tiles_per_side)
+            ])
+        if not quiet:
+            print("done")
+        del image_data
+
+    def scale_down_coords(scale = 2):
+        nonlocal df
+        nonlocal tiles_per_side
+        nonlocal possible_overlaps
+        prev_tiles_per_side = tiles_per_side
+        tiles_per_side //= scale
+        possible_overlaps *= scale * scale
+        if not quiet:
+            print(f"scaling {len(df)} coords down from {prev_tiles_per_side}x{prev_tiles_per_side} tiles to {tiles_per_side}x{tiles_per_side} tiles...", end=" ", flush=True)
+        df = df.with_columns(x=pl.col("x") // scale, y=pl.col("y") // scale).group_by(["x", "y"]).agg(count=pl.sum("count"), rtt_us=pl.mean("rtt_us"))
+        if not quiet:
+            print(f"done with {len(df)} coords remaining")
+
+    if skip_iters and skip_iters > 0:
+        remaining_iters = tiles_per_side.bit_length() - skip_iters
+        if remaining_iters <= 0:
+            if not quiet:
+                print("skipping all iters")
+            return
+        scale_down_coords(1 << skip_iters)
+
     while True:
-        # if tiles_per_side <= 16:
-        z = int(math.log2(tiles_per_side))
-        print(f"[{z=}] calculating colors...", end=" ", flush=True)
-        df = gdf.with_columns(gb=0xFF - (pl.col("rtt_us") / 3000).round().clip(0, 0xFF).cast(pl.UInt8))
-        df = df.drop("rtt_us")
-        df = df.with_columns(r=0xFF, g=pl.col("gb"), b=pl.col("gb"))
-        df = df.drop("gb")
-        print("done.")
-        total_size = tiles_per_side * tile_size
-        print(f"[{z=}] creating image row data...", end=" ", flush=True)
-        all_rows = np.zeros((total_size, total_size, channels), dtype = "uint8")
-        all_rows[(df.get_column("y"), df.get_column("x"))] = df.select(["r", "g", "b"]).to_numpy()
-        all_rows = all_rows.reshape(total_size, total_size * channels)
-        print("done.")
-        del df
-        z_path = tiles_dir / f"{z}"
-        z_path.mkdir(exist_ok=True, parents=True)
-        print(f"[{z=}] creating individual tile data...", end=" ", flush=True)
-        tile_data = [
-            (z_path / f"{y}" / f"{x}.png", all_rows[
-                y * tile_size : y * tile_size + tile_size,
-                x * tile_size_by_channels : x * tile_size_by_channels + tile_size_by_channels
-            ])
-            for x in range(tiles_per_side)
-            for y in range(tiles_per_side)
-        ]
-        print("done.")
-        print(f"[{z=}] writing {tiles_per_side}x{tiles_per_side}={tiles_per_side * tiles_per_side} images...", end=" ", flush=True)
-        with Pool(processes) as pool:
-            pool.starmap(write_tile, tile_data)
-        print("done.")
-        del tile_data
-        del all_rows
+        for colormap in colormaps:
+            if generate_density:
+                generate_images(colormap, "density", "count", 256 if possible_overlaps == 1 else possible_overlaps)
+            if generate_rtt:
+                generate_images(colormap, "rtt", "rtt_us", int(df.get_column("rtt_us").std()))
         if tiles_per_side == 1:
             break
-        old_tiles_per_side = tiles_per_side
-        tiles_per_side //= 2
-        print(f"[{z=}] rescaling {len(gdf)} coords from {old_tiles_per_side}x{old_tiles_per_side} to {tiles_per_side}x{tiles_per_side} tiles...", end=" ", flush=True)
-        new_gdf = gdf.with_columns(x=pl.col("x") // 2, y=pl.col("y") // 2)
-        del gdf
-        gdf = new_gdf.group_by(["x", "y"]).mean()
-        print(f"done. {len(gdf)} coords remaining.")
+        scale_down_coords()
+
+@dataclass
+class IpMapArgs:
+    command: Literal["convert", "generate"]
+    quiet: bool
+    input: str
+    output: str
+    tile_size: int
+    colormaps: str
+    variants: str
+    processes: int
+    num_rows: int | None
+    skip_iters: int | None
+
+def parse_list_arg(arg: str):
+    return [x.strip().lower() for x in arg.split(",") if x.strip()]
 
 def main():
-    data_dir = Path("data")
-    csv_path = data_dir / "full-scan.csv"
-    parquet_path = data_dir / "full-scan.parquet"
-    if not parquet_path.exists():
-        print(f"no parquet file found at \"{parquet_path}\", generating now...")
-        convert_to_parquet(csv_path, parquet_path)
-    tiles_dir = Path("tiles")
-    generate_tiles(parquet_path, tiles_dir)
+    parser = argparse.ArgumentParser("ipmap")
+    parser.add_argument("-q", "--quiet", action="store_true", help="decrease output verbosity")
+    subparsers = parser.add_subparsers(dest="command", required=True, help="the command to run")
+    convert_parser = subparsers.add_parser("convert", help="convert scan data from csv to parquet format")
+    convert_parser.add_argument("input", help="the input path of the csv file to read the scan data from")
+    convert_parser.add_argument("output", help="the output path of the parquet file to save the converted scan data to")
+    generate_parser = subparsers.add_parser("generate", help="generate tile images from scan data in parquet format")
+    generate_parser.add_argument("-t", "--tile-size", default=default_tile_size, type=int, help="the tile size to use (default: %(default)s)")
+    generate_parser.add_argument("-v", "--variants", default=",".join(default_variants), help="a comma separated list of variants to generate (default: %(default)s)")
+    generate_parser.add_argument("-c", "--colormaps", default=",".join(default_colormaps), help="a comma separated list of colormaps to generate (default: %(default)s)")
+    generate_parser.add_argument("-p", "--processes", default=default_processes, type=int, help="how many processes to spawn for saving images (default: %(default)s)")
+    generate_parser.add_argument("-n", "--num-rows", type=int, help="how many rows to read from the scan data (default: all)")
+    generate_parser.add_argument("-s", "--skip-iters", type=int, help="how many iterations to skip generating images for (default: none)")
+    generate_parser.add_argument("input", help="the input path of the parquet file to read the scan data from")
+    generate_parser.add_argument("output", help="the output path to save the generated tile images to")
+    args = parser.parse_args(namespace=IpMapArgs)
+    try:
+        if args.command == "convert":
+            convert_to_parquet(csv_path=Path(args.input), parquet_path=Path(args.output), quiet=args.quiet)
+        elif args.command == "generate":
+            generate_tiles(parquet_path=Path(args.input), tiles_dir=Path(args.output),
+                tile_size=args.tile_size, variants=parse_list_arg(args.variants),
+                colormaps=parse_list_arg(args.colormaps), processes=args.processes,
+                num_rows=args.num_rows, skip_iters=args.skip_iters, quiet=args.quiet)
+        else:
+            raise ValueError("invalid command")
+    except ValueError as e:
+        print(f"error: {e}")
+        sys.exit(1)
 
 if __name__ == "__main__":
     main()
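With the new argparse front end, a typical run is './ipmap.py convert scan.csv scan.parquet' followed by './ipmap.py generate -v density -c viridis scan.parquet tiles/'. The functions can also be called directly; a minimal sketch, assuming ipmap.py is importable as a module and using illustrative paths:

from pathlib import Path
from ipmap import convert_to_parquet, generate_tiles  # assumes ipmap.py is on the import path

convert_to_parquet(Path("data/2024-03-30/full-scan.csv"), Path("data/2024-03-30/full-scan.parquet"))
generate_tiles(Path("data/2024-03-30/full-scan.parquet"), Path("tiles/2024-03-30"),
               variants=["density"], colormaps=["jet"], processes=4)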

poetry.lock generated

@ -1,18 +1,24 @@
# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand. # This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand.
[[package]] [[package]]
name = "iptools" name = "cmap"
version = "0.7.0" version = "0.1.3"
description = "Python utilites for manipulating IPv4 and IPv6 addresses" description = "Scientific colormaps for python, without dependencies"
optional = false optional = false
python-versions = "*" python-versions = ">=3.8"
files = [ files = [
{file = "iptools-0.7.0-py2.py3-none-any.whl", hash = "sha256:a91fc7478fd795ac6b2d47c869fb46db7666ffec817bcb0560ef119e204237f0"}, {file = "cmap-0.1.3-py3-none-any.whl", hash = "sha256:47be4b515612c0d7991622aea064083d43dd1e1ff3e102818b8cc96f45b96e7e"},
{file = "iptools-0.7.0.tar.gz", hash = "sha256:118a4f638bb5fa0123df56fe3be703b112a689167539bcc194f8698ccdd9e2ea"}, {file = "cmap-0.1.3.tar.gz", hash = "sha256:41bca45e7e2c8699ee89fa821ccd956660d94084390e6eba106459590d856c66"},
] ]
[package.dependencies]
numpy = "*"
[package.extras] [package.extras]
testing = ["nose (>=1.0)"] dev = ["black", "ipython", "mypy", "pdbpp", "pre-commit", "pytest", "pytest-cov", "rich", "ruff"]
docs = ["colorcet", "colorspacious", "imageio", "mkdocs", "mkdocs-gen-files", "mkdocs-literate-nav", "mkdocs-material", "mkdocs-minify-plugin", "mkdocstrings-python"]
test = ["matplotlib", "numba", "pytest (>=6.0)", "pytest-cov"]
thirdparty = ["bokeh", "colour", "napari", "plotly", "pydantic", "pygfx", "pytest-qt", "rich", "viscm", "vispy"]
[[package]] [[package]]
name = "numpy" name = "numpy"
@ -122,4 +128,4 @@ files = [
[metadata] [metadata]
lock-version = "2.0" lock-version = "2.0"
python-versions = "^3.11" python-versions = "^3.11"
content-hash = "ae4649713b0931b2549ea9b0951c28123e2c1d7abcdf26bfd202ac2aa039d9db" content-hash = "4766b735c32d1dbed994187370f478f0b535a151f6ddbd08210f57ed64f2e9d9"

pyproject.toml

@@ -8,10 +8,10 @@ license = "AGPLv3"
 
 [tool.poetry.dependencies]
 python = "^3.11"
 pypng = "^0.20220715.0"
-iptools = "^0.7.0"
 numpy = "^1.26.4"
 numpy-hilbert-curve = "^1.0.1"
 polars-lts-cpu = "^0.20.17"
+cmap = "^0.1.3"
 
 [build-system]
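The new cmap dependency backs the colormaps_by_name table in generate_tiles: each named colormap is expanded once into 256 packed RGB byte triples that the polars replace() call then maps pixel values through. A sketch of that expression in isolation, assuming Colormap.lut() yields an (N, 4) float RGBA array with values in [0, 1]:

import numpy as np
from cmap import Colormap

eps = np.finfo(np.float32).eps
# scale by (256 - eps) so a channel value of exactly 1.0 maps to 255 rather than 256
lut = [bytes(c) for c in (Colormap("viridis").lut()[:, 0:3] * (256.0 - eps)).astype(np.uint8)]
assert len(lut) == 256 and all(len(c) == 3 for c in lut)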

zmap.sh

@@ -1,5 +1,34 @@
 #!/bin/sh
-OUTFILE="data/$(date +"%Y-%m-%d")-full-scan.csv"
-zmap -B '100M' -M icmp_echo_time '0.0.0.0/0' -f 'saddr,rtt_us,success' -o "$OUTFILE" && \
-scp "$OUTFILE" user@to_host:"/destination/$OUTFILE"
+DATE=$(date +"%Y-%m-%d")
+
+# change these variables
+DATA_FILENAME=full-scan
+LOCAL_DATA_PATH=/data
+LOCAL_IPMAP_PATH=/scripts/ipmap.py
+REMOTE_USER=root
+REMOTE_HOST=localhost
+REMOTE_DATA_PATH=/data
+REMOTE_TILES_PATH=/tiles
+REMOTE_IPMAP_PATH=/scripts/ipmap.py
+COLORMAPS=jet,fake_parula,viridis,plasma,thermal,batlow
+VARIANTS=density,rtt
+
+# do not change these variables
+CSV_FILENAME="$DATA_FILENAME.csv"
+PARQUET_FILENAME="$DATA_FILENAME.parquet"
+CURRENT_LOCAL_DATA_PATH="$LOCAL_DATA_PATH/$DATE"
+LOCAL_CSV_PATH="$CURRENT_LOCAL_DATA_PATH/$CSV_FILENAME"
+LOCAL_PARQUET_PATH="$CURRENT_LOCAL_DATA_PATH/$PARQUET_FILENAME"
+REMOTE="$REMOTE_USER@$REMOTE_HOST"
+CURRENT_REMOTE_DATA_PATH="$REMOTE_DATA_PATH/$DATE"
+REMOTE_PARQUET_PATH="$CURRENT_REMOTE_DATA_PATH/$PARQUET_FILENAME"
+CURRENT_REMOTE_TILES_PATH="$REMOTE_TILES_PATH/$DATE"
+
+mkdir -p "$CURRENT_LOCAL_DATA_PATH" && \
+zmap -B '100M' -M icmp_echo_time '0.0.0.0/0' -f 'saddr,rtt_us,success' -o "$LOCAL_CSV_PATH" && \
+"$LOCAL_IPMAP_PATH" convert "$LOCAL_CSV_PATH" "$LOCAL_PARQUET_PATH" && \
+ssh "$REMOTE" "mkdir -p '$CURRENT_REMOTE_DATA_PATH'" && \
+scp "$LOCAL_PARQUET_PATH" "$REMOTE":"$REMOTE_PARQUET_PATH" && \
+ssh "$REMOTE" "mkdir -p '$CURRENT_REMOTE_TILES_PATH'" && \
+ssh "$REMOTE" "'$REMOTE_IPMAP_PATH' generate -c '$COLORMAPS' -v '$VARIANTS' '$REMOTE_PARQUET_PATH' '$CURRENT_REMOTE_TILES_PATH'"