Refactor stuff

This commit is contained in:
LilyRose2798 2024-04-03 00:43:54 +11:00
parent 2756281ddb
commit 4540c36cdd
5 changed files with 267 additions and 152 deletions

View file

@ -34,66 +34,59 @@
<body>
<div id="map"></div>
<script>
// Convert an { x, y } coordinate (16 bits per axis are read) into its Hilbert curve index.
// NOTE(review): JavaScript bitwise operators yield signed 32-bit values, so an index with
// bit 31 set is returned as a negative number; apply `>>> 0` at the call site if an
// unsigned value is needed - confirm against the callers.
function hilbert_c2i({ x, y }) {
var b, d;
var rotation = 0;
let reflection = 0;
let index = 0;
// `b--` is both the loop test and the decrement: the body runs for b = 15 down to 0.
for (b = 16; b--;) {
// Remember the previous reflection, then load bit b of y into bit 0 and bit b of x into bit 1.
let bits = reflection;
reflection = (y >>> b) & 1;
reflection |= ((x >>> b) & 1) << 1;
bits = bits ^ reflection;
// Rotate the 2-bit value right by `rotation` positions.
bits = ((bits >>> rotation) | (bits << (2 - rotation))) & 3;
// Store the pair at bit positions 2b and 2b + 1 of the index.
index |= bits << (b << 1);
reflection ^= (1 << rotation);
// Leaves only bit 0 of `bits`, so the loop below bumps rotation at most once.
bits = bits & (-bits) & 1;
while (bits) { ++rotation; bits >>>= 1; }
// Advance the rotation by one more step and wrap it back into the range 0..1.
if (++rotation >= 2) rotation -= 2;
}
index ^= 0x2aaaaaaa;
// Fold the index onto itself with shifts of 1, 2, 4, 8 and 16, stopping early once
// the shifted value is zero.
for (d = 1; d < 32; d *= 2) {
let t = index >>> d;
if (!t) break;
index ^= t;
}
return index;
}
function hilbert_i2c(index) {
var b, d, t;
var rotation = 0;
let reflection = 0;
let coord = { x: 0, y: 0 };
index ^= (index >>> 1) ^ 0x2aaaaaaa;
for (b = 16; b--; ) {
var bits = index >>> (2 * b) & 3;
reflection ^= ((bits >>> (2 - rotation)) | (bits << rotation)) & 3;
coord.x |= (reflection & 1) << b;
coord.y |= ((reflection >>> 1) & 1) << b;
reflection ^= (1 << rotation);
bits &= (-bits) & 1;
const hilbert_c2i = ({ x, y }) => {
let rotation = 0
let reflection = 0
let index = 0
for (let b = 15; b >= 0; b--) {
let bits = reflection
reflection = (y >>> b) & 1
reflection |= ((x >>> b) & 1) << 1
bits = bits ^ reflection
bits = ((bits >>> rotation) | (bits << (2 - rotation))) & 3
index |= bits << (b << 1)
reflection ^= (1 << rotation)
bits = bits & (-bits) & 1
while (bits) {
bits >>>= 1;
++rotation;
++rotation
bits >>>= 1
}
if (++rotation >= 2)
rotation -= 2;
rotation -= 2
}
return coord;
index ^= 0x2aaaaaaa
for (let d = 1; d < 32; d *= 2) {
let t = index >>> d
if (!t) break
index ^= t
}
return index
}
// Convert a 32-bit Hilbert curve index into an { x, y } coordinate with 16 bits per axis.
// NOTE(review): this writes bit 0 of each decoded pair to x and bit 1 to y, while
// hilbert_c2i reads y into bit 0 and x into bit 1 - confirm the swapped axes are intended.
const hilbert_i2c = index => {
    const coord = { x: 0, y: 0 }
    let rotation = 0
    let reflection = 0
    // Pre-process the index: XOR it with itself shifted right by one and with the 0x2aaaaaaa mask.
    index ^= (index >>> 1) ^ 0x2aaaaaaa
    // Consume the index two bits at a time, most significant pair first.
    for (let b = 15; b >= 0; b--) {
        const pair = (index >>> (b * 2)) & 3
        // Rotate the pair left by `rotation` positions within two bits.
        const rotated = ((pair >>> (2 - rotation)) | (pair << rotation)) & 3
        reflection ^= rotated
        coord.x |= (reflection & 1) << b
        coord.y |= ((reflection >>> 1) & 1) << b
        reflection ^= 1 << rotation
        // One step per pair, plus one more when the pair's low bit is set; wrap into 0..1.
        rotation += (pair & 1) + 1
        if (rotation >= 2)
            rotation -= 2
    }
    return coord
}
// Build the "<year>-<MM>-<DD>" directory name for a date (defaults to now), using the
// date's local-time fields with month and day zero-padded to two digits.
const dateDir = (date = new Date()) => {
    const pad2 = value => value.toString().padStart(2, "0")
    const month = pad2(date.getMonth() + 1) // getMonth() is zero-based
    const day = pad2(date.getDate())
    return `${date.getFullYear()}-${month}-${day}`
}
const map = new maplibregl.Map({
container: "map",
attributionControl: false,
@ -101,24 +94,24 @@
doubleClickZoom: false,
dragRotate: false,
style: {
"version": 8,
"sources": {
"raster-tiles": {
"type": "raster",
"tiles": [
"tiles/{z}/{y}/{x}.png"
version: 8,
sources: {
"ipmap-tiles": {
type: "raster",
tiles: [
"tiles/2024-03-30/density/jet/{z}/{y}/{x}.png" // change to using remote json with list of tilemaps
],
"minzoom": 0,
"maxzoom": 8,
"tileSize": 256
minzoom: 0,
maxzoom: 8,
tileSize: 256
}
},
"layers": [
layers: [
{
"id": "simple-tiles",
"type": "raster",
"source": "raster-tiles",
"paint": {
id: "ipmap-tiles-layer",
type: "raster",
source: "ipmap-tiles",
paint: {
"raster-resampling": "nearest"
}
}
@ -144,6 +137,7 @@
.setLngLat(e.lngLat)
.addTo(map)
})
// Switch the "ipmap-tiles" source to the tile set stored under
// tiles/<date dir>/<variant>/<colormap>/ and return whatever setTiles returns.
const setTileUrl = (date, variant, colormap) => {
    // Look the source up first so a missing source fails before the URL is built.
    const tileSource = map.getSource("ipmap-tiles")
    return tileSource.setTiles([`tiles/${dateDir(date)}/${variant}/${colormap}/{z}/{y}/{x}.png`])
}
</script>
</body>
</html>

230
ipmap.py Normal file → Executable file
View file

@ -1,18 +1,22 @@
#!/usr/bin/env python3
import sys
import math
import functools
import argparse
from pathlib import Path
import png
import hilbert
import numpy as np
import polars as pl
from cmap import Colormap
from multiprocessing import Pool
from dataclasses import dataclass
from typing import Literal
hilbert_coords = functools.partial(hilbert.decode, num_dims=2, num_bits=16)
def convert_to_parquet(csv_path: Path, parquet_path: Path):
if not csv_path.exists():
print(f"no csv found at \"{csv_path}\"")
return
def convert_to_parquet(csv_path: Path, parquet_path: Path, quiet=False):
if not quiet:
print(f"scanning csv '{csv_path}' into parquet '{parquet_path}'...", end=" ", flush=True)
lf = pl.scan_csv(csv_path, schema={
"saddr": pl.String,
"rtt_us": pl.UInt64,
@ -20,85 +24,167 @@ def convert_to_parquet(csv_path: Path, parquet_path: Path):
})
lf = lf.filter(pl.col("success") == 1)
lf = lf.drop("success")
lf = lf.with_columns(rtt_us=pl.col("rtt_us").clip(0, 0xFFFFFFFF).cast(pl.UInt32))
lf = lf.with_columns(saddr=pl.col("saddr").str.split_exact(".", 3).struct.rename_fields(["a", "b", "c", "d"]))
lf = lf.with_columns(saddr=pl.col("saddr").struct.field("a").cast(pl.UInt32) * 0x1000000 + pl.col("saddr").struct.field("b").cast(pl.UInt32) * 0x10000 + pl.col("saddr").struct.field("c").cast(pl.UInt32) * 0x100 + pl.col("saddr").struct.field("d").cast(pl.UInt32))
lf = lf.with_columns(coords=pl.col("saddr").map_batches(hilbert_coords, pl.Array(pl.UInt16, 2), is_elementwise=True))
lf = lf.with_columns(x=pl.col("coords").arr.get(0), y=pl.col("coords").arr.get(1))
lf = lf.with_columns(rtt_us = pl.col("rtt_us").clip(0, 0xFFFFFFFF).cast(pl.UInt32))
lf = lf.with_columns(saddr = pl.col("saddr").str.split_exact(".", 3).struct.rename_fields(["a", "b", "c", "d"]))
lf = lf.with_columns(saddr = pl.col("saddr").struct.field("a").cast(pl.UInt32) * 0x1000000 + pl.col("saddr").struct.field("b").cast(pl.UInt32) * 0x10000 + pl.col("saddr").struct.field("c").cast(pl.UInt32) * 0x100 + pl.col("saddr").struct.field("d").cast(pl.UInt32))
lf = lf.unique("saddr")
lf = lf.with_columns(coords = pl.col("saddr").map_batches(functools.partial(hilbert.decode, num_dims=2, num_bits=16), pl.Array(pl.UInt16, 2), is_elementwise = True))
lf = lf.with_columns(x = pl.col("coords").arr.get(0), y = pl.col("coords").arr.get(1))
lf = lf.drop("coords")
print(f"scanning csv \"{csv_path}\" into parquet \"{parquet_path}\"...", end=" ", flush=True)
lf.sink_parquet(parquet_path)
print("done.")
if not quiet:
print("done")
def write_tile(path: Path, rows: np.ndarray):
path.parent.mkdir(exist_ok=True, parents=True)
png.Writer(rows.shape[0], rows.shape[0], greyscale=False, alpha=False).write(path.open("wb"), rows)
png.Writer(rows.shape[0], rows.shape[1], greyscale=False, alpha=False).write_packed(path.open("wb"), rows)
def generate_tiles(parquet_path: Path, tiles_dir: Path, tile_size=256, processes=16):
print(f"reading parquet \"{parquet_path}\"...", end=" ", flush=True)
gdf = pl.read_parquet(parquet_path, columns=["x", "y", "rtt_us"])
print("done.")
default_tile_size = 256
default_colormaps = ["viridis"]
default_variants = ["density", "rtt"]
default_processes = 16
def generate_tiles(parquet_path: Path, tiles_dir: Path, tile_size=default_tile_size,
variants: list[str] = default_variants, colormaps: list[str] = default_colormaps,
processes=default_processes, num_rows: int | None = None,
skip_iters: int | None = None, quiet=False):
if tile_size < 1 or tile_size > 0x10000 or tile_size & (tile_size - 1) != 0:
raise ValueError(f"tile size must be a power of 2 between 1 and {0x10000}")
if len(variants) == 0:
raise ValueError("must specify at least one variant")
if len(colormaps) == 0:
raise ValueError("must specify at least one colormap")
colormaps_by_name = { colormap: [bytes(c) for c in (Colormap(colormap).lut()[:,0:3] * (256.0 - np.finfo(np.float32).eps)).astype(np.uint8)] for colormap in colormaps }
generate_density = False
generate_rtt = False
for variant in variants:
if variant == "density":
generate_density = True
elif variant == "rtt":
generate_rtt = True
else:
raise ValueError(f"unknown variant '{variant}'")
if not quiet:
print(f"reading parquet '{parquet_path}'...", end=" ", flush=True)
df = pl.read_parquet(parquet_path, columns = ["x", "y", "rtt_us"], n_rows=num_rows).with_columns(count = pl.lit(1, pl.UInt32))
if not quiet:
print("done")
channels = 3
tile_size_by_channels = tile_size * channels
tiles_per_side = int(math.sqrt(0x100000000)) // tile_size
while True:
# if tiles_per_side <= 16:
z = int(math.log2(tiles_per_side))
print(f"[{z=}] calculating colors...", end=" ", flush=True)
df = gdf.with_columns(gb=0xFF - (pl.col("rtt_us") / 3000).round().clip(0, 0xFF).cast(pl.UInt8))
df = df.drop("rtt_us")
df = df.with_columns(r=0xFF, g=pl.col("gb"), b=pl.col("gb"))
df = df.drop("gb")
print("done.")
total_size = tiles_per_side * tile_size
print(f"[{z=}] creating image row data...", end=" ", flush=True)
all_rows = np.zeros((total_size, total_size, channels), dtype = "uint8")
all_rows[(df.get_column("y"), df.get_column("x"))] = df.select(["r", "g", "b"]).to_numpy()
all_rows = all_rows.reshape(total_size, total_size * channels)
print("done.")
del df
z_path = tiles_dir / f"{z}"
z_path.mkdir(exist_ok=True, parents=True)
print(f"[{z=}] creating individual tile data...", end=" ", flush=True)
tile_data = [
(z_path / f"{y}" / f"{x}.png", all_rows[
y * tile_size : y * tile_size + tile_size,
x * tile_size_by_channels : x * tile_size_by_channels + tile_size_by_channels
])
for x in range(tiles_per_side)
for y in range(tiles_per_side)
]
print("done.")
print(f"[{z=}] writing {tiles_per_side}x{tiles_per_side}={tiles_per_side * tiles_per_side} images...", end=" ", flush=True)
possible_overlaps = 1
def generate_images(colormap: str, type_name: str, col_name: str, divisor: int):
nonlocal df
if not quiet:
print(f"creating {type_name} image data with {colormap} colormap...", end=" ", flush=True)
image_data = np.zeros((tiles_per_side * tile_size, tiles_per_side * tile_size), dtype = "S3")
image_data[(df.get_column("y"), df.get_column("x"))] = (255 * df.get_column(col_name) // divisor).clip(0, 255).cast(pl.UInt8).replace(pl.int_range(256), colormaps_by_name[colormap], return_dtype=pl.Binary)
if not quiet:
print("done")
if not quiet:
print(f"writing {tiles_per_side}x{tiles_per_side}={tiles_per_side * tiles_per_side} {type_name} images with {colormap} colormap...", end=" ", flush=True)
with Pool(processes) as pool:
pool.starmap(write_tile, tile_data)
print("done.")
del tile_data
del all_rows
z = tiles_per_side.bit_length() - 1
z_path = tiles_dir / type_name / colormap / f"{z}"
z_path.mkdir(exist_ok=True, parents=True)
pool.starmap(write_tile, [
(z_path / f"{y}" / f"{x}.png", image_data[
y * tile_size : y * tile_size + tile_size,
x * tile_size : x * tile_size + tile_size,
])
for y in range(tiles_per_side)
for x in range(tiles_per_side)
])
if not quiet:
print("done")
del image_data
def scale_down_coords(scale = 2):
nonlocal df
nonlocal tiles_per_side
nonlocal possible_overlaps
prev_tiles_per_side = tiles_per_side
tiles_per_side //= scale
possible_overlaps *= scale * scale
if not quiet:
print(f"scaling {len(df)} coords down from {prev_tiles_per_side}x{prev_tiles_per_side} tiles to {tiles_per_side}x{tiles_per_side} tiles...", end=" ", flush=True)
df = df.with_columns(x=pl.col("x") // scale, y=pl.col("y") // scale).group_by(["x", "y"]).agg(count=pl.sum("count"), rtt_us=pl.mean("rtt_us"))
if not quiet:
print(f"done with {len(df)} coords remaining")
if skip_iters and skip_iters > 0:
remaining_iters = tiles_per_side.bit_length() - skip_iters
if remaining_iters <= 0:
if not quiet:
print("skipping all iters")
return
scale_down_coords(1 << skip_iters)
while True:
for colormap in colormaps:
if generate_density:
generate_images(colormap, "density", "count", 256 if possible_overlaps == 1 else possible_overlaps)
if generate_rtt:
generate_images(colormap, "rtt", "rtt_us", int(df.get_column("rtt_us").std()))
if tiles_per_side == 1:
break
old_tiles_per_side = tiles_per_side
tiles_per_side //= 2
print(f"[{z=}] rescaling {len(gdf)} coords from {old_tiles_per_side}x{old_tiles_per_side} to {tiles_per_side}x{tiles_per_side} tiles...", end=" ", flush=True)
new_gdf = gdf.with_columns(x=pl.col("x") // 2, y=pl.col("y") // 2)
del gdf
gdf = new_gdf.group_by(["x", "y"]).mean()
print(f"done. {len(gdf)} coords remaining.")
scale_down_coords()
# Typed description of the attributes argparse fills in; main() passes this class itself
# to parse_args(namespace=IpMapArgs) rather than an instance.
@dataclass
class IpMapArgs:
# Selected subcommand (the subparsers are registered with dest="command").
command: Literal["convert", "generate"]
# Set by -q/--quiet on the top-level parser; suppresses progress output.
quiet: bool
# Positional input path: the csv for "convert", the parquet file for "generate".
input: str
# Positional output path: the parquet file for "convert", the tiles directory for "generate".
output: str
# The remaining fields are only registered on the "generate" subparser.
tile_size: int
# Comma separated list, still unsplit; main() runs it through parse_list_arg.
colormaps: str
# Comma separated list, still unsplit; main() runs it through parse_list_arg.
variants: str
# Number of worker processes used when saving images.
processes: int
# Optional cap on how many rows are read from the scan data (None when not given).
num_rows: int | None
# Optional number of iterations to skip generating images for (None when not given).
skip_iters: int | None
def parse_list_arg(arg: str):
    """Split a comma separated argument into a list of cleaned items.

    Each item has surrounding whitespace removed and is lowercased; items
    that are empty after trimming are dropped.
    """
    trimmed = map(str.strip, arg.split(","))
    return [item.lower() for item in trimmed if item]
def main():
data_dir = Path("data")
csv_path = data_dir / "full-scan.csv"
parquet_path = data_dir / "full-scan.parquet"
if not parquet_path.exists():
print(f"no parquet file found at \"{parquet_path}\", generating now...")
convert_to_parquet(csv_path, parquet_path)
tiles_dir = Path("tiles")
generate_tiles(parquet_path, tiles_dir)
parser = argparse.ArgumentParser("ipmap")
parser.add_argument("-q", "--quiet", action="store_true", help="decrease output verbosity")
subparsers = parser.add_subparsers(dest="command", required=True, help="the command to run")
convert_parser = subparsers.add_parser("convert", help="convert scan data from csv to parquet format")
convert_parser.add_argument("input", help="the input path of the csv file to read the scan data from")
convert_parser.add_argument("output", help="the output path of the parquet file to save the converted scan data to")
generate_parser = subparsers.add_parser("generate", help="generate tile images from scan data in parquet format")
generate_parser.add_argument("-t", "--tile-size", default=default_tile_size, type=int, help="the tile size to use (default: %(default)s)")
generate_parser.add_argument("-v", "--variants", default=",".join(default_variants), help="a comma separated list of variants to generate (default: %(default)s)")
generate_parser.add_argument("-c", "--colormaps", default=",".join(default_colormaps), help="a comma separated list of colormaps to generate (default: %(default)s)")
generate_parser.add_argument("-p", "--processes", default=default_processes, type=int, help="how many processes to spawn for saving images (default: %(default)s)")
generate_parser.add_argument("-n", "--num-rows", type=int, help="how many rows to read from the scan data (default: all)")
generate_parser.add_argument("-s", "--skip-iters", type=int, help="how many iterations to skip generating images for (default: none)")
generate_parser.add_argument("input", help="the input path of the parquet file to read the scan data from")
generate_parser.add_argument("output", help="the output path to save the generated tile images to")
args = parser.parse_args(namespace=IpMapArgs)
try:
if args.command == "convert":
convert_to_parquet(csv_path=Path(args.input), parquet_path=Path(args.output), quiet=args.quiet)
elif args.command == "generate":
generate_tiles(parquet_path=Path(args.input), tiles_dir=Path(args.output),
tile_size=args.tile_size, variants=parse_list_arg(args.variants),
colormaps=parse_list_arg(args.colormaps), processes=args.processes,
num_rows=args.num_rows, skip_iters=args.skip_iters, quiet=args.quiet)
else:
raise ValueError("invalid command")
except ValueError as e:
print(f"error: {e}")
sys.exit(1)
if __name__ == "__main__":
main()

22
poetry.lock generated
View file

@ -1,18 +1,24 @@
# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand.
[[package]]
name = "iptools"
version = "0.7.0"
description = "Python utilites for manipulating IPv4 and IPv6 addresses"
name = "cmap"
version = "0.1.3"
description = "Scientific colormaps for python, without dependencies"
optional = false
python-versions = "*"
python-versions = ">=3.8"
files = [
{file = "iptools-0.7.0-py2.py3-none-any.whl", hash = "sha256:a91fc7478fd795ac6b2d47c869fb46db7666ffec817bcb0560ef119e204237f0"},
{file = "iptools-0.7.0.tar.gz", hash = "sha256:118a4f638bb5fa0123df56fe3be703b112a689167539bcc194f8698ccdd9e2ea"},
{file = "cmap-0.1.3-py3-none-any.whl", hash = "sha256:47be4b515612c0d7991622aea064083d43dd1e1ff3e102818b8cc96f45b96e7e"},
{file = "cmap-0.1.3.tar.gz", hash = "sha256:41bca45e7e2c8699ee89fa821ccd956660d94084390e6eba106459590d856c66"},
]
[package.dependencies]
numpy = "*"
[package.extras]
testing = ["nose (>=1.0)"]
dev = ["black", "ipython", "mypy", "pdbpp", "pre-commit", "pytest", "pytest-cov", "rich", "ruff"]
docs = ["colorcet", "colorspacious", "imageio", "mkdocs", "mkdocs-gen-files", "mkdocs-literate-nav", "mkdocs-material", "mkdocs-minify-plugin", "mkdocstrings-python"]
test = ["matplotlib", "numba", "pytest (>=6.0)", "pytest-cov"]
thirdparty = ["bokeh", "colour", "napari", "plotly", "pydantic", "pygfx", "pytest-qt", "rich", "viscm", "vispy"]
[[package]]
name = "numpy"
@ -122,4 +128,4 @@ files = [
[metadata]
lock-version = "2.0"
python-versions = "^3.11"
content-hash = "ae4649713b0931b2549ea9b0951c28123e2c1d7abcdf26bfd202ac2aa039d9db"
content-hash = "4766b735c32d1dbed994187370f478f0b535a151f6ddbd08210f57ed64f2e9d9"

View file

@ -8,10 +8,10 @@ license = "AGPLv3"
[tool.poetry.dependencies]
python = "^3.11"
pypng = "^0.20220715.0"
iptools = "^0.7.0"
numpy = "^1.26.4"
numpy-hilbert-curve = "^1.0.1"
polars-lts-cpu = "^0.20.17"
cmap = "^0.1.3"
[build-system]

35
zmap.sh
View file

@ -1,5 +1,34 @@
#!/bin/sh
OUTFILE="data/$(date +"%Y-%m-%d")-full-scan.csv"
zmap -B '100M' -M icmp_echo_time '0.0.0.0/0' -f 'saddr,rtt_us,success' -o "$OUTFILE" && \
scp "$OUTFILE" user@to_host:"/destination/$OUTFILE"
# Scan the IPv4 space with zmap, convert the csv to parquet locally, copy the parquet
# file to the remote host and generate the tile images there.
# The date is captured once so every path of a single run uses the same directory.
DATE=$(date +"%Y-%m-%d")
# change these variables
DATA_FILENAME=full-scan
LOCAL_DATA_PATH=/data
LOCAL_IPMAP_PATH=/scripts/ipmap.py
REMOTE_USER=root
REMOTE_HOST=localhost
REMOTE_DATA_PATH=/data
REMOTE_TILES_PATH=/tiles
REMOTE_IPMAP_PATH=/scripts/ipmap.py
COLORMAPS=jet,fake_parula,viridis,plasma,thermal,batlow
VARIANTS=density,rtt
# do not change these variables
CSV_FILENAME="$DATA_FILENAME.csv"
PARQUET_FILENAME="$DATA_FILENAME.parquet"
CURRENT_LOCAL_DATA_PATH="$LOCAL_DATA_PATH/$DATE"
LOCAL_CSV_PATH="$CURRENT_LOCAL_DATA_PATH/$CSV_FILENAME"
LOCAL_PARQUET_PATH="$CURRENT_LOCAL_DATA_PATH/$PARQUET_FILENAME"
REMOTE="$REMOTE_USER@$REMOTE_HOST"
CURRENT_REMOTE_DATA_PATH="$REMOTE_DATA_PATH/$DATE"
REMOTE_PARQUET_PATH="$CURRENT_REMOTE_DATA_PATH/$PARQUET_FILENAME"
CURRENT_REMOTE_TILES_PATH="$REMOTE_TILES_PATH/$DATE"
# ssh joins its command arguments with spaces and hands the result to the remote shell.
# Each remote command is therefore passed as ONE double-quoted string (expanded locally)
# whose arguments are single-quoted for the remote shell. Wrapping the whole command in
# single quotes would make the remote shell look for a program literally named
# "mkdir -p /path". The configured values must not contain single quotes themselves.
mkdir -p "$CURRENT_LOCAL_DATA_PATH" && \
zmap -B '100M' -M icmp_echo_time '0.0.0.0/0' -f 'saddr,rtt_us,success' -o "$LOCAL_CSV_PATH" && \
"$LOCAL_IPMAP_PATH" convert "$LOCAL_CSV_PATH" "$LOCAL_PARQUET_PATH" && \
ssh "$REMOTE" "mkdir -p '$CURRENT_REMOTE_DATA_PATH'" && \
scp "$LOCAL_PARQUET_PATH" "$REMOTE":"$REMOTE_PARQUET_PATH" && \
ssh "$REMOTE" "mkdir -p '$CURRENT_REMOTE_TILES_PATH'" && \
ssh "$REMOTE" "'$REMOTE_IPMAP_PATH' generate -c '$COLORMAPS' -v '$VARIANTS' '$REMOTE_PARQUET_PATH' '$CURRENT_REMOTE_TILES_PATH'"