Refactor stuff

LilyRose2798 2024-04-03 00:43:54 +11:00
parent 2756281ddb
commit 4540c36cdd
5 changed files with 267 additions and 152 deletions


@@ -34,65 +34,58 @@
<body>
<div id="map"></div>
<script>
const hilbert_c2i = ({ x, y }) => {
  let rotation = 0
  let reflection = 0
  let index = 0
  for (let b = 15; b >= 0; b--) {
    let bits = reflection
    reflection = (y >>> b) & 1
    reflection |= ((x >>> b) & 1) << 1
    bits = bits ^ reflection
    bits = ((bits >>> rotation) | (bits << (2 - rotation))) & 3
    index |= bits << (b << 1)
    reflection ^= (1 << rotation)
    bits = bits & (-bits) & 1
    while (bits) {
      ++rotation
      bits >>>= 1
    }
    if (++rotation >= 2)
      rotation -= 2
  }
  index ^= 0x2aaaaaaa
  for (let d = 1; d < 32; d *= 2) {
    let t = index >>> d
    if (!t) break
    index ^= t
  }
  return index
}
const hilbert_i2c = index => {
  let rotation = 0
  let reflection = 0
  let coord = { x: 0, y: 0 }
  index ^= (index >>> 1) ^ 0x2aaaaaaa
  for (let b = 15; b >= 0; b--) {
    let bits = index >>> (2 * b) & 3
    reflection ^= ((bits >>> (2 - rotation)) | (bits << rotation)) & 3
    coord.x |= (reflection & 1) << b
    coord.y |= ((reflection >>> 1) & 1) << b
    reflection ^= (1 << rotation)
    bits &= (-bits) & 1
    while (bits) {
      bits >>>= 1
      ++rotation
    }
    if (++rotation >= 2)
      rotation -= 2
  }
  return coord
}
const dateDir = (date = new Date()) => `${date.getFullYear()}-${(date.getMonth() + 1).toString().padStart(2, "0")}-${date.getDate().toString().padStart(2, "0")}`
const map = new maplibregl.Map({
  container: "map",
@@ -101,24 +94,24 @@
  doubleClickZoom: false,
  dragRotate: false,
  style: {
    version: 8,
    sources: {
      "ipmap-tiles": {
        type: "raster",
        tiles: [
          "tiles/2024-03-30/density/jet/{z}/{y}/{x}.png" // change to using remote json with list of tilemaps
        ],
        minzoom: 0,
        maxzoom: 8,
        tileSize: 256
      }
    },
    layers: [
      {
        id: "ipmap-tiles-layer",
        type: "raster",
        source: "ipmap-tiles",
        paint: {
          "raster-resampling": "nearest"
        }
      }
@@ -144,6 +137,7 @@
    .setLngLat(e.lngLat)
    .addTo(map)
})
const setTileUrl = (date, variant, colormap) => map.getSource("ipmap-tiles").setTiles([`tiles/${dateDir(date)}/${variant}/${colormap}/{z}/{y}/{x}.png`])
</script>
</body>
</html>
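
The two arrow functions above are the browser-side counterpart of the Hilbert mapping that ipmap.py gets from the numpy-hilbert-curve package. A minimal round-trip sketch in Python, assuming hilbert.encode is that package's inverse of the hilbert.decode call ipmap.py already makes:

    # Round-trip sketch: u32 address index -> (x, y) -> index again.
    # Assumption: numpy-hilbert-curve provides encode/decode as a pair;
    # decode is what ipmap.py itself calls with num_dims=2, num_bits=16.
    import numpy as np
    import hilbert

    addr = np.array([0x01020304])                       # 1.2.3.4 packed as a u32
    xy = hilbert.decode(addr, num_dims=2, num_bits=16)  # one (x, y) pair, each 0..65535
    back = hilbert.encode(xy, num_dims=2, num_bits=16)  # back to the curve index
    assert back[0] == addr[0]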

ipmap.py Normal file → Executable file

@@ -1,18 +1,22 @@
#!/usr/bin/env python3
import sys
import math
import functools
import argparse
from pathlib import Path
import png
import hilbert
import numpy as np
import polars as pl
from cmap import Colormap
from multiprocessing import Pool
from dataclasses import dataclass
from typing import Literal

def convert_to_parquet(csv_path: Path, parquet_path: Path, quiet=False):
    if not quiet:
        print(f"scanning csv '{csv_path}' into parquet '{parquet_path}'...", end=" ", flush=True)
    lf = pl.scan_csv(csv_path, schema={
        "saddr": pl.String,
        "rtt_us": pl.UInt64,
@@ -20,85 +24,167 @@ def convert_to_parquet(csv_path: Path, parquet_path: Path):
    })
    lf = lf.filter(pl.col("success") == 1)
    lf = lf.drop("success")
    lf = lf.with_columns(rtt_us = pl.col("rtt_us").clip(0, 0xFFFFFFFF).cast(pl.UInt32))
    lf = lf.with_columns(saddr = pl.col("saddr").str.split_exact(".", 3).struct.rename_fields(["a", "b", "c", "d"]))
    lf = lf.with_columns(saddr = pl.col("saddr").struct.field("a").cast(pl.UInt32) * 0x1000000 + pl.col("saddr").struct.field("b").cast(pl.UInt32) * 0x10000 + pl.col("saddr").struct.field("c").cast(pl.UInt32) * 0x100 + pl.col("saddr").struct.field("d").cast(pl.UInt32))
    lf = lf.unique("saddr")
    lf = lf.with_columns(coords = pl.col("saddr").map_batches(functools.partial(hilbert.decode, num_dims=2, num_bits=16), pl.Array(pl.UInt16, 2), is_elementwise = True))
    lf = lf.with_columns(x = pl.col("coords").arr.get(0), y = pl.col("coords").arr.get(1))
    lf = lf.drop("coords")
    lf.sink_parquet(parquet_path)
    if not quiet:
        print("done")
def write_tile(path: Path, rows: np.ndarray):
    path.parent.mkdir(exist_ok=True, parents=True)
    png.Writer(rows.shape[0], rows.shape[1], greyscale=False, alpha=False).write_packed(path.open("wb"), rows)
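
write_tile consumes one tile-sized slice of the "S3" image array built in generate_images below, where each element is one packed 3-byte RGB pixel. A toy sketch with a hypothetical 2x2 all-red tile (this relies, as the code above does, on pypng accepting numpy byte-string rows via write_packed):

    import numpy as np
    from pathlib import Path

    # Hypothetical output path; every element is the 3-byte RGB value for red.
    rows = np.full((2, 2), b"\xff\x00\x00", dtype="S3")
    write_tile(Path("out/0/0.png"), rows)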
default_tile_size = 256
default_colormaps = ["viridis"]
default_variants = ["density", "rtt"]
default_processes = 16

def generate_tiles(parquet_path: Path, tiles_dir: Path, tile_size=default_tile_size,
        variants: list[str] = default_variants, colormaps: list[str] = default_colormaps,
        processes=default_processes, num_rows: int | None = None,
        skip_iters: int | None = None, quiet=False):
    if tile_size < 1 or tile_size > 0x10000 or tile_size & (tile_size - 1) != 0:
        raise ValueError(f"tile size must be a power of 2 between 1 and {0x10000}")
    if len(variants) == 0:
        raise ValueError("must specify at least one variant")
    if len(colormaps) == 0:
        raise ValueError("must specify at least one colormap")
    colormaps_by_name = { colormap: [bytes(c) for c in (Colormap(colormap).lut()[:, 0:3] * (256.0 - np.finfo(np.float32).eps)).astype(np.uint8)] for colormap in colormaps }
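
Each colormaps_by_name entry is a 256-entry lookup table of packed 3-byte RGB values. A sketch of what the comprehension above builds, assuming cmap's lut() returns an Nx4 RGBA float array in [0, 1] (which the slicing and scaling above depend on):

    from cmap import Colormap
    import numpy as np

    lut = (Colormap("viridis").lut()[:, 0:3] * (256.0 - np.finfo(np.float32).eps)).astype(np.uint8)
    entries = [bytes(c) for c in lut]
    print(len(entries))  # 256
    print(entries[0])    # b'D\x01T', i.e. RGB (68, 1, 84), viridis's darkest purple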
    generate_density = False
    generate_rtt = False
    for variant in variants:
        if variant == "density":
            generate_density = True
        elif variant == "rtt":
            generate_rtt = True
        else:
            raise ValueError(f"unknown variant '{variant}'")
    if not quiet:
        print(f"reading parquet '{parquet_path}'...", end=" ", flush=True)
    df = pl.read_parquet(parquet_path, columns = ["x", "y", "rtt_us"], n_rows=num_rows).with_columns(count = pl.lit(1, pl.UInt32))
    if not quiet:
        print("done")
    tiles_per_side = int(math.sqrt(0x100000000)) // tile_size
    possible_overlaps = 1

    def generate_images(colormap: str, type_name: str, col_name: str, divisor: int):
        nonlocal df
        if not quiet:
            print(f"creating {type_name} image data with {colormap} colormap...", end=" ", flush=True)
        image_data = np.zeros((tiles_per_side * tile_size, tiles_per_side * tile_size), dtype = "S3")
        image_data[(df.get_column("y"), df.get_column("x"))] = (255 * df.get_column(col_name) // divisor).clip(0, 255).cast(pl.UInt8).replace(pl.int_range(256), colormaps_by_name[colormap], return_dtype=pl.Binary)
        if not quiet:
            print("done")
        if not quiet:
            print(f"writing {tiles_per_side}x{tiles_per_side}={tiles_per_side * tiles_per_side} {type_name} images with {colormap} colormap...", end=" ", flush=True)
        with Pool(processes) as pool:
            z = tiles_per_side.bit_length() - 1
            z_path = tiles_dir / type_name / colormap / f"{z}"
            z_path.mkdir(exist_ok=True, parents=True)
            pool.starmap(write_tile, [
                (z_path / f"{y}" / f"{x}.png", image_data[
                    y * tile_size : y * tile_size + tile_size,
                    x * tile_size : x * tile_size + tile_size,
                ])
                for y in range(tiles_per_side)
                for x in range(tiles_per_side)
            ])
        if not quiet:
            print("done")
        del image_data
    def scale_down_coords(scale = 2):
        nonlocal df
        nonlocal tiles_per_side
        nonlocal possible_overlaps
        prev_tiles_per_side = tiles_per_side
        tiles_per_side //= scale
        possible_overlaps *= scale * scale
        if not quiet:
            print(f"scaling {len(df)} coords down from {prev_tiles_per_side}x{prev_tiles_per_side} tiles to {tiles_per_side}x{tiles_per_side} tiles...", end=" ", flush=True)
        df = df.with_columns(x=pl.col("x") // scale, y=pl.col("y") // scale).group_by(["x", "y"]).agg(count=pl.sum("count"), rtt_us=pl.mean("rtt_us"))
        if not quiet:
            print(f"done with {len(df)} coords remaining")

    if skip_iters and skip_iters > 0:
        remaining_iters = tiles_per_side.bit_length() - skip_iters
        if remaining_iters <= 0:
            if not quiet:
                print("skipping all iters")
            return
        scale_down_coords(1 << skip_iters)
    while True:
        for colormap in colormaps:
            if generate_density:
                generate_images(colormap, "density", "count", 256 if possible_overlaps == 1 else possible_overlaps)
            if generate_rtt:
                generate_images(colormap, "rtt", "rtt_us", int(df.get_column("rtt_us").std()))
        if tiles_per_side == 1:
            break
        scale_down_coords()
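
The while loop renders one zoom level per iteration and halves the tile grid between levels. A worked sketch of the level arithmetic for the default 256-pixel tiles:

    # The full Hilbert plane is 65536x65536 pixels, so with 256-pixel tiles
    # tiles_per_side = 65536 // 256 = 256 and z = 256.bit_length() - 1 = 8.
    # skip_iters=3 would start instead from scale_down_coords(1 << 3), i.e. z = 5.
    tile_size = 256
    tiles_per_side = 65536 // tile_size
    levels = []
    while True:
        levels.append(tiles_per_side.bit_length() - 1)
        if tiles_per_side == 1:
            break
        tiles_per_side //= 2
    print(levels)  # [8, 7, 6, 5, 4, 3, 2, 1, 0]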
print(f"[{z=}] rescaling {len(gdf)} coords from {old_tiles_per_side}x{old_tiles_per_side} to {tiles_per_side}x{tiles_per_side} tiles...", end=" ", flush=True) @dataclass
new_gdf = gdf.with_columns(x=pl.col("x") // 2, y=pl.col("y") // 2) class IpMapArgs:
del gdf command: Literal["convert", "generate"]
gdf = new_gdf.group_by(["x", "y"]).mean() quiet: bool
print(f"done. {len(gdf)} coords remaining.") input: str
output: str
tile_size: int
colormaps: str
variants: str
processes: int
num_rows: int | None
skip_iters: int | None
def parse_list_arg(arg: str):
return [x.strip().lower() for x in arg.split(",") if x.strip()]
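
parse_list_arg normalizes the comma-separated -c/-v values, dropping empty items and lowercasing; for example:

    print(parse_list_arg("Jet, viridis,,PLASMA"))  # ['jet', 'viridis', 'plasma']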
def main():
    parser = argparse.ArgumentParser("ipmap")
    parser.add_argument("-q", "--quiet", action="store_true", help="decrease output verbosity")
    subparsers = parser.add_subparsers(dest="command", required=True, help="the command to run")
    convert_parser = subparsers.add_parser("convert", help="convert scan data from csv to parquet format")
    convert_parser.add_argument("input", help="the input path of the csv file to read the scan data from")
    convert_parser.add_argument("output", help="the output path of the parquet file to save the converted scan data to")
    generate_parser = subparsers.add_parser("generate", help="generate tile images from scan data in parquet format")
    generate_parser.add_argument("-t", "--tile-size", default=default_tile_size, type=int, help="the tile size to use (default: %(default)s)")
    generate_parser.add_argument("-v", "--variants", default=",".join(default_variants), help="a comma separated list of variants to generate (default: %(default)s)")
    generate_parser.add_argument("-c", "--colormaps", default=",".join(default_colormaps), help="a comma separated list of colormaps to generate (default: %(default)s)")
    generate_parser.add_argument("-p", "--processes", default=default_processes, type=int, help="how many processes to spawn for saving images (default: %(default)s)")
    generate_parser.add_argument("-n", "--num-rows", type=int, help="how many rows to read from the scan data (default: all)")
    generate_parser.add_argument("-s", "--skip-iters", type=int, help="how many iterations to skip generating images for (default: none)")
    generate_parser.add_argument("input", help="the input path of the parquet file to read the scan data from")
    generate_parser.add_argument("output", help="the output path to save the generated tile images to")
    args = parser.parse_args(namespace=IpMapArgs)
    try:
        if args.command == "convert":
            convert_to_parquet(csv_path=Path(args.input), parquet_path=Path(args.output), quiet=args.quiet)
        elif args.command == "generate":
            generate_tiles(parquet_path=Path(args.input), tiles_dir=Path(args.output),
                tile_size=args.tile_size, variants=parse_list_arg(args.variants),
                colormaps=parse_list_arg(args.colormaps), processes=args.processes,
                num_rows=args.num_rows, skip_iters=args.skip_iters, quiet=args.quiet)
        else:
            raise ValueError("invalid command")
    except ValueError as e:
        print(f"error: {e}")
        sys.exit(1)

if __name__ == "__main__":
    main()
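
The same two steps the CLI wires together can also be called directly; a sketch with hypothetical paths (mirroring the date-based layout zmap.sh sets up below):

    from pathlib import Path

    convert_to_parquet(Path("data/2024-03-30/full-scan.csv"),
        Path("data/2024-03-30/full-scan.parquet"))
    generate_tiles(Path("data/2024-03-30/full-scan.parquet"),
        Path("tiles/2024-03-30"), variants=["density"], colormaps=["jet"])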

poetry.lock generated

@@ -1,18 +1,24 @@
# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand.

[[package]]
name = "cmap"
version = "0.1.3"
description = "Scientific colormaps for python, without dependencies"
optional = false
python-versions = ">=3.8"
files = [
    {file = "cmap-0.1.3-py3-none-any.whl", hash = "sha256:47be4b515612c0d7991622aea064083d43dd1e1ff3e102818b8cc96f45b96e7e"},
    {file = "cmap-0.1.3.tar.gz", hash = "sha256:41bca45e7e2c8699ee89fa821ccd956660d94084390e6eba106459590d856c66"},
]

[package.dependencies]
numpy = "*"

[package.extras]
dev = ["black", "ipython", "mypy", "pdbpp", "pre-commit", "pytest", "pytest-cov", "rich", "ruff"]
docs = ["colorcet", "colorspacious", "imageio", "mkdocs", "mkdocs-gen-files", "mkdocs-literate-nav", "mkdocs-material", "mkdocs-minify-plugin", "mkdocstrings-python"]
test = ["matplotlib", "numba", "pytest (>=6.0)", "pytest-cov"]
thirdparty = ["bokeh", "colour", "napari", "plotly", "pydantic", "pygfx", "pytest-qt", "rich", "viscm", "vispy"]

[[package]]
name = "numpy"
@@ -122,4 +128,4 @@
[metadata]
lock-version = "2.0"
python-versions = "^3.11"
content-hash = "4766b735c32d1dbed994187370f478f0b535a151f6ddbd08210f57ed64f2e9d9"

pyproject.toml

@@ -8,10 +8,10 @@ license = "AGPLv3"
[tool.poetry.dependencies]
python = "^3.11"
pypng = "^0.20220715.0"
numpy = "^1.26.4"
numpy-hilbert-curve = "^1.0.1"
polars-lts-cpu = "^0.20.17"
cmap = "^0.1.3"

[build-system]

zmap.sh

@@ -1,5 +1,34 @@
#!/bin/sh
DATE=$(date +"%Y-%m-%d")

# change these variables
DATA_FILENAME=full-scan
LOCAL_DATA_PATH=/data
LOCAL_IPMAP_PATH=/scripts/ipmap.py
REMOTE_USER=root
REMOTE_HOST=localhost
REMOTE_DATA_PATH=/data
REMOTE_TILES_PATH=/tiles
REMOTE_IPMAP_PATH=/scripts/ipmap.py
COLORMAPS=jet,fake_parula,viridis,plasma,thermal,batlow
VARIANTS=density,rtt

# do not change these variables
CSV_FILENAME="$DATA_FILENAME.csv"
PARQUET_FILENAME="$DATA_FILENAME.parquet"
CURRENT_LOCAL_DATA_PATH="$LOCAL_DATA_PATH/$DATE"
LOCAL_CSV_PATH="$CURRENT_LOCAL_DATA_PATH/$CSV_FILENAME"
LOCAL_PARQUET_PATH="$CURRENT_LOCAL_DATA_PATH/$PARQUET_FILENAME"
REMOTE="$REMOTE_USER@$REMOTE_HOST"
CURRENT_REMOTE_DATA_PATH="$REMOTE_DATA_PATH/$DATE"
REMOTE_PARQUET_PATH="$CURRENT_REMOTE_DATA_PATH/$PARQUET_FILENAME"
CURRENT_REMOTE_TILES_PATH="$REMOTE_TILES_PATH/$DATE"

mkdir -p "$CURRENT_LOCAL_DATA_PATH" && \
zmap -B '100M' -M icmp_echo_time '0.0.0.0/0' -f 'saddr,rtt_us,success' -o "$LOCAL_CSV_PATH" && \
"$LOCAL_IPMAP_PATH" convert "$LOCAL_CSV_PATH" "$LOCAL_PARQUET_PATH" && \
ssh "$REMOTE" "mkdir -p '$CURRENT_REMOTE_DATA_PATH'" && \
scp "$LOCAL_PARQUET_PATH" "$REMOTE:$REMOTE_PARQUET_PATH" && \
ssh "$REMOTE" "mkdir -p '$CURRENT_REMOTE_TILES_PATH'" && \
ssh "$REMOTE" "'$REMOTE_IPMAP_PATH' generate -c '$COLORMAPS' -v '$VARIANTS' '$REMOTE_PARQUET_PATH' '$CURRENT_REMOTE_TILES_PATH'"