Refactor convert command to remove polars entirely
This commit is contained in:
parent
3f61d7fac6
commit
27a8d2584d
23
ipmap.py
23
ipmap.py
|
@ -16,7 +16,6 @@ from png import Writer
|
||||||
from cmap import Colormap
|
from cmap import Colormap
|
||||||
from hilbert import decode
|
from hilbert import decode
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import polars as pl
|
|
||||||
|
|
||||||
ip_bytes = 4
|
ip_bytes = 4
|
||||||
ip_bits = ip_bytes * 8
|
ip_bits = ip_bytes * 8
|
||||||
|
@ -56,17 +55,17 @@ def make_coords(output_path: Path, batches = default_batches, processes = defaul
|
||||||
print(f"finished writing to file")
|
print(f"finished writing to file")
|
||||||
|
|
||||||
def convert(input_path: Path, output_path: Path):
|
def convert(input_path: Path, output_path: Path):
|
||||||
print(f"scanning csv '{input_path}' into array...", end = " ", flush = True)
|
print(f"reading csv '{input_path}' into array...", end = " ", flush = True)
|
||||||
lf = pl.scan_csv(input_path, schema = {
|
arr = np.loadtxt(input_path, dtype = np.uint32, delimiter = ",", skiprows = 1)
|
||||||
"saddr_raw": pl.UInt32,
|
print("done")
|
||||||
"rtt_us": pl.UInt64,
|
print("filtering out unsuccessful values...", end = " ", flush = True)
|
||||||
"success": pl.UInt8
|
arr = arr[arr[:, -1] == 1]
|
||||||
})
|
print("done")
|
||||||
lf = lf.filter(pl.col("success") == 1)
|
print("removing success column...", end = " ", flush = True)
|
||||||
lf = lf.drop("success")
|
arr = arr[:, :-1]
|
||||||
lf = lf.with_columns(rtt_us = pl.col("rtt_us").clip(0, 0xFFFFFFFF).cast(pl.UInt32))
|
print("done")
|
||||||
lf = lf.unique("saddr_raw")
|
print("removing duplicate IP addresses...", end = " ", flush = True)
|
||||||
arr = lf.collect().to_numpy()
|
arr = arr[np.unique(arr[:, 0], return_index = True)[1]]
|
||||||
print("done")
|
print("done")
|
||||||
print("converting IP addresses from big-endian to little-endian...", end = " ", flush = True)
|
print("converting IP addresses from big-endian to little-endian...", end = " ", flush = True)
|
||||||
arr[:, 0].byteswap(inplace = True)
|
arr[:, 0].byteswap(inplace = True)
|
||||||
|
|
|
@ -264,45 +264,6 @@ files = [
|
||||||
{file = "packaging-24.0.tar.gz", hash = "sha256:eb82c5e3e56209074766e6885bb04b8c38a0c015d0a30036ebe7ece34c9989e9"},
|
{file = "packaging-24.0.tar.gz", hash = "sha256:eb82c5e3e56209074766e6885bb04b8c38a0c015d0a30036ebe7ece34c9989e9"},
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "polars-lts-cpu"
|
|
||||||
version = "0.20.17"
|
|
||||||
description = "Blazingly fast DataFrame library"
|
|
||||||
optional = false
|
|
||||||
python-versions = ">=3.8"
|
|
||||||
files = [
|
|
||||||
{file = "polars_lts_cpu-0.20.17-cp38-abi3-macosx_10_12_x86_64.whl", hash = "sha256:c5ba1113df88bd0e46bc2e649279f1e2f09f20d24a7e3a8b07d342d1e117bf40"},
|
|
||||||
{file = "polars_lts_cpu-0.20.17-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:514e833c63d2734d9028ca754fe441479cb8d68d06efe9f88fdb348db9578941"},
|
|
||||||
{file = "polars_lts_cpu-0.20.17-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3512862da0bcb764ed5e63bb122d265295d503e5294c839d5f46f88937543cc1"},
|
|
||||||
{file = "polars_lts_cpu-0.20.17-cp38-abi3-manylinux_2_24_aarch64.whl", hash = "sha256:2a30789e25a07e0c925e6fde030d2ee53024ae621a0194c423ff83f359d5f62c"},
|
|
||||||
{file = "polars_lts_cpu-0.20.17-cp38-abi3-win_amd64.whl", hash = "sha256:b5a3487d481517525d7c9b9c69210f123c2d1f233c47487fa058646c2dc3d42c"},
|
|
||||||
{file = "polars_lts_cpu-0.20.17.tar.gz", hash = "sha256:e11eb08f9264459339af4942c4be9c187daf2ffe4040d24284582e4e0e492ab7"},
|
|
||||||
]
|
|
||||||
|
|
||||||
[package.extras]
|
|
||||||
adbc = ["adbc-driver-manager", "adbc-driver-sqlite"]
|
|
||||||
all = ["polars[adbc,async,cloudpickle,connectorx,deltalake,fastexcel,fsspec,gevent,numpy,pandas,plot,pyarrow,pydantic,pyiceberg,sqlalchemy,timezone,xlsx2csv,xlsxwriter]"]
|
|
||||||
async = ["nest-asyncio"]
|
|
||||||
cloudpickle = ["cloudpickle"]
|
|
||||||
connectorx = ["connectorx (>=0.3.2)"]
|
|
||||||
deltalake = ["deltalake (>=0.14.0)"]
|
|
||||||
fastexcel = ["fastexcel (>=0.9)"]
|
|
||||||
fsspec = ["fsspec"]
|
|
||||||
gevent = ["gevent"]
|
|
||||||
matplotlib = ["matplotlib"]
|
|
||||||
numpy = ["numpy (>=1.16.0)"]
|
|
||||||
openpyxl = ["openpyxl (>=3.0.0)"]
|
|
||||||
pandas = ["pandas", "pyarrow (>=7.0.0)"]
|
|
||||||
plot = ["hvplot (>=0.9.1)"]
|
|
||||||
pyarrow = ["pyarrow (>=7.0.0)"]
|
|
||||||
pydantic = ["pydantic"]
|
|
||||||
pyiceberg = ["pyiceberg (>=0.5.0)"]
|
|
||||||
pyxlsb = ["pyxlsb (>=1.0)"]
|
|
||||||
sqlalchemy = ["pandas", "sqlalchemy"]
|
|
||||||
timezone = ["backports-zoneinfo", "tzdata"]
|
|
||||||
xlsx2csv = ["xlsx2csv (>=0.8.0)"]
|
|
||||||
xlsxwriter = ["xlsxwriter"]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "pydantic"
|
name = "pydantic"
|
||||||
version = "2.6.4"
|
version = "2.6.4"
|
||||||
|
@ -580,4 +541,4 @@ files = [
|
||||||
[metadata]
|
[metadata]
|
||||||
lock-version = "2.0"
|
lock-version = "2.0"
|
||||||
python-versions = "^3.11"
|
python-versions = "^3.11"
|
||||||
content-hash = "882810214ec005c8e1d0b99099d0f9fc8d6e8fb9140ac9f452e18e7e3c580176"
|
content-hash = "3ca6841a3434879d43d536188bf827e8a74f959cbac3da3d272dc1cc47769620"
|
||||||
|
|
|
@ -10,7 +10,6 @@ python = "^3.11"
|
||||||
pypng = "^0.20220715.0"
|
pypng = "^0.20220715.0"
|
||||||
numpy = "^1.26.4"
|
numpy = "^1.26.4"
|
||||||
numpy-hilbert-curve = "^1.0.1"
|
numpy-hilbert-curve = "^1.0.1"
|
||||||
polars-lts-cpu = "^0.20.17"
|
|
||||||
cmap = "^0.1.3"
|
cmap = "^0.1.3"
|
||||||
fastapi = "^0.110.1"
|
fastapi = "^0.110.1"
|
||||||
uvicorn = "^0.29.0"
|
uvicorn = "^0.29.0"
|
||||||
|
|
Loading…
Reference in New Issue