Refactor convert command to remove polars entirely

This commit is contained in:
LilyRose2798 2024-04-15 22:23:00 +10:00
parent 3f61d7fac6
commit 27a8d2584d
3 changed files with 12 additions and 53 deletions

View File

@@ -16,7 +16,6 @@ from png import Writer
from cmap import Colormap from cmap import Colormap
from hilbert import decode from hilbert import decode
import numpy as np import numpy as np
import polars as pl
ip_bytes = 4 ip_bytes = 4
ip_bits = ip_bytes * 8 ip_bits = ip_bytes * 8
@@ -56,17 +55,17 @@ def make_coords(output_path: Path, batches = default_batches, processes = defaul
print(f"finished writing to file") print(f"finished writing to file")
def convert(input_path: Path, output_path: Path): def convert(input_path: Path, output_path: Path):
print(f"scanning csv '{input_path}' into array...", end = " ", flush = True) print(f"reading csv '{input_path}' into array...", end = " ", flush = True)
lf = pl.scan_csv(input_path, schema = { arr = np.loadtxt(input_path, dtype = np.uint32, delimiter = ",", skiprows = 1)
"saddr_raw": pl.UInt32, print("done")
"rtt_us": pl.UInt64, print("filtering out unsuccessful values...", end = " ", flush = True)
"success": pl.UInt8 arr = arr[arr[:, -1] == 1]
}) print("done")
lf = lf.filter(pl.col("success") == 1) print("removing success column...", end = " ", flush = True)
lf = lf.drop("success") arr = arr[:, :-1]
lf = lf.with_columns(rtt_us = pl.col("rtt_us").clip(0, 0xFFFFFFFF).cast(pl.UInt32)) print("done")
lf = lf.unique("saddr_raw") print("removing duplicate IP addresses...", end = " ", flush = True)
arr = lf.collect().to_numpy() arr = arr[np.unique(arr[:, 0], return_index = True)[1]]
print("done") print("done")
print("converting IP addresses from big-endian to little-endian...", end = " ", flush = True) print("converting IP addresses from big-endian to little-endian...", end = " ", flush = True)
arr[:, 0].byteswap(inplace = True) arr[:, 0].byteswap(inplace = True)

41
poetry.lock generated
View File

@@ -264,45 +264,6 @@ files = [
{file = "packaging-24.0.tar.gz", hash = "sha256:eb82c5e3e56209074766e6885bb04b8c38a0c015d0a30036ebe7ece34c9989e9"}, {file = "packaging-24.0.tar.gz", hash = "sha256:eb82c5e3e56209074766e6885bb04b8c38a0c015d0a30036ebe7ece34c9989e9"},
] ]
[[package]]
name = "polars-lts-cpu"
version = "0.20.17"
description = "Blazingly fast DataFrame library"
optional = false
python-versions = ">=3.8"
files = [
{file = "polars_lts_cpu-0.20.17-cp38-abi3-macosx_10_12_x86_64.whl", hash = "sha256:c5ba1113df88bd0e46bc2e649279f1e2f09f20d24a7e3a8b07d342d1e117bf40"},
{file = "polars_lts_cpu-0.20.17-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:514e833c63d2734d9028ca754fe441479cb8d68d06efe9f88fdb348db9578941"},
{file = "polars_lts_cpu-0.20.17-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3512862da0bcb764ed5e63bb122d265295d503e5294c839d5f46f88937543cc1"},
{file = "polars_lts_cpu-0.20.17-cp38-abi3-manylinux_2_24_aarch64.whl", hash = "sha256:2a30789e25a07e0c925e6fde030d2ee53024ae621a0194c423ff83f359d5f62c"},
{file = "polars_lts_cpu-0.20.17-cp38-abi3-win_amd64.whl", hash = "sha256:b5a3487d481517525d7c9b9c69210f123c2d1f233c47487fa058646c2dc3d42c"},
{file = "polars_lts_cpu-0.20.17.tar.gz", hash = "sha256:e11eb08f9264459339af4942c4be9c187daf2ffe4040d24284582e4e0e492ab7"},
]
[package.extras]
adbc = ["adbc-driver-manager", "adbc-driver-sqlite"]
all = ["polars[adbc,async,cloudpickle,connectorx,deltalake,fastexcel,fsspec,gevent,numpy,pandas,plot,pyarrow,pydantic,pyiceberg,sqlalchemy,timezone,xlsx2csv,xlsxwriter]"]
async = ["nest-asyncio"]
cloudpickle = ["cloudpickle"]
connectorx = ["connectorx (>=0.3.2)"]
deltalake = ["deltalake (>=0.14.0)"]
fastexcel = ["fastexcel (>=0.9)"]
fsspec = ["fsspec"]
gevent = ["gevent"]
matplotlib = ["matplotlib"]
numpy = ["numpy (>=1.16.0)"]
openpyxl = ["openpyxl (>=3.0.0)"]
pandas = ["pandas", "pyarrow (>=7.0.0)"]
plot = ["hvplot (>=0.9.1)"]
pyarrow = ["pyarrow (>=7.0.0)"]
pydantic = ["pydantic"]
pyiceberg = ["pyiceberg (>=0.5.0)"]
pyxlsb = ["pyxlsb (>=1.0)"]
sqlalchemy = ["pandas", "sqlalchemy"]
timezone = ["backports-zoneinfo", "tzdata"]
xlsx2csv = ["xlsx2csv (>=0.8.0)"]
xlsxwriter = ["xlsxwriter"]
[[package]] [[package]]
name = "pydantic" name = "pydantic"
version = "2.6.4" version = "2.6.4"
@@ -580,4 +541,4 @@ files = [
[metadata] [metadata]
lock-version = "2.0" lock-version = "2.0"
python-versions = "^3.11" python-versions = "^3.11"
content-hash = "882810214ec005c8e1d0b99099d0f9fc8d6e8fb9140ac9f452e18e7e3c580176" content-hash = "3ca6841a3434879d43d536188bf827e8a74f959cbac3da3d272dc1cc47769620"

View File

@@ -10,7 +10,6 @@ python = "^3.11"
pypng = "^0.20220715.0" pypng = "^0.20220715.0"
numpy = "^1.26.4" numpy = "^1.26.4"
numpy-hilbert-curve = "^1.0.1" numpy-hilbert-curve = "^1.0.1"
polars-lts-cpu = "^0.20.17"
cmap = "^0.1.3" cmap = "^0.1.3"
fastapi = "^0.110.1" fastapi = "^0.110.1"
uvicorn = "^0.29.0" uvicorn = "^0.29.0"