Refactor convert command to remove polars entirely

This commit is contained in:
LilyRose2798 2024-04-15 22:23:00 +10:00
parent 3f61d7fac6
commit 27a8d2584d
3 changed files with 12 additions and 53 deletions

View File

@ -16,7 +16,6 @@ from png import Writer
from cmap import Colormap
from hilbert import decode
import numpy as np
import polars as pl
ip_bytes = 4
ip_bits = ip_bytes * 8
@ -56,17 +55,17 @@ def make_coords(output_path: Path, batches = default_batches, processes = defaul
print(f"finished writing to file")
def convert(input_path: Path, output_path: Path):
print(f"scanning csv '{input_path}' into array...", end = " ", flush = True)
lf = pl.scan_csv(input_path, schema = {
"saddr_raw": pl.UInt32,
"rtt_us": pl.UInt64,
"success": pl.UInt8
})
lf = lf.filter(pl.col("success") == 1)
lf = lf.drop("success")
lf = lf.with_columns(rtt_us = pl.col("rtt_us").clip(0, 0xFFFFFFFF).cast(pl.UInt32))
lf = lf.unique("saddr_raw")
arr = lf.collect().to_numpy()
print(f"reading csv '{input_path}' into array...", end = " ", flush = True)
arr = np.loadtxt(input_path, dtype = np.uint32, delimiter = ",", skiprows = 1)
print("done")
print("filtering out unsuccessful values...", end = " ", flush = True)
arr = arr[arr[:, -1] == 1]
print("done")
print("removing success column...", end = " ", flush = True)
arr = arr[:, :-1]
print("done")
print("removing duplicate IP addresses...", end = " ", flush = True)
arr = arr[np.unique(arr[:, 0], return_index = True)[1]]
print("done")
print("converting IP addresses from big-endian to little-endian...", end = " ", flush = True)
arr[:, 0].byteswap(inplace = True)

41
poetry.lock generated
View File

@ -264,45 +264,6 @@ files = [
{file = "packaging-24.0.tar.gz", hash = "sha256:eb82c5e3e56209074766e6885bb04b8c38a0c015d0a30036ebe7ece34c9989e9"},
]
[[package]]
name = "polars-lts-cpu"
version = "0.20.17"
description = "Blazingly fast DataFrame library"
optional = false
python-versions = ">=3.8"
files = [
{file = "polars_lts_cpu-0.20.17-cp38-abi3-macosx_10_12_x86_64.whl", hash = "sha256:c5ba1113df88bd0e46bc2e649279f1e2f09f20d24a7e3a8b07d342d1e117bf40"},
{file = "polars_lts_cpu-0.20.17-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:514e833c63d2734d9028ca754fe441479cb8d68d06efe9f88fdb348db9578941"},
{file = "polars_lts_cpu-0.20.17-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3512862da0bcb764ed5e63bb122d265295d503e5294c839d5f46f88937543cc1"},
{file = "polars_lts_cpu-0.20.17-cp38-abi3-manylinux_2_24_aarch64.whl", hash = "sha256:2a30789e25a07e0c925e6fde030d2ee53024ae621a0194c423ff83f359d5f62c"},
{file = "polars_lts_cpu-0.20.17-cp38-abi3-win_amd64.whl", hash = "sha256:b5a3487d481517525d7c9b9c69210f123c2d1f233c47487fa058646c2dc3d42c"},
{file = "polars_lts_cpu-0.20.17.tar.gz", hash = "sha256:e11eb08f9264459339af4942c4be9c187daf2ffe4040d24284582e4e0e492ab7"},
]
[package.extras]
adbc = ["adbc-driver-manager", "adbc-driver-sqlite"]
all = ["polars[adbc,async,cloudpickle,connectorx,deltalake,fastexcel,fsspec,gevent,numpy,pandas,plot,pyarrow,pydantic,pyiceberg,sqlalchemy,timezone,xlsx2csv,xlsxwriter]"]
async = ["nest-asyncio"]
cloudpickle = ["cloudpickle"]
connectorx = ["connectorx (>=0.3.2)"]
deltalake = ["deltalake (>=0.14.0)"]
fastexcel = ["fastexcel (>=0.9)"]
fsspec = ["fsspec"]
gevent = ["gevent"]
matplotlib = ["matplotlib"]
numpy = ["numpy (>=1.16.0)"]
openpyxl = ["openpyxl (>=3.0.0)"]
pandas = ["pandas", "pyarrow (>=7.0.0)"]
plot = ["hvplot (>=0.9.1)"]
pyarrow = ["pyarrow (>=7.0.0)"]
pydantic = ["pydantic"]
pyiceberg = ["pyiceberg (>=0.5.0)"]
pyxlsb = ["pyxlsb (>=1.0)"]
sqlalchemy = ["pandas", "sqlalchemy"]
timezone = ["backports-zoneinfo", "tzdata"]
xlsx2csv = ["xlsx2csv (>=0.8.0)"]
xlsxwriter = ["xlsxwriter"]
[[package]]
name = "pydantic"
version = "2.6.4"
@ -580,4 +541,4 @@ files = [
[metadata]
lock-version = "2.0"
python-versions = "^3.11"
content-hash = "882810214ec005c8e1d0b99099d0f9fc8d6e8fb9140ac9f452e18e7e3c580176"
content-hash = "3ca6841a3434879d43d536188bf827e8a74f959cbac3da3d272dc1cc47769620"

View File

@ -10,7 +10,6 @@ python = "^3.11"
pypng = "^0.20220715.0"
numpy = "^1.26.4"
numpy-hilbert-curve = "^1.0.1"
polars-lts-cpu = "^0.20.17"
cmap = "^0.1.3"
fastapi = "^0.110.1"
uvicorn = "^0.29.0"