Improve efficiency of polars operations

This commit is contained in:
LilyRose2798 2024-04-13 14:07:49 +10:00
parent 4f53917c77
commit d0b3d05067

View file

@ -116,18 +116,17 @@ def generate_tiles(parquet_path: Path, tiles_dir: Path, *, tile_size = default_t
write_tile_p = functools.partial(write_tile, alpha = alpha)
def generate_images(colormap: str, type_name: str, series: pl.Series):
nonlocal df
def generate_images(colormap: str, type_name: str, expr: pl.Expr):
image_size = tiles_per_side * tile_size
if not quiet:
print(f"creating {type_name} image data with {colormap} colormap...", end = " ", flush = True)
image_data = np.zeros((tiles_per_side * tile_size, tiles_per_side * tile_size), dtype = f"S{channels}")
image_data[(df.get_column("y"), df.get_column("x"))] = (series * 255.9999).clip(0, 255).cast(pl.UInt8).replace(pl.int_range(256), colormaps_by_name[colormap], return_dtype = pl.Binary)
print(f"creating {image_size} by {image_size} pixel {type_name} image data with {colormap} colormap...", end = " ", flush = True)
image_data = np.zeros((image_size, image_size), dtype = f"S{channels}")
image_data[(df.get_column("y"), df.get_column("x"))] = df.select((expr * 255.9999).clip(0, 255).cast(pl.UInt8).replace(pl.int_range(256), colormaps_by_name[colormap], return_dtype = pl.Binary)).to_series()
if not quiet:
print("done")
if not quiet:
print(f"writing {tiles_per_side}x{tiles_per_side}={tiles_per_side * tiles_per_side} {type_name} images with {colormap} colormap...", end = " ", flush = True)
print(f"writing {tiles_per_side * tiles_per_side} ({tiles_per_side}x{tiles_per_side}) {type_name} images with {colormap} colormap...", end = " ", flush = True)
with Pool(processes) as pool:
z = tiles_per_side.bit_length() - 1
z_path = tiles_dir / type_name / colormap / f"{z}"
@ -142,7 +141,6 @@ def generate_tiles(parquet_path: Path, tiles_dir: Path, *, tile_size = default_t
])
if not quiet:
print("done")
del image_data
def scale_down_coords(scale = 2):
nonlocal df
@ -165,12 +163,9 @@ def generate_tiles(parquet_path: Path, tiles_dir: Path, *, tile_size = default_t
while True:
for colormap in colormaps:
if generate_density:
divisor = 256 if possible_overlaps == 1 else possible_overlaps
series = df.get_column("count") / divisor
generate_images(colormap, "density", series)
generate_images(colormap, "density", pl.col("count") / (256 if possible_overlaps == 1 else possible_overlaps))
if generate_rtt:
series = df.get_column("rtt_us") / rtt_quantile
generate_images(colormap, "rtt", series)
generate_images(colormap, "rtt", pl.col("rtt_us") / rtt_quantile)
if tiles_per_side == 1:
break
scale_down_coords()