Monday, October 6, 2025

#1 Polars vs Pandas

A simple program comparing the speed and memory usage of Polars and pandas (Polars > pandas).

import pandas as pd
import polars as pl


import numpy as np
import time, psutil

# Create sample data: two columns ("A" and "B") of n random floats each.
# NOTE: with n = 100_000_000 the two NumPy arrays alone need roughly 1.5 GB
# of RAM (2 columns * n rows * 8 bytes); lower n on memory-constrained machines.
n = 100_000_000
data = {"A": np.random.rand(n), "B": np.random.rand(n)}

# --- Pandas ---
# time.perf_counter() is used rather than time.time(): it is monotonic and
# backed by the highest-resolution clock available, so short intervals are
# not distorted by system clock adjustments or coarse timer granularity.
start = time.perf_counter()
pdf = pd.DataFrame(data)
print("Pandas creation time:", round(time.perf_counter() - start, 3), "s")
#Pandas creation time: 0.475 s


# --- Polars ---
# Same measurement for Polars, built from the very same dict of arrays.
start = time.perf_counter()
pldf = pl.DataFrame(data)
print("Polars creation time:", round(time.perf_counter() - start, 3), "s")
#Polars creation time: 0.001 s


# Compare the memory usage of the two DataFrames
import sys

# Report each DataFrame's size in megabytes (bytes divided by 1024**2),
# rounded to two decimals.
BYTES_PER_MB = 1024**2

# pandas: sum the per-column byte counts returned by memory_usage(deep=True).
pandas_bytes = pdf.memory_usage(deep=True).sum()
print("Pandas Memory (MB):", round(pandas_bytes / BYTES_PER_MB, 2))
#Pandas Memory (MB): 1525.88

# Polars: estimated_size() is called with no arguments, i.e. default unit.
polars_bytes = pldf.estimated_size()
print("Polars Memory (MB):", round(polars_bytes / BYTES_PER_MB, 2))
#Polars Memory (MB): _______ (find out yourself)👈👈👈👈


# Benchmark Evaluation


import matplotlib.pyplot as plt

# Benchmark "build a DataFrame and sum column A" for both libraries at
# increasing row counts, then plot elapsed seconds against the row count.
sizes = [10_000, 100_000, 1_000_000, 5_000_000]
pandas_times, polars_times = [], []

# NOTE: this loop rebinds the module-level names n and data.
for n in sizes:
    # Fresh random input per size; both libraries are timed on the same arrays.
    data = {"A": np.random.rand(n), "B": np.random.rand(n)}

    # time.perf_counter() replaces time.time(): it is monotonic and
    # high-resolution, which matters for the smallest sizes where a single
    # run may finish in well under a millisecond.
    start = time.perf_counter()
    pd.DataFrame(data)["A"].sum()
    pandas_times.append(time.perf_counter() - start)

    start = time.perf_counter()
    pl.DataFrame(data)["A"].sum()
    polars_times.append(time.perf_counter() - start)

# One line per library: x = number of rows, y = elapsed seconds.
plt.plot(sizes, pandas_times, label="Pandas")
plt.plot(sizes, polars_times, label="Polars")
plt.xlabel("Rows")
plt.ylabel("Time (s)")
plt.legend()
plt.title("Polars vs Pandas Performance")
plt.show()


Response:

n-joy 😄😄😄

No comments:

Post a Comment

#1 Polars vs Pandas

A simple program comparing the speed and memory usage of Polars and pandas (Polars > pandas). import pandas as pd import polars as pl import numpy as ...