# Simple program demonstrating the speed and memory-usage advantage of Polars over pandas (Polars > pandas).
import pandas as pd
import polars as pl
import numpy as np
import time, psutil
# Create sample data: two columns, each holding n random floats in [0, 1).
# NOTE: with n = 100_000_000, two float64 columns take roughly 1.5 GB before
# any DataFrame is built; lower n on machines with limited RAM.
n = 100_000_000
data = {"A": np.random.rand(n), "B": np.random.rand(n)}
# --- Pandas ---
# time.perf_counter() is used instead of time.time(): it is a monotonic,
# high-resolution clock meant for measuring short intervals, whereas the
# wall clock can be adjusted mid-run and may have coarse resolution.
start = time.perf_counter()
pdf = pd.DataFrame(data)
print("Pandas creation time:", round(time.perf_counter() - start, 3), "s")
# Sample output: Pandas creation time: 0.475 s
# --- Polars ---
start = time.perf_counter()
pldf = pl.DataFrame(data)
print("Polars creation time:", round(time.perf_counter() - start, 3), "s")
# Sample output: Polars creation time: 0.001 s
# Compare the in-memory footprint of the two frames.
# Sizes are converted from bytes by dividing by 1024**2 and printed as "MB".
import sys
bytes_per_mb = 1024**2
pandas_bytes = pdf.memory_usage(deep=True).sum()
print("Pandas Memory (MB):", round(pandas_bytes / bytes_per_mb, 2))
# Sample output: Pandas Memory (MB): 1525.88
polars_bytes = pldf.estimated_size()
print("Polars Memory (MB):", round(polars_bytes / bytes_per_mb, 2))
# Sample output: Polars Memory (MB): _______ (run it and find out yourself) 👈
# Benchmark evaluation: for several row counts, time "build a DataFrame and
# sum column A" in each library, then plot elapsed time against row count.
import matplotlib.pyplot as plt
sizes = [10_000, 100_000, 1_000_000, 5_000_000]
pandas_times, polars_times = [], []
# NOTE: the loop rebinds the module-level names n and data on every pass.
for n in sizes:
    data = {"A": np.random.rand(n), "B": np.random.rand(n)}

    # Each timed region covers both frame construction and the sum.
    # perf_counter() is the clock intended for short interval measurements.
    start = time.perf_counter()
    pd.DataFrame(data)["A"].sum()
    pandas_times.append(time.perf_counter() - start)

    start = time.perf_counter()
    pl.DataFrame(data)["A"].sum()
    polars_times.append(time.perf_counter() - start)

# One line per library: x = number of rows, y = elapsed seconds.
plt.plot(sizes, pandas_times, label="Pandas")
plt.plot(sizes, polars_times, label="Polars")
plt.xlabel("Rows")
plt.ylabel("Time (s)")
plt.legend()
plt.title("Polars vs Pandas Performance")
plt.show()
Response:
n-joy 😄😄😄
No comments:
Post a Comment