PandaPad is designed so your datasets never leave your device. All data processing and code execution run locally in your browser via Pyodide.
For AI-assisted code generation, only column metadata and your prompt are sent to the AI provider — cell values are never transmitted.
Scripts run in-browser via Pyodide — your data never leaves your device. Only column names, types, and row count are sent to the LLM.
# Remove rows with any NaN values
df = df.dropna(how='any')

# Replace NaN values with 0
df = df.fillna(value=0)

# Forward-fill NaN values from previous rows
df = df.ffill(axis=0)

# Remove duplicate rows, keep first occurrence
# keep='first' is the pandas default, so it is omitted here
df = df.drop_duplicates()

# Drop rows outside 1.5x IQR on numeric columns
for column in df.select_dtypes(include='number').columns:
    # Note: quantiles are recomputed on the already-filtered frame each pass
    first_quartile = df[column].quantile(0.25)
    third_quartile = df[column].quantile(0.75)
    spread = third_quartile - first_quartile
    lower = first_quartile - 1.5 * spread
    upper = third_quartile + 1.5 * spread
    df = df[df[column].between(lower, upper)]

# Lowercase column names, replace spaces with underscores
df.columns = (
    df.columns
    .str.strip()
    .str.lower()
    .str.replace(r'\s+', '_', regex=True)
)

# Strip whitespace and lowercase all string columns
for column in df.select_dtypes(include='object').columns:
    df[column] = df[column].str.strip().str.lower()

# Sort by the first numeric column descending
numeric_columns = df.select_dtypes(include='number').columns
if numeric_columns.size > 0:
    df = df.sort_values(by=numeric_columns[0], ascending=False)

# Min-max scale all numeric columns to 0-1
for column in df.select_dtypes(include='number').columns:
    low, high = df[column].min(), df[column].max()
    # Skip constant columns to avoid division by zero
    if high > low:
        df[column] = (df[column] - low) / (high - low)

# Flip rows and columns
# First column becomes the header row; old headers become the 'row' column
df = df.set_index(df.columns[0]).T.rename_axis('row').reset_index()

# Prefix all column names with col_
# add_prefix stringifies each label, so this also works when column labels
# are not strings (e.g. an integer RangeIndex after a transpose), where
# 'col_' + c would raise TypeError
df = df.add_prefix('col_')

# Insert an index column starting at 1
df.insert(loc=0, column='row_number', value=range(1, len(df) + 1))

# Cut first numeric column into Low/Medium/High
numeric_columns = df.select_dtypes(include='number').columns
if numeric_columns.size > 0:
    target = numeric_columns[0]
    df[target + '_bin'] = pd.cut(df[target], bins=3, labels=['Low', 'Medium', 'High'])

# Convert categorical columns to dummy variables
categorical = df.select_dtypes(include='object').columns.tolist()
df = pd.get_dummies(df, columns=categorical)

# Apply log(1+x) to all numeric columns
numeric_columns = df.select_dtypes(include='number').columns
# log1p is well-defined at 0 and more accurate than log(1 + x) for small x
df[numeric_columns] = np.log1p(df[numeric_columns])

# Running total on all numeric columns
numeric_columns = df.select_dtypes(include='number').columns
df[numeric_columns] = df[numeric_columns].cumsum(axis=0)

# Row-over-row percent change for numeric columns
numeric_columns = df.select_dtypes(include='number').columns
# fill_method=None: the implicit padding default is deprecated (pandas 2.1+)
# and would silently forward-fill NaNs before differencing; keep gaps as NaN
df[numeric_columns] = df[numeric_columns].pct_change(fill_method=None)

# Rank numeric columns (1 = smallest)
numeric_columns = df.select_dtypes(include='number').columns
# Default method='average', ascending order: rank 1 is the smallest value
df[numeric_columns] = df[numeric_columns].rank(ascending=True)

# Detailed statistics for numeric columns
# One row per numeric column: count, mean, std, min, quartiles, max
df = df.describe().T.rename_axis('column').reset_index()

# Group by first column and count rows
key = df.columns[0]
df = (
    df.groupby(key)
    .size()
    .reset_index(name='count')
    .sort_values('count', ascending=False)
)

# Frequency table of first column
key = df.columns[0]
df = df[key].value_counts().reset_index()
# Normalize header names (pre-2.0 pandas emits 'index' for the values column)
df.columns = [key, 'count']

# Pairwise correlation of numeric columns
df = (
    df.select_dtypes(include='number')
    .corr()
    .reset_index()
    .rename(columns={'index': 'column'})
)

# Single-row DataFrame of means per numeric column
column_means = df.select_dtypes(include='number').mean()
df = column_means.to_frame(name='mean').T.reset_index(drop=True)

# Keep only the first 10 rows
df = df.iloc[:10]

# Keep only the last 10 rows
# Python slice semantics clamp, so frames shorter than 10 are kept whole
df = df.iloc[-10:]

# Randomly sample up to 10 rows
sample_size = 10 if len(df) >= 10 else len(df)
df = df.sample(n=sample_size)

# Keep rows where first numeric column exceeds its mean
numeric_columns = df.select_dtypes(include='number').columns
if numeric_columns.size > 0:
    first = numeric_columns[0]
    df = df[df[first] > df[first].mean()]

# Drop all non-numeric columns
df = df.select_dtypes(include=['number'])

# Remove columns with only a single unique value
# nunique ignores NaN by default, so an all-NaN column is also dropped
varying = df.nunique() > 1
df = df.loc[:, varying]

# Smart detection: strips $, commas, %, parses dates, casts numeric
# Per-column type sniffing: currency and percent strings become floats,
# date-like text becomes datetime, anything else numeric is cast in place.
for col in df.columns:
    s = df[col].astype(str).str.strip()
    # Currency: $1,234.56 → 1234.56
    if s.str.match(r'^\$[\d,.]+$').any():
        df[col] = pd.to_numeric(s.str.replace(r'[$,]', '', regex=True), errors='coerce')
        continue
    # Percent: 45% → 0.45
    if s.str.match(r'^[\d.]+%$').any():
        df[col] = pd.to_numeric(s.str.rstrip('%'), errors='coerce') / 100
        continue
    # Dates: errors='coerce' turns unparseable entries into NaT instead of
    # raising, so the >50% parse-rate threshold below is actually reachable
    # on mixed columns. (infer_datetime_format was deprecated in pandas 2.0
    # and ignored/removed in later versions, so it is dropped here.)
    try:
        parsed = pd.to_datetime(s, errors='coerce')
        if parsed.notna().sum() > len(s) * 0.5:
            df[col] = parsed
            continue
    except Exception:
        pass
    # Generic numeric: errors='ignore' is deprecated (pandas 2.2+); emulate
    # it by leaving the column unchanged when any value fails to parse.
    try:
        df[col] = pd.to_numeric(df[col])
    except (ValueError, TypeError):
        pass

# Capitalize words in all string columns
for column in df.select_dtypes(include='object').columns:
    df[column] = df[column].str.title()

# Pull numeric values from string columns
for column in df.select_dtypes(include='object').columns:
    # First numeric token per cell; non-matches become NaN via coercion
    extracted = df[column].str.extract(r'(\d+\.?\d*)', expand=False)
    df[column + '_number'] = pd.to_numeric(extracted, errors='coerce')

# Add word count column for each string column
for column in df.select_dtypes(include='object').columns:
    # Whitespace-delimited token count per cell
    df[column + '_words'] = df[column].str.split().str.len()

# Loading pandas environment…
(~30 seconds on first load, much faster after that)