← Home

PandaPad

Scripts run in-browser via Pyodide — your data never leaves your device. Only column names, types, and row count are sent to the LLM.


Drop Missing

Remove rows with any NaN values

# Keep only complete rows: a row is discarded as soon as any one of its
# cells is missing.
df = df.dropna(axis=0, how='any')

Fill Missing with Zero

Replace NaN values with 0

# Substitute 0 for every missing cell, in every column.
df = df.fillna(value=0)

Fill Missing Forward

Forward-fill NaN values from previous rows

# Carry the last seen value down each column into the gaps that follow it.
# Gaps at the very top of a column have nothing above them and stay missing.
df = df.ffill(axis=0)

Deduplicate

Remove duplicate rows, keep first occurrence

# Flag every row that repeats an earlier row exactly, then keep the rest
# (so the first occurrence of each distinct row survives).
is_repeat = df.duplicated(keep='first')
df = df[~is_repeat]

Remove Outliers (IQR)

Drop rows outside 1.5x IQR on numeric columns

# Drop rows whose value in any numeric column lies outside
# [Q1 - 1.5*IQR, Q3 + 1.5*IQR]. Columns are filtered one after another, so
# each column's quartiles are computed on the rows that survived the
# previous columns.
num_cols = df.select_dtypes(include='number').columns
for col in num_cols:
    q1, q3 = df[col].quantile(0.25), df[col].quantile(0.75)
    iqr = q3 - q1
    in_range = df[col].between(q1 - 1.5 * iqr, q3 + 1.5 * iqr)
    # A missing value is not an outlier. NaN fails every comparison, so
    # without the isna() term rows with gaps were dropped, and an all-NaN
    # column (whose quartiles are NaN too) emptied the whole DataFrame.
    df = df[in_range | df[col].isna()]

Clean Column Names

Trim and lowercase column names, replace runs of whitespace with underscores

# Normalise every header: trim it, lowercase it, and collapse each run of
# whitespace into a single underscore.
# Labels are cast to str first. The .str accessor raises on an all-numeric
# header (e.g. a headerless CSV) and turns the non-string labels of a mixed
# header into NaN, which would destroy those column names.
df.columns = (
    df.columns.astype(str)
    .str.strip()
    .str.lower()
    .str.replace(r'\s+', '_', regex=True)
)

String Cleanup

Strip whitespace and lowercase all string columns

# Trim and lowercase the text in every object-dtype column.
# Object columns can hold mixed values (numbers, None, NaN next to strings).
# The .str accessor replaces every non-string cell with NaN and raises when a
# column holds no strings at all, so only real str cells are transformed and
# everything else is passed through unchanged.
str_cols = df.select_dtypes(include='object').columns
for col in str_cols:
    df[col] = df[col].map(
        lambda v: v.strip().lower() if isinstance(v, str) else v
    )

Sort by Column

Sort by the first numeric column descending

# Order the rows from largest to smallest by the left-most numeric column.
# A frame without numeric columns is left untouched.
numeric_names = df.select_dtypes(include='number').columns
if not numeric_names.empty:
    df = df.sort_values(by=numeric_names[0], ascending=False)

Normalize Numbers

Min-max scale all numeric columns to 0-1

# Rescale each numeric column so its minimum maps to 0 and its maximum to 1.
for name in df.select_dtypes(include='number').columns:
    lowest = df[name].min()
    highest = df[name].max()
    # Constant (or all-missing) columns have no range to scale by and are
    # left as they are; this also avoids dividing by zero.
    if highest > lowest:
        span = highest - lowest
        df[name] = (df[name] - lowest) / span

Transpose

Flip rows and columns, using the first column's values as the new column headers

# Swap rows and columns. The first column supplies the new column headers;
# the old headers end up as values in a leading column called 'row'.
header_source = df.columns[0]
flipped = df.set_index(header_source).transpose()
flipped.index.name = 'row'
df = flipped.reset_index()

Rename Columns

Prefix all column names with col_

# Prefix every column label with 'col_'.
# An f-string is used instead of '+' so that non-string labels (e.g. the
# integer labels of a headerless table) are formatted rather than raising
# TypeError.
df.columns = [f'col_{c}' for c in df.columns]

Add Row Number

Insert an index column starting at 1

# Put a 1-based running number in front of all existing columns (in place).
row_ids = range(1, len(df) + 1)
df.insert(loc=0, column='row_number', value=row_ids)

Bin Numeric Column

Cut first numeric column into Low/Medium/High

# Split the value range of the left-most numeric column into equal-width
# bins, one per label, and store the label in a new '<column>_bin' column.
numeric_names = df.select_dtypes(include='number').columns
if len(numeric_names) > 0:
    target = numeric_names[0]
    bin_labels = ['Low', 'Medium', 'High']
    df[target + '_bin'] = pd.cut(
        df[target], bins=len(bin_labels), labels=bin_labels
    )

One-Hot Encode

Convert text (object-dtype) columns to dummy variables

# Replace every object-dtype column with one indicator column per distinct
# value; all other columns are carried over unchanged.
text_cols = df.select_dtypes(include='object').columns.tolist()
df = pd.get_dummies(df, columns=text_cols)

Log Transform

Apply log(1+x) to all numeric columns

# Overwrite every numeric column with log(1 + x), computed element-wise.
numeric_part = df.select_dtypes(include='number')
df[numeric_part.columns] = np.log1p(numeric_part)

Cumulative Sum

Running total on all numeric columns

# Overwrite every numeric column with its running total from the top row down.
numeric_part = df.select_dtypes(include='number')
df[numeric_part.columns] = numeric_part.cumsum()

Percent Change

Row-over-row percent change for numeric columns

# Overwrite every numeric column with its fractional change relative to the
# row above. The first row has no predecessor and becomes NaN.
numeric_part = df.select_dtypes(include='number')
df[numeric_part.columns] = numeric_part.pct_change()

Rank Values

Rank numeric columns (1 = smallest)

# Overwrite every numeric column with the rank of each value inside that
# column (ascending, so the smallest value gets rank 1).
numeric_part = df.select_dtypes(include='number')
df[numeric_part.columns] = numeric_part.rank()

Summary Stats

Detailed statistics for numeric columns

# Replace the data with its describe() summary, laid out with one row per
# described column and the column's name in a leading 'column' field.
stats = df.describe().transpose()
stats.index.name = 'column'
df = stats.reset_index()

Group & Count

Group by first column and count rows

# Count how many rows share each value of the first column and list the
# groups from most to least frequent.
key = df.columns[0]
group_sizes = df.groupby(key).size()
df = group_sizes.reset_index(name='count').sort_values(by='count', ascending=False)

Value Counts

Frequency table of first column

# Build a two-column frequency table for the first column: the distinct
# value, then how often it occurs.
first = df.columns[0]
frequencies = df[first].value_counts()
df = frequencies.reset_index()
# Name the two columns explicitly rather than relying on the labels that
# reset_index() produces.
df.columns = [first, 'count']

Correlation Matrix

Pairwise correlation of numeric columns

# Pairwise correlation of the numeric columns, with the row labels exposed
# as a leading 'column' field.
corr = df.select_dtypes(include='number').corr()
row_labels = corr.index.to_list()
df = corr.reset_index(drop=True)
# The label column is inserted explicitly instead of renaming whatever
# reset_index() emits. If the data has a numeric column literally named
# 'index', reset_index() falls back to 'level_0' and a later
# rename({'index': 'column'}) would rename the data column instead; a named
# columns axis would likewise leave the label column un-renamed.
# allow_duplicates keeps this working when a numeric column is itself
# called 'column'.
df.insert(0, 'column', row_labels, allow_duplicates=True)

Column Means

Single-row DataFrame of means per numeric column

# Collapse the data to a single row holding the mean of each numeric column.
means = df.select_dtypes(include='number').mean()
as_row = means.to_frame(name='mean').T
df = as_row.reset_index(drop=True)

Top 10 Rows

Keep only the first 10 rows

# Keep the first ten rows (or all of them if there are fewer).
df = df.iloc[:10]

Bottom 10 Rows

Keep only the last 10 rows

# Keep the last ten rows (or all of them if there are fewer).
df = df.iloc[-10:]

Random Sample

Randomly sample up to 10 rows

# Draw ten rows at random without replacement. The request is capped at the
# number of rows available, since sample() rejects n larger than the frame.
sample_size = min(10, len(df))
df = df.sample(n=sample_size)

Filter Above Mean

Keep rows where first numeric column exceeds its mean

# Keep the rows whose value in the left-most numeric column is strictly
# greater than that column's mean. Without numeric columns nothing changes.
numeric_names = df.select_dtypes(include='number').columns
if len(numeric_names) > 0:
    values = df[numeric_names[0]]
    df = df[values > values.mean()]

Select Numeric Only

Drop all non-numeric columns

# Keep only the columns with a numeric dtype; every other column is dropped.
df = df.select_dtypes(include=['number'])

Drop Constant Columns

Remove columns with only a single unique value

# Keep only columns that hold at least two distinct non-missing values
# (nunique() does not count NaN).
varies = df.nunique() > 1
df = df.loc[:, varies]

Fix All Data Types

Smart detection: strips $, commas, %, parses dates, casts numeric

# Best-effort type repair for text columns. Each column is tested, in order,
# as currency, percent, plain number, then date; the first interpretation
# that applies wins. Columns that match nothing are left unchanged.
for col in df.columns:
    # Columns that already carry a numeric (incl. bool) or datetime dtype are
    # skipped: stringifying them would let integers such as years or IDs be
    # misread as dates further down.
    if (pd.api.types.is_numeric_dtype(df[col])
            or pd.api.types.is_datetime64_any_dtype(df[col])):
        continue
    s = df[col].astype(str).str.strip()
    # Currency: $1,234.56 → 1234.56
    if s.str.match(r'^\$[\d,.]+$').any():
        df[col] = pd.to_numeric(
            s.str.replace(r'[$,]', '', regex=True), errors='coerce'
        )
        continue
    # Percent: 45% → 0.45
    if s.str.match(r'^[\d.]+%$').any():
        df[col] = pd.to_numeric(s.str.rstrip('%'), errors='coerce') / 100
        continue
    # Generic numeric. Tried before dates so that digit-only text ("2020",
    # "7") becomes a number instead of a timestamp. The conversion is strict:
    # one non-numeric value leaves the column as it was. (This replaces the
    # deprecated errors='ignore', which had the same all-or-nothing effect.)
    try:
        as_number = pd.to_numeric(df[col])
    except (ValueError, TypeError):
        pass
    else:
        df[col] = as_number
        continue
    # Dates. Parsing is strict, so one unparseable value leaves the column
    # unchanged; the deprecated infer_datetime_format flag is no longer
    # passed. Only parsing errors are caught, so unrelated bugs still surface.
    try:
        parsed = pd.to_datetime(s)
    except (ValueError, TypeError, OverflowError):
        continue
    # Missing cells parse to NaT; convert only when real dates are the majority.
    if parsed.notna().sum() > len(s) * 0.5:
        df[col] = parsed

Title Case

Capitalize words in all string columns

# Title-case the text in every object-dtype column.
# Only real str cells are transformed. The .str accessor would replace the
# non-string cells of a mixed column (numbers, None, NaN) with NaN and raise
# on a column holding no strings at all.
str_cols = df.select_dtypes(include='object').columns
for col in str_cols:
    df[col] = df[col].map(lambda v: v.title() if isinstance(v, str) else v)

Extract Numbers

Pull numeric values from string columns

# For each object-dtype column add '<column>_number', holding the first
# unsigned integer or decimal found in the text (NaN where there is none).
text_cols = df.select_dtypes(include='object').columns
for name in text_cols:
    first_match = df[name].str.extract(r'(\d+\.?\d*)', expand=False)
    df[name + '_number'] = pd.to_numeric(first_match, errors='coerce')

Word Count

Add word count column for each string column

# For each object-dtype column add '<column>_words', holding the number of
# whitespace-separated tokens in the text.
for name in df.select_dtypes(include='object').columns:
    tokens = df[name].str.split()
    df[name + '_words'] = tokens.str.len()

Loading pandas environment…

(~30 seconds on first load, much faster after that)