Hello Python

Basic Syntax

# Variables -- one binding each for an int, a str, a float and a bool
x, name = 10, "Alice"
pi, is_valid = 3.14, True

# Print -- the f-string interpolates both values into a single line
print(f"Name: {name}, Pi: {pi}")

# Conditions -- the three cases are tested from the smallest upwards
if x < 5:
    print("Smaller")
elif x == 5:
    print("Equal")
else:
    print("Greater")

# Loops -- print 0 through 4, then count x down until it reaches zero
for i in range(0, 5):
    print(i)

while x > 0:
    x = x - 1

Functions and Classes

def greet(name):
    """Return the greeting ``Hello, <name>!`` for the given name."""
    greeting = f"Hello, {name}!"
    return greeting

class Person:
    """A person identified by a name who can introduce themselves."""

    def __init__(self, name):
        # Keep the name on the instance so say_hi can read it later.
        self.name = name

    def say_hi(self):
        """Print a one-line self-introduction to standard output."""
        intro = f"Hi, I'm {self.name}"
        print(intro)

Lists, Dicts, Sets

# Lists -- ordered and mutable; extended in place with one more item
nums = [1, 2, 3]
nums.extend([4])

# Dicts -- update() inserts the new key alongside the existing ones
ages = {"Alice": 25, "Bob": 30}
ages.update({"Eve": 22})

# Sets -- built from a tuple; the repeated 2 collapses into one member
unique_vals = set((1, 2, 2, 3))

List Comprehensions

# Squares of 0..9
squares = [n * n for n in range(10)]
# Numbers below 20 that leave no remainder when divided by 2
evens = [n for n in range(20) if not n % 2]

Numpy

import numpy as np

# Array creation: from a sequence, all zeros, and standard-normal samples
a = np.array((1, 2, 3))
b = np.zeros(shape=(2, 3))
c = np.random.randn(3, 3)

# Shape tuple and element dtype of the 1-D array
print(a.shape, a.dtype)

# Element-wise ops: the scalars are applied to every element of a
d = 1 + 2 * a
print(d)

Pandas

import pandas as pd

# Create DataFrame from a dict mapping column name -> list of values
df = pd.DataFrame({
    "name": ["Alice", "Bob"],
    "age": [25, 30]
})

# Read / write
# NOTE: this rebinds df, so the frame built above is replaced by whatever
# data.csv contains. The path is relative, so the file has to be reachable
# from the working directory.
# NOTE(review): the lines below assume data.csv has "name" and "age"
# columns -- confirm.
df = pd.read_csv("data.csv")
# index=False leaves the row index out of the written file
df.to_csv("out.csv", index=False)

# Inspect -- the results are not assigned; presumably meant for a notebook
# cell or REPL where the value is displayed
df.head()
df.info()
df.describe()

# Filter: the boolean mask keeps the rows whose age is greater than 26
df[df["age"] > 26]

# Group & Aggregate: mean age for each distinct name
df.groupby("name")["age"].mean()

# Apply: run the lambda on every value of "age" and store the result in a
# new "age_plus_1" column
df["age_plus_1"] = df["age"].apply(lambda x: x + 1)

Matplotlib

import matplotlib.pyplot as plt
import seaborn as sns

# Line plot
plt.plot([1, 2, 3], [3, 2, 1])
plt.title("Line Plot")
plt.show()

# Histogram of the "age" column, split into 10 bins
# seaborn draws on the current Axes when no `ax` is passed, so every plot
# gets its own plt.show(), matching the line plot above. Without a show in
# between, the boxplot would land on the same Axes as the histogram.
sns.histplot(df["age"], bins=10)
plt.show()

# Boxplot of age, one box per name
sns.boxplot(x="name", y="age", data=df)
plt.show()

Scikit-learn

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# Split data: a one-column feature frame (the list selector keeps it 2-D)
# and the target series, divided into train and test parts.
# NOTE(review): no "income" column is created in the visible part of this
# file; it presumably comes from data.csv -- confirm.
X = df[["age"]]
y = df["income"]
X_train, X_test, y_train, y_test = train_test_split(X, y)

# Train model: fit() hands back the fitted estimator, so construction and
# training are chained
model = LinearRegression().fit(X_train, y_train)

# Predict & evaluate on the held-out part
y_pred = model.predict(X_test)
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)

Data Cleaning Tips

# Missing values
# Count the missing entries in each column (the result is not stored)
df.isnull().sum()
# Drop every row that has at least one missing value, modifying df in place
df.dropna(inplace=True)
# Replace missing values with 0, in place.
# NOTE(review): run directly after the dropna above there should be nothing
# left to fill -- these two look like alternatives rather than a sequence;
# confirm.
df.fillna(0, inplace=True)

# Duplicates: remove repeated rows, in place
df.drop_duplicates(inplace=True)

# Rename columns: "old" -> "new", in place
# NOTE(review): "old"/"new" look like placeholder column names -- confirm.
df.rename(columns={"old": "new"}, inplace=True)

# Change types: cast the "age" column to int and store it back
df["age"] = df["age"].astype(int)

Useful One-Liners

# Flatten list: walk each inner list in order and collect its elements
flat = [element for inner in nested_list for element in inner]

# Most common element: a list holding the top (element, count) pair
from collections import Counter
Counter(my_list).most_common(n=1)

# Merge DataFrames on their shared "id" column
df_merged = pd.merge(left=df1, right=df2, on="id")

Environment Tools

# NOTE: the commands in this section are shell commands, not Python --
# run them in a terminal.

# Create virtual env in a directory named "venv", then activate it for the
# current shell session
# NOTE(review): venv/bin/activate looks like the POSIX layout; the path
# presumably differs on Windows -- confirm for your platform.
python -m venv venv
source venv/bin/activate

# Install packages
pip install numpy pandas matplotlib seaborn scikit-learn jupyter

# Run Jupyter
jupyter notebook

Quick Plot in One Line

# Histogram of 1000 samples from np.random.randn, wrapped in a Series so
# that Series.hist() can draw it
pd.Series(np.random.randn(1000)).hist()
# Display the figure
plt.show()