Pandas

Getting Started {.cols-2}

You’ll need to import pandas to get started:

import pandas as pd

-	-
`df['col1']`	Select column
`df[['col1', 'col2']]`	Select multiple columns
`df.loc[0]`	Select row by index
`df.loc[:, 'col1']`	Select all rows for ‘col1’
`df.iloc[0]`	Select row by position
`df.iloc[0, 1]`	Select specific value
`df[df['col1'] > 2]`	Select rows based on condition

-	-
`df.dropna()`	Drop rows with any missing values
`df.dropna(axis=1)`	Drop columns with any missing values
`df.fillna(0)`	Replace missing values with 0
`df.drop_duplicates()`	Drop duplicate rows
`df.rename(columns={'old_name': 'new_name'})`	Rename columns
`df.astype('int')`	Change data type

-	-
`df['col3'] = df['col1'] + df['col2']`	Add new column
`df.drop('col1', axis=1)`	Drop column
`df.append(new_row)`	Add new row
`df.insert(2, 'new_col', new_data)`	Insert new column at position 2

-	-
`pd.concat([df1, df2])`	Concatenate rows
`pd.concat([df1, df2], axis=1)`	Concatenate columns
`pd.merge(df1, df2, on='key')`	Merge DataFrames on key
`pd.merge(df1, df2, left_on='key1', right_on='key2')`	Merge on different keys
`df1.join(df2, lsuffix='_left', rsuffix='_right')`	Join DataFrames

-	-
`df.apply(np.sqrt)`	Apply function to all values
`df['col1'].apply(lambda x: x ** 2)`	Apply function to column
`df.applymap(str)`	Apply function to DataFrame elements
`df['col1'].map({'a': 1, 'b': 2})`	Map values
`df['col1'].replace('a', 1)`	Replace values

-	-
`df['date'] = pd.to_datetime(df['date'])`	Convert to datetime
`df['year'] = df['date'].dt.year`	Extract year
`df['month'] = df['date'].dt.month`	Extract month
`df['day'] = df['date'].dt.day`	Extract day
`df.set_index('date', inplace=True)`	Set date as index

-	-
`df.to_csv('file.csv')`	Save DataFrame to CSV
`df = pd.read_csv('file.csv')`	Load DataFrame from CSV
`df.to_excel('file.xlsx')`	Save DataFrame to Excel
`df = pd.read_excel('file.xlsx')`	Load DataFrame from Excel
`from sqlalchemy import create_engine`	Import SQLAlchemy for SQL operations
`engine = create_engine('sqlite:///:memory:')`	Create SQL engine
`df.to_sql('table_name', engine)`	Save to SQL table
`df = pd.read_sql('table_name', engine)`	Load from SQL table