Last active
January 15, 2025 19:01
-
-
Save jikanter/7ee1f29cec5332939f1a48e9f1bba479 to your computer and use it in GitHub Desktop.
[dbt snippets] #ai #aigenerated #chatgpt #
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # List of Highly Used Pandas Data Transforms | |
| # 1. Loading Data | |
| import pandas as pd | |
| data = pd.read_csv('data.csv') | |
| # 2. Basic Inspection | |
| data.info() | |
| data.describe() | |
| data.head() | |
| data.tail() | |
| data.columns | |
| # 3. Handling Missing Data | |
| # Drop missing values | |
| data.dropna(inplace=True) | |
| # Fill missing values | |
| data.fillna({'column_name': 0}, inplace=True) | |
| # Check for missing values | |
| data.isnull().sum() | |
| # 4. Filtering Rows | |
| # Filter rows by condition | |
| filtered_data = data[data['column_name'] > 10] | |
| # Filter rows with multiple conditions | |
| filtered_data = data[(data['column1'] > 10) & (data['column2'] == 'value')] | |
| # 5. Transforming Columns | |
| # Apply function to a column | |
| data['new_column'] = data['existing_column'].apply(lambda x: x * 2) | |
| # Vectorized operations | |
| data['new_column'] = data['existing_column'] * 2 | |
| # Renaming columns | |
| data.rename(columns={'old_name': 'new_name'}, inplace=True) | |
| # 6. Aggregations and Grouping | |
| # Group by and aggregate | |
| grouped_data = data.groupby('group_column').agg({'value_column': 'sum'}) | |
| # Reset index after grouping | |
| grouped_data.reset_index(inplace=True) | |
| # 7. Sorting Data | |
| # Sort by column values | |
| data.sort_values('column_name', ascending=True, inplace=True) | |
| # Sort by multiple columns | |
| data.sort_values(['column1', 'column2'], ascending=[True, False], inplace=True) | |
| # 8. Merging and Joining | |
| # Merge two dataframes on a key | |
| merged_data = pd.merge(data1, data2, on='key_column', how='inner') | |
| # Concatenate dataframes vertically or horizontally | |
| concatenated_data = pd.concat([data1, data2], axis=0) # axis=1 for horizontal | |
| # 9. Pivoting and Reshaping | |
| # Pivot table | |
| pivot_table = data.pivot_table(values='value_column', index='index_column', columns='columns_column', aggfunc='mean') | |
| # Melt data | |
| melted_data = pd.melt(data, id_vars=['id_column'], value_vars=['value_column']) | |
| # 10. Working with Dates | |
| # Convert column to datetime | |
| data['date_column'] = pd.to_datetime(data['date_column']) | |
| # Extract components of datetime | |
| data['year'] = data['date_column'].dt.year | |
| data['month'] = data['date_column'].dt.month | |
| # 11. Exporting Data | |
| # Export to CSV | |
| data.to_csv('output.csv', index=False) | |
| # Export to Excel | |
| data.to_excel('output.xlsx', index=False) | |
| # 12. Advanced Indexing | |
| # Set index | |
| data.set_index('index_column', inplace=True) | |
| # Reset index | |
| data.reset_index(inplace=True) | |
| # Selecting by index | |
| data.loc['row_label'] | |
| data.iloc[0] | |
| # 13. Handling Duplicates | |
| # Drop duplicates | |
| data.drop_duplicates(subset=['column_name'], inplace=True) | |
| # Check for duplicates | |
| data.duplicated(subset=['column_name']).sum() | |
| # 14. Data Type Conversions | |
| # Convert column type | |
| data['column_name'] = data['column_name'].astype('int') | |
| # Convert multiple columns | |
| data = data.astype({'col1': 'float', 'col2': 'int'}) | |
| # 15. Basic Text Manipulations | |
| # Append text to a column | |
| data['new_column'] = data['existing_column'] + '_suffix' | |
| # Prepend text to a column | |
| data['new_column'] = 'prefix_' + data['existing_column'] | |
| # Replace text in a column | |
| data['new_column'] = data['existing_column'].str.replace('old_value', 'new_value', regex=True) | |
| # Convert to lowercase | |
| data['existing_column'] = data['existing_column'].str.lower() | |
| # Convert to uppercase | |
| data['existing_column'] = data['existing_column'].str.upper() | |
| # Strip whitespace from a column | |
| data['existing_column'] = data['existing_column'].str.strip() | |
| # Extract substring from a column | |
| data['substring'] = data['existing_column'].str[0:5] | |
| # Check if a string contains a substring | |
| data['contains_value'] = data['existing_column'].str.contains('value', na=False) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment