Connecting SQL using Pandas
Connecting SQL using Pandas and MySQL
import pandas as pd
import mysql.connector
# Connect to the MySQL database
conn = mysql.connector.connect(
host='localhost',
user='your_username',
password='your_password',
database='your_database'
)
# Create a cursor object
cursor = conn.cursor()
# Query to fetch customer details from the customers table
customer_query = 'SELECT * FROM customers'
# Fetch customer details into a DataFrame
customers_df = pd.read_sql_query(customer_query, conn)
# Query to fetch transaction details from the transactions table
transaction_query = 'SELECT * FROM transactions'
# Fetch transaction details into a DataFrame
transactions_df = pd.read_sql_query(transaction_query, conn)
# Query to fetch loan details from the loans table
loan_query = 'SELECT * FROM loans'
# Fetch loan details into a DataFrame
loans_df = pd.read_sql_query(loan_query, conn)
# Print the DataFrames
print("Customers DataFrame:")
print(customers_df)
print("Transactions DataFrame:")
print(transactions_df)
print("Loans DataFrame:")
print(loans_df)
# Close the connection
conn.close()
_____________________________________________________________________________________
Creating Dummy -
import sqlite3
# Connect to an in-memory SQLite database
conn = sqlite3.connect(':memory:')
cursor = conn.cursor()
# Create the customers table
cursor.execute('''
CREATE TABLE customers (
customer_id INTEGER PRIMARY KEY,
age INTEGER,
income REAL,
credit_score INTEGER,
loan_amount REAL
)
''')
# Create the transactions table
cursor.execute('''
CREATE TABLE transactions (
transaction_id INTEGER PRIMARY KEY,
customer_id INTEGER,
amount REAL,
merchant TEXT,
timestamp TEXT
)
''')
# Insert dummy data into the customers table
customers_data = [
(1, 30, 50000, 700, None),
(2, 35, 60000, 750, None),
(3, 40, 70000, 800, None),
(4, 35, 55000, 720, None),
(5, 32, 48000, 690, None),
(6, 45, 75000, 810, None),
(7, 37, 62000, 760, None),
(8, 31, 51000, 710, None),
(9, 33, 53000, 730, None),
(10, 38, 65000, 770, None),
(11, 36, 58000, 740, None),
(12, 39, 68000, 780, None),
(13, 34, 52000, 720, None),
(14, 29, 49000, 690, None),
(15, 42, 72000, 800, None),
(16, 28, 47000, 680, None),
(17, 41, 69000, 790, None),
(18, 32, 50000, 710, None),
(19, 34, 51000, 730, None),
(20, 37, 62000, 770, None),
(21, 33, 54000, 720, None),
(22, 35, 55000, 740, None),
(23, 30, 48000, 690, None),
(24, 38, 66000, 780, None),
(25, 31, 52000, 710, None),
(26, 43, 70000, 800, None),
(27, 39, 64000, 760, None),
(28, 36, 58000, 740, None),
(29, 29, 47000, 680, None),
(30, 40, 73000, 790, None),
(31, 32, 50000, 710, None),
(32, 34, 51000, 730, None),
(33, 36, 59000, 750, None),
(34, 35, 55000, 740, None),
(35, 31, 49000, 700, None),
(36, 42, 69000, 800, None),
(37, 28, 47000, 680, None),
(38, 37, 63000, 760, None),
(39, 33, 52000, 710, None),
(40, 30, 48000, 690, None),
]
cursor.executemany('INSERT INTO customers VALUES (?, ?, ?, ?, ?)', customers_data)
# Insert dummy data into the transactions table
transactions_data = [
(1, 1, 100, 'Merchant A', '2023-05-01 10:00:00'),
(2, 1, 200, 'Merchant B', '2023-05-02 12:30:00'),
(3, 2, 150, 'Merchant A', '2023-05-03 15:45:00'),
(4, 2, 300, 'Merchant C', '2023-05-04 09:15:00'),
(5, 3, 250, 'Merchant B', '2023-05-05 14:20:00'),
(6, 3, 180, 'Merchant C', '2023-05-06 17:30:00'),
(7, 4, 120, 'Merchant A', '2023-05-07 11:45:00'),
(8, 4, 220, 'Merchant C', '2023-05-08 13:10:00'),
(9, 5, 280, 'Merchant B', '2023-05-09 16:55:00'),
(10, 5, 150, 'Merchant A', '2023-05-10 19:00:00'),
(11, 6, 200, 'Merchant A', '2023-05-11 10:30:00'),
(12, 6, 150, 'Merchant C', '2023-05-12 12:45:00'),
(13, 7, 180, 'Merchant B', '2023-05-13 15:20:00'),
(14, 7, 300, 'Merchant C', '2023-05-14 18:30:00'),
(15, 8, 120, 'Merchant A', '2023-05-15 11:45:00'),
(16, 8, 220, 'Merchant B', '2023-05-16 13:10:00'),
(17, 9, 250, 'Merchant C', '2023-05-17 16:55:00'),
(18, 9, 150, 'Merchant A', '2023-05-18 19:00:00'),
(19, 10, 200, 'Merchant A', '2023-05-19 10:30:00'),
(20, 10, 180, 'Merchant B', '2023-05-20 12:45:00'),
(21, 11, 300, 'Merchant C', '2023-05-21 15:20:00'),
(22, 11, 150, 'Merchant A', '2023-05-22 18:30:00'),
(23, 12, 120, 'Merchant A', '2023-05-23 11:45:00'),
(24, 1, 100, 'Merchant A', '2023-05-01 10:00:00'),
(25, 1, 200, 'Merchant B', '2023-05-02 12:30:00'),
(26, 2, 150, 'Merchant A', '2023-05-03 15:45:00'),
(27, 2, 300, 'Merchant C', '2023-05-04 09:15:00'),
(28, 3, 250, 'Merchant B', '2023-05-05 14:20:00'),
(29, 3, 180, 'Merchant C', '2023-05-06 17:30:00'),
(30, 4, 120, 'Merchant A', '2023-05-07 11:45:00'),
(31, 4, 220, 'Merchant C', '2023-05-08 13:10:00'),
(32, 5, 280, 'Merchant B', '2023-05-09 16:55:00'),
(33, 5, 150, 'Merchant A', '2023-05-10 19:00:00'),
(34, 6, 200, 'Merchant A', '2023-05-11 10:30:00'),
(35, 6, 150, 'Merchant C', '2023-05-12 12:45:00'),
(36, 7, 180, 'Merchant B', '2023-05-13 15:20:00'),
(37, 7, 300, 'Merchant C', '2023-05-14 18:30:00'),
(38, 8, 120, 'Merchant A', '2023-05-15 11:45:00'),
(39, 8, 220, 'Merchant B', '2023-05-16 13:10:00'),
(40, 9, 250, 'Merchant C', '2023-05-17 16:55:00'),
]
cursor.executemany('INSERT INTO transactions VALUES (?, ?, ?, ?, ?)', transactions_data)
cursor.execute('SELECT * FROM customers')
import pandas as pd
customers_df = pd.read_sql_query('SELECT * FROM customers', conn)
customers_df.head(20)
transactions_df = pd.read_sql_query('SELECT * FROM transactions', conn)
transactions_df.head(20)
merged_df = pd.merge(customers_df, transactions_df, on='customer_id')
merged_df.head(20)

Comments
Post a Comment