Connecting SQL using Pandas

Connecting SQL using Pandas and MySQL 


import pandas as pd

import mysql.connector


# Connect to the MySQL database

conn = mysql.connector.connect(

    host='localhost',

    user='your_username',

    password='your_password',

    database='your_database'

)


# Create a cursor object

cursor = conn.cursor()


# Query to fetch customer details from the customers table

customer_query = 'SELECT * FROM customers'


# Fetch customer details into a DataFrame

customers_df = pd.read_sql_query(customer_query, conn)


# Query to fetch transaction details from the transactions table

transaction_query = 'SELECT * FROM transactions'


# Fetch transaction details into a DataFrame

transactions_df = pd.read_sql_query(transaction_query, conn)


# Query to fetch loan details from the loans table

loan_query = 'SELECT * FROM loans'


# Fetch loan details into a DataFrame

loans_df = pd.read_sql_query(loan_query, conn)


# Print the DataFrames

print("Customers DataFrame:")

print(customers_df)


print("Transactions DataFrame:")

print(transactions_df)


print("Loans DataFrame:")

print(loans_df)


# Close the connection

conn.close()

_____________________________________________________________________________________

Creating Dummy -


import sqlite3


# Connect to an in-memory SQLite database

conn = sqlite3.connect(':memory:')

cursor = conn.cursor()


# Create the customers table

cursor.execute('''

    CREATE TABLE customers (

        customer_id INTEGER PRIMARY KEY,

        age INTEGER,

        income REAL,

        credit_score INTEGER,

        loan_amount REAL

    )

''')


# Create the transactions table

cursor.execute('''

    CREATE TABLE transactions (

        transaction_id INTEGER PRIMARY KEY,

        customer_id INTEGER,

        amount REAL,

        merchant TEXT,

        timestamp TEXT

    )

''')


# Insert dummy data into the customers table

customers_data = [

(1, 30, 50000, 700, None),

    (2, 35, 60000, 750, None),

    (3, 40, 70000, 800, None),

    (4, 35, 55000, 720, None),

    (5, 32, 48000, 690, None),

    (6, 45, 75000, 810, None),

    (7, 37, 62000, 760, None),

    (8, 31, 51000, 710, None),

    (9, 33, 53000, 730, None),

    (10, 38, 65000, 770, None),

    (11, 36, 58000, 740, None),

    (12, 39, 68000, 780, None),

    (13, 34, 52000, 720, None),

    (14, 29, 49000, 690, None),

    (15, 42, 72000, 800, None),

    (16, 28, 47000, 680, None),

    (17, 41, 69000, 790, None),

    (18, 32, 50000, 710, None),

    (19, 34, 51000, 730, None),

    (20, 37, 62000, 770, None),

    (21, 33, 54000, 720, None),

    (22, 35, 55000, 740, None),

    (23, 30, 48000, 690, None),

    (24, 38, 66000, 780, None),

    (25, 31, 52000, 710, None),

    (26, 43, 70000, 800, None),

    (27, 39, 64000, 760, None),

    (28, 36, 58000, 740, None),

    (29, 29, 47000, 680, None),

    (30, 40, 73000, 790, None),

    (31, 32, 50000, 710, None),

    (32, 34, 51000, 730, None),

    (33, 36, 59000, 750, None),

    (34, 35, 55000, 740, None),

    (35, 31, 49000, 700, None),

    (36, 42, 69000, 800, None),

    (37, 28, 47000, 680, None),

    (38, 37, 63000, 760, None),

    (39, 33, 52000, 710, None),

    (40, 30, 48000, 690, None),

]


cursor.executemany('INSERT INTO customers VALUES (?, ?, ?, ?, ?)', customers_data)


# Insert dummy data into the transactions table

transactions_data = [

    (1, 1, 100, 'Merchant A', '2023-05-01 10:00:00'),

    (2, 1, 200, 'Merchant B', '2023-05-02 12:30:00'),

    (3, 2, 150, 'Merchant A', '2023-05-03 15:45:00'),

    (4, 2, 300, 'Merchant C', '2023-05-04 09:15:00'),

    (5, 3, 250, 'Merchant B', '2023-05-05 14:20:00'),

    (6, 3, 180, 'Merchant C', '2023-05-06 17:30:00'),

    (7, 4, 120, 'Merchant A', '2023-05-07 11:45:00'),

    (8, 4, 220, 'Merchant C', '2023-05-08 13:10:00'),

    (9, 5, 280, 'Merchant B', '2023-05-09 16:55:00'),

    (10, 5, 150, 'Merchant A', '2023-05-10 19:00:00'),

    (11, 6, 200, 'Merchant A', '2023-05-11 10:30:00'),

    (12, 6, 150, 'Merchant C', '2023-05-12 12:45:00'),

    (13, 7, 180, 'Merchant B', '2023-05-13 15:20:00'),

    (14, 7, 300, 'Merchant C', '2023-05-14 18:30:00'),

    (15, 8, 120, 'Merchant A', '2023-05-15 11:45:00'),

    (16, 8, 220, 'Merchant B', '2023-05-16 13:10:00'),

    (17, 9, 250, 'Merchant C', '2023-05-17 16:55:00'),

    (18, 9, 150, 'Merchant A', '2023-05-18 19:00:00'),

    (19, 10, 200, 'Merchant A', '2023-05-19 10:30:00'),

    (20, 10, 180, 'Merchant B', '2023-05-20 12:45:00'),

    (21, 11, 300, 'Merchant C', '2023-05-21 15:20:00'),

    (22, 11, 150, 'Merchant A', '2023-05-22 18:30:00'),

    (23, 12, 120, 'Merchant A', '2023-05-23 11:45:00'),

    (24, 1, 100, 'Merchant A', '2023-05-01 10:00:00'),

    (25, 1, 200, 'Merchant B', '2023-05-02 12:30:00'),

    (26, 2, 150, 'Merchant A', '2023-05-03 15:45:00'),

    (27, 2, 300, 'Merchant C', '2023-05-04 09:15:00'),

    (28, 3, 250, 'Merchant B', '2023-05-05 14:20:00'),

    (29, 3, 180, 'Merchant C', '2023-05-06 17:30:00'),

    (30, 4, 120, 'Merchant A', '2023-05-07 11:45:00'),

    (31, 4, 220, 'Merchant C', '2023-05-08 13:10:00'),

    (32, 5, 280, 'Merchant B', '2023-05-09 16:55:00'),

    (33, 5, 150, 'Merchant A', '2023-05-10 19:00:00'),

    (34, 6, 200, 'Merchant A', '2023-05-11 10:30:00'),

    (35, 6, 150, 'Merchant C', '2023-05-12 12:45:00'),

    (36, 7, 180, 'Merchant B', '2023-05-13 15:20:00'),

    (37, 7, 300, 'Merchant C', '2023-05-14 18:30:00'),

    (38, 8, 120, 'Merchant A', '2023-05-15 11:45:00'),

    (39, 8, 220, 'Merchant B', '2023-05-16 13:10:00'),

    (40, 9, 250, 'Merchant C', '2023-05-17 16:55:00'),

]


cursor.executemany('INSERT INTO transactions VALUES (?, ?, ?, ?, ?)', transactions_data)

cursor.execute('SELECT * FROM customers')


import pandas as pd

customers_df = pd.read_sql_query('SELECT * FROM customers', conn)

customers_df.head(20)


transactions_df = pd.read_sql_query('SELECT * FROM transactions', conn)

transactions_df.head(20)


merged_df = pd.merge(customers_df, transactions_df, on='customer_id')

merged_df.head(20)





Comments

Popular Posts