Some practical programs implemented in Python 3.12 that can be used for AI/ML
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
# Matrix multiplication
import numpy as np

# A holds the integers 1..9 row-by-row; B is its transpose.
A = np.arange(1, 10).reshape(3, 3)
print("3x3 Matrix: A: ")
print(A)

B = A.T
print("3x3 Matrix: B :")
print(B)

# The @ operator is matrix multiplication (equivalent to np.dot here).
C = A @ B
print("3x3 Matrix: C(result of multiplication) :")
print(C)
1
2
3
4
5
6
7
8
9
10
11
12
3x3 Matrix: A:
[[1 2 3]
[4 5 6]
[7 8 9]]
3x3 Matrix: B :
[[1 4 7]
[2 5 8]
[3 6 9]]
3x3 Matrix: C(result of multiplication) :
[[ 14 32 50]
[ 32 77 122]
[ 50 122 194]]
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
# Finding the inverse of a matrix
import numpy as np
from scipy import linalg

# Define matrix A
A = np.array([[1, 2, 3],
              [0, 1, 4],
              [5, 6, 0]])

# linalg.det returns a float (here ~0.9999999999999987 instead of
# exactly 1), so invertibility must be tested against a tolerance,
# not with an exact `det != 0` comparison.
det = linalg.det(A)
print("Matrix A:")
print(A)
print("\nDeterminant:", det)

# Check if matrix is invertible (tolerance-based float comparison).
if abs(det) > 1e-12:
    A_inv = linalg.inv(A)
    print("\nMatrix is invertible.")
    print("Inverse of A:")
    print(A_inv)
else:
    print("\nMatrix is NOT invertible (determinant = 0).")
1
2
3
4
5
6
7
8
9
10
11
12
Matrix A:
[[1 2 3]
[0 1 4]
[5 6 0]]
Determinant: 0.9999999999999987
Matrix is invertible.
Inverse of A:
[[-24. 18. 5.]
[ 20. -15. -4.]
[ -5. 4. 1.]]
1
2
3
4
5
6
7
8
9
10
11
12
13
# Euclidean distance calculation
import math

# Read the coordinates of the two points from the user.
x1 = float(input("Enter x1: "))
y1 = float(input("Enter y1: "))
x2 = float(input("Enter x2: "))
y2 = float(input("Enter y2: "))

# Distance = square root of the summed squared coordinate differences.
dx = x2 - x1
dy = y2 - y1
distance = math.sqrt(dx ** 2 + dy ** 2)
print("Euclidean Distance =", distance)
1
2
3
4
5
Enter x1: 1
Enter y1: 2
Enter x2: 3
Enter y2: 4
Euclidean Distance = 2.8284271247461903
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
# alternative method
import math

# Points stored in arrays
A = [2, 3]
B = [6, 7]

# Accumulate the squared component-wise differences, then take the root.
sum_sq = 0
for a_i, b_i in zip(A, B):
    sum_sq += (a_i - b_i) ** 2

distance = math.sqrt(sum_sq)
print("Euclidean Distance =", distance)
1
Euclidean Distance = 5.656854249492381
1
2
3
4
5
6
7
8
9
10
11
12
# alternative method
# use of zip function
import math

# Points stored in arrays
A = [2, 3]
B = [6, 7]

# Pair up the coordinates, square each difference, sum, and take the root.
squared_diffs = [(p - q) ** 2 for p, q in zip(A, B)]
distance = math.sqrt(sum(squared_diffs))
print("Euclidean Distance =", distance)
1
Euclidean Distance = 5.656854249492381
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
import math


def euclidean_distance(vector1, vector2):
    """Return the Euclidean distance between two equal-length vectors.

    If the vectors have different dimensions, print a message and
    return None.
    """
    if len(vector1) != len(vector2):
        # BUG FIX: the original called `Print(...)` (capital P), which
        # raised NameError instead of printing the message.
        print("Vectors must have the same dimensions.")
        return None
    squared_differences_sum = 0
    for v1, v2 in zip(vector1, vector2):
        squared_differences_sum += (v1 - v2) ** 2
    return math.sqrt(squared_differences_sum)


# Example Usage- 2D:
vector_a = [10, 20]
vector_b = [15, 25]
distance_2d = euclidean_distance(vector_a, vector_b)
print(f"The Euclidean distance between {vector_a} and {vector_b} is: {distance_2d}")

# Example Usage 3D:
vector_c = [1, 2, 3]
vector_d = [4, 5, 6]
distance_3d = euclidean_distance(vector_c, vector_d)
print(f"The Euclidean distance between {vector_c} and {vector_d} is: {distance_3d}")
1
2
The Euclidean distance between [10, 20] and [15, 25] is: 7.0710678118654755
The Euclidean distance between [1, 2, 3] and [4, 5, 6] is: 5.196152422706632
1
2
3
4
5
6
7
8
9
# read csv file
import os
import pandas as pd

# Load the CSV into a DataFrame.
csv_path = 'C:/hs1.csv'
data = pd.read_csv(csv_path)

# Show the first five rows.
print(data.head())
1
2
3
4
5
6
name age income
0 RC 33 33056
1 AS 45 45056
2 AC 38 38056
3 AD 13 13056
4 AF 16 16056
1
2
3
4
5
6
7
8
9
10
11
12
13
# read csv file
import os
import pandas as pd

# Load the CSV into a DataFrame.
data = pd.read_csv('C:/hs1.csv')

# Select a single column as a Series (values only, no column header).
data1 = data.loc[:, 'name']

# Show the first five entries.
print(data1.head())
1
2
3
4
5
6
0 RC
1 AS
2 AC
3 AD
4 AF
Name: name, dtype: object
1
2
3
4
5
6
7
# Alternative method - good as the column name will also appear
import os
import pandas as pd

# Restrict the read to just the "name" column so the header is kept.
wanted_columns = ["name"]
data = pd.read_csv('C:/hs1.csv', usecols=wanted_columns)
print(data.head())
1
2
3
4
5
6
name
0 RC
1 AS
2 AC
3 AD
4 AF
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
# finding correlation coefficient
def pearson_correlation(x, y):
    """Return the Pearson correlation coefficient of sequences x and y.

    Returns None for mismatched lengths or empty input, and 0.0 when
    either sequence has zero variance (the coefficient is undefined).
    """
    n = len(x)
    if n != len(y) or n == 0:
        return None  # Invalid input

    # Means
    mean_x = sum(x) / n
    mean_y = sum(y) / n

    # Population standard deviations.  Note: n is guaranteed non-zero
    # here, so the original try/except ZeroDivisionError around these
    # two lines could never trigger and has been removed.
    sum_sq_x = sum((i - mean_x) ** 2 for i in x)
    sum_sq_y = sum((i - mean_y) ** 2 for i in y)
    std_x = (sum_sq_x / n) ** 0.5
    std_y = (sum_sq_y / n) ** 0.5

    # Population covariance
    covariance = sum((xi - mean_x) * (yi - mean_y) for xi, yi in zip(x, y)) / n

    # r = cov / (std_x * std_y); guard the zero-variance case explicitly
    # instead of relying on catching ZeroDivisionError.
    if std_x == 0 or std_y == 0:
        return 0.0
    return covariance / (std_x * std_y)


# Example Usage (replace with your data)
age = [25, 30, 35, 40, 45]
income = [50000, 60000, 70000, 80000, 90000]
correlation = pearson_correlation(age, income)
print(f"Pearson correlation coefficient: {correlation}")
1
Pearson correlation coefficient: 1.0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
# alternatively
import numpy as np
import random

x = [1, 2, 3, 5, 7, 8, 9, 4, 6]
y = [11, 22, 33, 55, 77, 88, 99, 44, 66]

# Sample means of both variables.
x_bar = sum(x) / len(x)
y_bar = sum(y) / len(y)
n = len(x)  # equals len(y) also
print(n)
print(f'x_bar= {x_bar}, y_bar={y_bar}')

# Population covariance: mean of the products of paired deviations.
n_cov_x_y = 0
for xi, yi in zip(x, y):
    n_cov_x_y += (xi - x_bar) * (yi - y_bar)
cov_x_y = n_cov_x_y / n
print(f'covariance ={cov_x_y}')

# Sums of squared deviations for each variable.
sigma_x_sqr = 0
for xv in x:
    sigma_x_sqr += (xv - x_bar) ** 2
sigma_y_sqr = 0
for yv in y:
    sigma_y_sqr += (yv - y_bar) ** 2

# Calculate standard deviations (population form).
sigma_x = (sigma_x_sqr / n) ** 0.5
sigma_y = (sigma_y_sqr / n) ** 0.5

# Pearson r = covariance / (sigma_x * sigma_y).
r = cov_x_y / (sigma_x * sigma_y)
print(f'Pearson correlation coefficient ={r}')
1
2
3
4
9
x_bar= 5.0, y_bar=55.0
covariance =73.33333333333333
Pearson correlation coefficient =1.0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
# linear regression
import numpy as np
import random

x = [2, 3, 5, 7, 8, 9, 4, 12]
y = [22, 33, 55, 81, 89, 96, 48, 127]

# Initial parameter guesses and step size.
m = 1
c = 1
learning_rate = .01


def grad_desc(x, y, m, c, learning_rate):
    """Perform one gradient-descent step on MSE loss; return updated (m, c)."""
    dldm = 0
    dldc = 0
    n = len(x)
    for xi, yi in zip(x, y):
        # dL/dm and dL/dc for squared-error loss at this sample.
        dldm += -2 * xi * (yi - (m * xi + c))
        dldc += -2 * (yi - (m * xi + c))
    m = m - (1 / n) * learning_rate * dldm
    c = c - (1 / n) * learning_rate * dldc
    return m, c


def mse(y_true, y_pred):
    """Mean squared error between two same-length sequences."""
    y_true = np.array(y_true)
    y_pred = np.array(y_pred)
    return np.mean((y_true - y_pred) ** 2)


# Convert to NumPy arrays so m*x + c vectorizes below.
x = np.array(x)
y = np.array(y)

# Run 100 descent steps, reporting the loss after each one.
for epoch in range(100):
    m, c = grad_desc(x, y, m, c, learning_rate)
    yhat = m * x + c
    loss = mse(y, yhat)
    print(f'epoch: {epoch}, loss ={loss} for m={m},c={c}')
1
2
3
4
5
6
7
epoch: 0, loss =6.017884375 for m=10.5875,c=2.2325
epoch: 1, loss =5.941109893984372 for m=10.6251875,c=2.2419124999999998
epoch: 2, loss =5.939060294149141 for m=10.6247646875,c=2.2464258125
......
epoch: 97, loss =5.80328770131262 for m=10.579367442770804,c=2.6008956505093472
epoch: 98, loss =5.802337163530998 for m=10.578975392541746,c=2.6039568071528096
epoch: 99, loss =5.801394197664435 for m=10.578584906956733,c=2.607005746942035
1
2
3
4
5
6
7
8
9
10
11
12
13
# using statsmodels
import numpy as np
import statsmodels.api as sm

X = np.array([2, 3, 5, 7, 8, 9, 4, 12])       # Independent variable
Y = np.array([22, 33, 55, 81, 89, 96, 48, 127])  # Dependent variable

# statsmodels does not add an intercept automatically, so prepend a
# constant column, then fit ordinary least squares.
X = sm.add_constant(X)
model = sm.OLS(Y, X).fit()
print(model.params)  # [intercept, slope]
1
[ 3.36792453 10.48113208]
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
# creating random linear function
import numpy as np
import pandas as pd
import random

# One shared random offset, fixed for the whole dataset.
r_d = random.randint(0, 16)

# Generate 100 (x, y) pairs following y = 4x + 4 + r_d.
data = []
for _ in range(100):
    r_x = random.randint(0, 256)
    data.append((r_x, 4 * r_x + 4 + r_d))

# Create a DataFrame
df = pd.DataFrame(data, columns=['x', 'y'])

# Display the first few rows
print(df.head())
#df.to_csv("random_data.csv", index=False)
1
2
3
4
5
6
x y
0 72 301
1 156 637
2 8 45
3 206 837
4 164 669
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
# Generation of random (x, y) pairs where y = f(x) + d (d varies from -r to +r ,
# a random value ), f being a linear function
# Linear regression or line fitting of the data
# Optimizing the function using gradient descent
# Plotting the steps using matplotlib
import numpy as np
import matplotlib.pyplot as plt

# Parameters of the true underlying line and the noise half-width.
m_true = 2
c_true = 1
r = 2

# Synthetic data: 50 evenly spaced x values, uniform noise in [-r, r).
x = np.linspace(0, 10, 50)
d = np.random.uniform(-r, r, len(x))
y = m_true * x + c_true + d

# Gradient-descent configuration, starting from the zero line.
m = 0
c = 0
learning_rate = 0.01
epochs = 20

plt.scatter(x, y, color='blue', label='Data')

# Gradient descent: each pass updates (m, c) along the negative MSE gradient.
for _ in range(epochs):
    y_pred = m * x + c
    # Gradients of the mean squared error w.r.t. m and c.
    dm = (-2 / len(x)) * np.sum(x * (y - y_pred))
    dc = (-2 / len(x)) * np.sum(y - y_pred)
    # Parameter update step.
    m = m - learning_rate * dm
    c = c - learning_rate * dc
    # Faintly plot each intermediate candidate line.
    plt.plot(x, y_pred, alpha=0.3)

# Final fitted line
plt.plot(x, m * x + c, color='red', linewidth=3, label='Fitted Line')
plt.legend()
plt.title("Linear Regression using Gradient Descent")
plt.show()
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
# alternatively
import numpy as np
import pandas as pd
import random
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt

# Create 100 noisy samples of y = 4x + 4, with a fresh offset per row.
data = []
for _ in range(100):
    offset = random.randint(0, 316)  # per-sample random offset
    x_val = random.randint(0, 256)
    data.append((x_val, 4 * x_val + 4 + offset))

# Create DataFrame
df = pd.DataFrame(data, columns=['x', 'y'])

# Print sample
print(df.head())

# Save to CSV (optional)
#df.to_csv("random_data.csv", index=False)

# scikit-learn expects a 2D feature matrix, so keep x as a one-column frame.
X = df[['x']]
y = df['y']

# Fit an ordinary-least-squares model.
model = LinearRegression()
model.fit(X, y)

# Extract the fitted parameters.
slope = model.coef_[0]
intercept = model.intercept_
print(f"\nFitted Linear Model: y = {slope:.2f}x + {intercept:.2f}")

# Predicted line for plotting.
y_pred = model.predict(X)

# Plotting
plt.scatter(X, y, color='blue', label='Actual Data')
plt.plot(X, y_pred, color='red', linewidth=2, label='Regression Line')
plt.title('Linear Regression Fit')
plt.xlabel('x')
plt.ylabel('y')
plt.legend()
plt.grid(True)
plt.show()
1
2
3
4
5
6
7
8
x y
0 61 320
1 191 808
2 81 337
3 206 1006
4 186 862
Fitted Linear Model: y = 3.91x + 163.11
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
# LR without using ML
import numpy as np

# 1. Define the same sample data
# X = 'Hours Studied', y = 'Test Score'
X = np.array([1, 2, 3, 4, 5])
y = np.array([50, 65, 75, 80, 95])

# 2. Count the number of data points (n)
n = len(X)

# 3. Calculate all the required raw sums
sum_x = np.sum(X)
sum_y = np.sum(y)
sum_xy = np.sum(X * y)
sum_x_squared = np.sum(X ** 2)

# The denominator is the same for both formulas.
denominator = (n * sum_x_squared) - (sum_x ** 2)

# 4. Apply the raw score formula for slope (a, may be call m)
numerator_a = (n * sum_xy) - (sum_x * sum_y)
a = numerator_a / denominator

# 5. Apply the raw score formula for intercept (b, may be call c)
numerator_b = (sum_y * sum_x_squared) - (sum_x * sum_xy)
b = numerator_b / denominator

# BUG FIX: the original message printed {b} as the slope and {a} as the
# intercept — but `a` is the slope and `b` is the intercept, so the
# reported values were swapped.
print(f'The required slope is {a}, the required intercept is {b}')
1
The required slope is 41.5, the required intercept is 10.5
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
# Prediction using LR
import numpy as np

# 1. Define the same sample data
# X = 'Hours Studied', y = 'Test Score'
X = np.array([1, 2, 3, 4, 5])
y = np.array([50, 65, 75, 80, 95])

# Number of observations.
n = len(X)

# Raw sums needed by the closed-form least-squares formulas.
sum_x, sum_y = np.sum(X), np.sum(y)
sum_xy = np.sum(X * y)
sum_x_squared = np.sum(X ** 2)

# Slope via the raw-score formula.
m = ((n * sum_xy) - (sum_x * sum_y)) / ((n * sum_x_squared) - (sum_x ** 2))

# Intercept from the means: c = (sum_y - m * sum_x) / n.
c = (sum_y - (m * sum_x)) / n

# Report the fitted line.
print("--- Alternative Linear Regression Formula Implemented ---")
print(f"Slope (m): {m}")
print(f"Y-Intercept (c): {c}")
print(f"Equation of the line: y = {m}x + {c}")

# Predict the score for an unseen x value.
x_new = 3.5
y_pred = (m * x_new) + c
print(f"\nPrediction for X = {x_new}: y = {y_pred}")
1
2
3
4
5
6
--- Alternative Linear Regression Formula Implemented ---
Slope (m): 10.5
Y-Intercept (c): 41.5
Equation of the line: y = 10.5x + 41.5
Prediction for X = 3.5: y = 78.25
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
# Logistic regression
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

# 1. Load the Iris dataset: 4 numeric features, 3 flower classes.
iris = datasets.load_iris()
X = iris.data    # sepal length/width, petal length/width
y = iris.target  # 0 = setosa, 1 = versicolor, 2 = virginica

# 2. Hold out 20% of the rows for testing; fixed seed for repeatability.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 3. Multinomial (softmax) logistic regression:
#    - solver='lbfgs' handles the multinomial loss well
#    - max_iter=200 gives the optimizer room to converge
model = LogisticRegression(multi_class='multinomial', solver='lbfgs', max_iter=200)

# 4. Fit on the training split.
model.fit(X_train, y_train)

# 5. Predict the held-out rows.
y_pred = model.predict(X_test)

# 6. Report overall accuracy and the per-class precision/recall/F1 table.
print("--- Multiclass Logistic Regression Results ---\n")
print(f"Overall Accuracy: {accuracy_score(y_test, y_pred) * 100:.2f}%\n")
print("Detailed Classification Report:")
print(classification_report(y_test, y_pred, target_names=iris.target_names))
1
2
3
4
5
6
7
8
9
10
11
12
13
14
--- Multiclass Logistic Regression Results ---
Overall Accuracy: 100.00%
Detailed Classification Report:
precision recall f1-score support
setosa 1.00 1.00 1.00 10
versicolor 1.00 1.00 1.00 9
virginica 1.00 1.00 1.00 11
accuracy 1.00 30
macro avg 1.00 1.00 1.00 30
weighted avg 1.00 1.00 1.00 30
1
2
3
4
5
# Finding Entropy
from scipy.stats import entropy

# Shannon entropy (in bits, via base=2) of a two-outcome distribution.
p = [0.9, 0.1]  # probability distribution
ent = entropy(p, base=2)
print("Entropy:", ent)
1
Entropy: 0.46899559358928117
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
# Decision tree
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report

# 1. Load the Iris dataset: 4 numeric features, 3 flower classes.
iris = datasets.load_iris()
X = iris.data    # sepal length/width, petal length/width
y = iris.target  # 0 = setosa, 1 = versicolor, 2 = virginica

# 2. Hold out 20% of the rows for testing; fixed seed for repeatability.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 3. A shallow tree (max_depth=3) resists memorizing the training data;
#    random_state=42 makes the fitted tree reproducible run to run.
model = DecisionTreeClassifier(max_depth=3, random_state=42)

# 4. Fit on the training split.
model.fit(X_train, y_train)

# 5. Predict the held-out rows.
y_pred = model.predict(X_test)

# 6. Report overall accuracy and the per-class precision/recall/F1 table.
print("--- Decision Tree Classifier Results ---\n")
print(f"Overall Accuracy: {accuracy_score(y_test, y_pred) * 100:.2f}%\n")
print("Detailed Classification Report:")
print(classification_report(y_test, y_pred, target_names=iris.target_names))
1
2
3
4
5
6
7
8
9
10
11
12
13
14
--- Decision Tree Classifier Results ---
Overall Accuracy: 100.00%
Detailed Classification Report:
precision recall f1-score support
setosa 1.00 1.00 1.00 10
versicolor 1.00 1.00 1.00 9
virginica 1.00 1.00 1.00 11
accuracy 1.00 30
macro avg 1.00 1.00 1.00 30
weighted avg 1.00 1.00 1.00 30
1
This post is licensed under CC BY 4.0 by the author.