import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px


# Load the student-performance dataset and take a first look at it.
csv_path = 'D:/data analysis/StudentsPerformance using python/StudentsPerformance.csv'
data = pd.read_csv(csv_path)
data.head()  # first five rows (5 is head's default)


# Summary (count / unique / top / freq) restricted to the object-dtype columns.
data.describe(include=['object'])


# (rows, columns) of the loaded frame.
data.shape

(1000, 8)


# Print the frame summary: index range, per-column non-null counts, dtypes and memory usage.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 8 columns):
 #   Column                       Non-Null Count  Dtype 
---  ------                       --------------  ----- 
 0   gender                       1000 non-null   object
 1   race/ethnicity               1000 non-null   object
 2   parental level of education  1000 non-null   object
 3   lunch                        1000 non-null   object
 4   test preparation course      1000 non-null   object
 5   math score                   1000 non-null   int64 
 6   reading score                1000 non-null   int64 
 7   writing score                1000 non-null   int64 
dtypes: int64(3), object(5)
memory usage: 62.6+ KB


# Number of missing values in each column (isna is the canonical spelling of isnull).
data.isna().sum()

gender                         0
race/ethnicity                 0
parental level of education    0
lunch                          0
test preparation course        0
math score                     0
reading score                  0
writing score                  0
dtype: int64


# Count the rows flagged by DataFrame.duplicated(), i.e. fully duplicated rows.
data.duplicated().sum()

0


# Share of each gender as a pie chart, with percentages shown to one decimal place.
gender_counts = data['gender'].value_counts()
gender_counts.plot.pie(autopct='%.1f%%', shadow=True)

<AxesSubplot:ylabel='gender'>


# Head-count for every (race/ethnicity, gender) combination, ordered by
# race/ethnicity, with the tally held in a column named 'count'.
group_gender_counts = data[['race/ethnicity', 'gender']].value_counts()
GenderByGroup = (
    group_gender_counts
    .to_frame(name='count')
    .sort_values(by='race/ethnicity')
)
GenderByGroup


# Save the table as CSV in the same folder as the source dataset.
GenderByGroup.to_csv('D:/data analysis/StudentsPerformance using python/GenderByGroup.csv')


# Number of students per 'test preparation course' value, split by gender.
sns.countplot(
    x='test preparation course',
    hue='gender',
    palette='cool',
    data=data,
)

<AxesSubplot:xlabel='test preparation course', ylabel='count'>


# How many students fall under each parental education level.
parent_education = data['parental level of education']
LevelOfEducation = parent_education.value_counts()
LevelOfEducation

some college          226
associate's degree    222
high school           196
some high school      179
bachelor's degree     118
master's degree        59
Name: parental level of education, dtype: int64


# Bar chart of the parental-education counts, with each bar's count written above it.
plt.figure(figsize=(20, 5))
sns.barplot(x=LevelOfEducation.index, y=LevelOfEducation.values, palette='CMRmap_r')
# Categorical bars sit at x = 0, 1, 2, ..., so the enumerate index doubles as
# the label's x position. Centre the text on the bar and rest it on the bar
# top; the default left alignment would start the label at the bar's midpoint
# and let it run off to the right.
for position, count in enumerate(LevelOfEducation.values):
    plt.text(position, count, str(count), fontsize=20, ha='center', va='bottom')


# Print the frame summary again (index range, non-null counts, dtypes, memory usage).
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 8 columns):
 #   Column                       Non-Null Count  Dtype 
---  ------                       --------------  ----- 
 0   gender                       1000 non-null   object
 1   race/ethnicity               1000 non-null   object
 2   parental level of education  1000 non-null   object
 3   lunch                        1000 non-null   object
 4   test preparation course      1000 non-null   object
 5   math score                   1000 non-null   int64 
 6   reading score                1000 non-null   int64 
 7   writing score                1000 non-null   int64 
dtypes: int64(3), object(5)
memory usage: 62.6+ KB


# Minimum, maximum and mean of each of the three score columns.
score_columns = ['math score', 'reading score', 'writing score']
data[score_columns].agg(['min', 'max', 'mean'])


# Distribution of the math scores.
px.histogram(data_frame=data, x='math score')


# Distribution of the writing scores.
px.histogram(data_frame=data, x='writing score')


# Per-student total: math + writing + reading, stored as a new 'total' column.
data['total'] = data['math score'].add(data['writing score']).add(data['reading score'])
data


# Ten rows with the highest 'total'.
data.nlargest(n=10, columns='total')


# Ten rows with the lowest 'total'.
data.nsmallest(n=10, columns='total')

	gender	race/ethnicity	parental level of education	lunch	test preparation course	math score	reading score	writing score
0	female	group B	bachelor's degree	standard	none	72	72	74
1	female	group C	some college	standard	completed	69	90	88
2	female	group B	master's degree	standard	none	90	95	93
3	male	group A	associate's degree	free/reduced	none	47	57	44
4	male	group C	some college	standard	none	76	78	75

		count
race/ethnicity	gender
group A	male	53
group A	female	36
group B	female	104
group B	male	86
group C	female	180
group C	male	139
group D	male	133
group D	female	129
group E	male	71
group E	female	69

	math score	reading score	writing score
min	0.000	17.000	10.000
max	100.000	100.000	100.000
mean	66.089	69.169	68.054

	gender	race/ethnicity	parental level of education	lunch	test preparation course	math score	reading score	writing score	total
0	female	group B	bachelor's degree	standard	none	72	72	74	218
1	female	group C	some college	standard	completed	69	90	88	247
2	female	group B	master's degree	standard	none	90	95	93	278
3	male	group A	associate's degree	free/reduced	none	47	57	44	148
4	male	group C	some college	standard	none	76	78	75	229
...	...	...	...	...	...	...	...	...	...
995	female	group E	master's degree	standard	completed	88	99	95	282
996	male	group C	high school	free/reduced	none	62	55	55	172
997	female	group C	high school	free/reduced	completed	59	71	65	195
998	female	group D	some college	standard	completed	68	78	77	223
999	female	group D	some college	free/reduced	none	77	86	86	249

	gender	race/ethnicity	parental level of education	lunch	test preparation course	math score	reading score	writing score	total
458	female	group E	bachelor's degree	standard	none	100	100	100	300
916	male	group E	bachelor's degree	standard	completed	100	100	100	300
962	female	group E	associate's degree	standard	none	100	100	100	300
114	female	group E	bachelor's degree	standard	completed	99	100	100	299
179	female	group D	some high school	standard	completed	97	100	100	297
712	female	group D	some college	standard	none	98	100	99	297
165	female	group C	bachelor's degree	standard	completed	96	100	100	296
625	male	group D	some college	standard	completed	100	97	99	296
149	male	group E	associate's degree	free/reduced	completed	100	100	93	293
685	female	group E	master's degree	standard	completed	94	99	100	293

	gender	race/ethnicity	parental level of education	lunch	test preparation course
count	1000	1000	1000	1000	1000
unique	2	5	6	2	2
top	female	group C	some college	standard	none
freq	518	319	226	645	642