# Step 1: Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Use seaborn's "whitegrid" theme for every chart drawn below
sns.set_style(style="whitegrid")
# Read the 2022 primary-school enrollment workbook into a DataFrame
file_path = "Enrollment in Government and Non_Government Primary Schools by Age and Sex_2022-1.xlsx"
df = pd.read_excel(io=file_path)
# Preview the first five rows (only rendered when this is the last line of a notebook cell)
df.head(n=5)
| | Region | Council | Ward | School | RegNo | Ownership | Below6YearsBoys | Below6YearsGirls | 6yearsBoys | 6yearsGirls | ... | 11yearsGirls | 12yearsBoys | 12yearsGirls | 13yearsBoys | 13yearsGirls | Above13yearsBoys | Above13yearsGirls | TotalBoys | TotalGirls | TotalEnrollment |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Arusha | Arusha | Bangata | BANGATA | EM.1865 | Government | 1 | 0 | 23 | 21 | ... | 25 | 28 | 14 | 17 | 12 | 29 | 15 | 248 | 193 | 441 |
| 1 | Arusha | Arusha | Bangata | ENGIKARETI | EM.14567 | Government | 0 | 0 | 25 | 32 | ... | 30 | 28 | 24 | 39 | 25 | 30 | 26 | 207 | 220 | 427 |
| 2 | Arusha | Arusha | Bangata | IKIRWA | EM.15402 | Non-Government | 0 | 0 | 6 | 1 | ... | 9 | 8 | 7 | 4 | 2 | 2 | 0 | 81 | 70 | 151 |
| 3 | Arusha | Arusha | Bangata | MIDAWE | EM.4582 | Government | 0 | 0 | 29 | 26 | ... | 40 | 23 | 24 | 9 | 11 | 34 | 32 | 247 | 241 | 488 |
| 4 | Arusha | Arusha | Bangata | SASI | EM.3409 | Government | 7 | 9 | 22 | 25 | ... | 20 | 26 | 31 | 16 | 26 | 11 | 8 | 180 | 214 | 394 |
5 rows × 29 columns
# Count how many schools fall under each ownership category
ownership_counts = df['Ownership'].value_counts()

# Draw the ownership split as a pie chart and keep the wedge artists for the legend
plt.figure(figsize=(8, 8))
wedges, _texts, _autotexts = plt.pie(
    ownership_counts,
    labels=ownership_counts.index,
    autopct='%1.1f%%',
    startangle=140,
    shadow=True,
)
plt.title('Distribution of School Ownership in 2022')

# Put the absolute counts in the legend. The wedges are passed explicitly so each
# label is tied to its own slice instead of relying on the order in which artists
# (including the shadow patches) were added to the axes.
legend_labels = [f'{label} ({count})' for label, count in ownership_counts.items()]
plt.legend(wedges, legend_labels, loc='upper right')
plt.axis('equal')  # Equal aspect ratio so the pie is drawn as a circle
plt.show()
# Add up the boys and the girls over every school in the dataset
total_boys = df['TotalBoys'].sum()
total_girls = df['TotalGirls'].sum()

# Two-row frame (one row per gender) that feeds the pie chart
totals_data = pd.DataFrame({'Gender': ['Total Boys', 'Total Girls'], 'Count': [total_boys, total_girls]})

# Draw the gender split and keep the wedge artists for the legend
plt.figure(figsize=(8, 8))
wedges, _texts, _autotexts = plt.pie(
    totals_data['Count'],
    labels=totals_data['Gender'],
    autopct='%1.1f%%',
    startangle=140,
    shadow=True,
)
plt.title('Distribution of Total Boys and Total Girls in 2022')

# Put the absolute counts in the legend. The wedges are passed explicitly so each
# label is tied to its own slice rather than matched implicitly by artist order.
legend_labels = [f'{label}: {count}' for label, count in zip(totals_data['Gender'], totals_data['Count'])]
plt.legend(wedges, legend_labels, loc='upper right')
plt.axis('equal')  # Equal aspect ratio so the pie is drawn as a circle
plt.show()
import pandas as pd
import matplotlib.pyplot as plt
# Count the distinct school names in every region, largest region first.
# The columns are still spelled as in the workbook at this point ('Region',
# 'School'); they are only lower-cased further down, right before the choropleth
# merge, so lower-case keys here raise KeyError on a top-to-bottom run.
schools_per_region = (
    df.groupby('Region')['School']
    .nunique()
    .reset_index(name='SchoolCount')
    .sort_values(by='SchoolCount', ascending=False)
)

# Bar chart of the number of schools per region
plt.figure(figsize=(12, 6))
plt.bar(schools_per_region['Region'], schools_per_region['SchoolCount'])
plt.title('Number of Schools in Each Region')
plt.xlabel('Region')
plt.ylabel('Number of Schools')
plt.xticks(rotation=90)  # Rotate x-axis labels for better readability
plt.tight_layout()
plt.show()
import pandas as pd
import matplotlib.pyplot as plt
# Sum 'TotalEnrollment' per region, largest region first.
# The columns are still spelled as in the workbook at this point ('Region',
# 'TotalEnrollment'); they are only lower-cased further down, right before the
# choropleth merge, so lower-case keys here raise KeyError on a top-to-bottom run.
total_enrollment_by_region = (
    df.groupby('Region')['TotalEnrollment']
    .sum()
    .reset_index()
    .sort_values(by='TotalEnrollment', ascending=False)
)

# Bar chart of total enrollment per region
plt.figure(figsize=(12, 6))
plt.bar(total_enrollment_by_region['Region'], total_enrollment_by_region['TotalEnrollment'])
plt.xlabel('Region')
plt.ylabel('Total Enrollment')
plt.title('Total Enrollment by Region in 2022')
plt.xticks(rotation=90)  # Rotate x-axis labels for better readability
plt.tight_layout()
plt.show()
# Rebuild every school's total enrollment from its boy and girl totals
df['TotalEnrollment'] = df['TotalBoys'].add(df['TotalGirls'])

# One row per region: summed enrollment and the number of non-null school entries
region_summary = (
    df.groupby('Region')
    .agg(TotalEnrollment=('TotalEnrollment', 'sum'), School=('School', 'count'))
    .reset_index()
)

# Enrollment ratio = pupils per school within the region
region_summary['EnrollmentRatio'] = region_summary['TotalEnrollment'].div(region_summary['School'])

# Highest ratio first so the bars read left to right in descending order
region_summary_sorted = region_summary.sort_values('EnrollmentRatio', ascending=False)

# Bar chart of the enrollment ratio per region
plt.figure(figsize=(12, 6))
sns.barplot(data=region_summary_sorted, x='Region', y='EnrollmentRatio', palette="Set3")
plt.title("Enrollment Ratio by Region in 2022")
plt.xlabel("Region")
plt.ylabel("Enrollment Ratio (Total Enrollment per School)")
plt.xticks(rotation=45, ha="right")
plt.show()
C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead if pd.api.types.is_categorical_dtype(vector): C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead if pd.api.types.is_categorical_dtype(vector): C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead if pd.api.types.is_categorical_dtype(vector):
import pandas as pd
import matplotlib.pyplot as plt
# Age bands; each has a '<band>Boys' and a '<band>Girls' column in the dataset
age_groups = ['Below6Years', '6years', '7years', '8years', '9years', '10years', '11years', '12years', '13years', 'Above13years']

# Dataset-wide totals per age band, separately for boys and girls.
# Comprehensions are used instead of a loop with temporaries so that the
# dataset-wide `total_boys` / `total_girls` computed for the gender pie chart
# are not silently overwritten with the totals of the last age band.
boys_enrollment = [df[f'{age_group}Boys'].sum() for age_group in age_groups]
girls_enrollment = [df[f'{age_group}Girls'].sum() for age_group in age_groups]

# Stacked bar chart: boys at the bottom, girls stacked on top
plt.figure(figsize=(12, 8))
plt.bar(age_groups, boys_enrollment, label='Boys', color='dodgerblue')
plt.bar(age_groups, girls_enrollment, bottom=boys_enrollment, label='Girls', color='lightcoral')

# Titles, axis labels and legend
plt.title("Distribution of Students by Age Group and Gender in 2022")
plt.xlabel("Age Group")
plt.ylabel("Total Enrollment")
plt.legend(title='Gender')

# Rotate x-axis labels for better readability
plt.xticks(rotation=45, ha="right")
plt.show()
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
# Per-age enrollment columns for ages 6 to 13, boys before girls within each age
age_group_columns = [f'{age}years{sex}' for age in range(6, 14) for sex in ('Boys', 'Girls')]

# Long format: one row per (school row, age/gender column) with its enrollment value
plot_data = df[['Region', *age_group_columns]].melt(
    id_vars=['Region'],
    value_vars=age_group_columns,
    var_name='AgeGroupGender',
    value_name='Enrollment',
)

# Split the combined column name into a numeric age and a gender label
plot_data['AgeGroup'] = plot_data['AgeGroupGender'].str.split('years').str[0].astype('int64')
plot_data['Gender'] = (
    plot_data['AgeGroupGender']
    .str.contains('Boys', regex=False)
    .map({True: 'Boys', False: 'Girls'})
)

# Violin plot of per-school enrollment for each age, split by gender
plt.figure(figsize=(12, 8))
gender_palette = {'Boys': 'dodgerblue', 'Girls': 'lightcoral'}
sns.violinplot(x='AgeGroup', y='Enrollment', hue='Gender', data=plot_data, palette=gender_palette)
plt.title("Distribution of Enrollment by Age Group and Gender in 2022")
plt.xlabel("Age Group")
plt.ylabel("Enrollment")
plt.legend(title='Gender')
plt.xticks(rotation=45, ha="right")
plt.show()
C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead if pd.api.types.is_categorical_dtype(vector): C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead if pd.api.types.is_categorical_dtype(vector): C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead if pd.api.types.is_categorical_dtype(vector): C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead if pd.api.types.is_categorical_dtype(vector):
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
# Age bands and genders that make up the '<band><gender>' column names
age_groups = ['6years', '7years', '8years', '9years', '10years', '11years', '12years', '13years']
genders = ['Boys', 'Girls']

# Map every dataset column to a readable "<gender> - <band>" label
display_names = {
    f"{age_group}{gender}": f"{gender} - {age_group}"
    for age_group in age_groups
    for gender in genders
}

# Keep the region plus the relabelled age/gender columns
plot_data = df[['Region'] + list(display_names)].rename(columns=display_names)

# Long format: 'variable' holds the label, 'value' the per-school enrollment
melted_data = plot_data.melt(id_vars=['Region'], value_vars=list(plot_data.columns[1:]))

# One box per age/gender combination
plt.figure(figsize=(12, 8))
sns.boxplot(data=melted_data, x='variable', y='value', palette='Set2')
plt.title("Box Plot of Enrollment by Age Group and Gender in 2022")
plt.xlabel("Age Group and Gender")
plt.ylabel("Enrollment")
plt.xticks(rotation=45, ha="right")
plt.show()
C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
import pandas as pd
import plotly.express as px
# Age bands and genders that make up the '<band><gender>' column names
age_groups = ['6years', '7years', '8years', '9years', '10years', '11years', '12years', '13years']
genders = ['Boys', 'Girls']

# Select the region plus every age/gender column, then give the columns
# readable "<gender> - <band>" labels (same order as the selection)
band_gender_pairs = [(age_group, gender) for age_group in age_groups for gender in genders]
selected_columns = ['Region'] + [f"{age_group}{gender}" for age_group, gender in band_gender_pairs]
readable_labels = ['Region'] + [f"{gender} - {age_group}" for age_group, gender in band_gender_pairs]
plot_data = df[selected_columns].set_axis(readable_labels, axis=1)

# Long format: 'variable' holds the label, 'value' the per-school enrollment
melted_data = plot_data.melt(id_vars=['Region'], value_vars=list(plot_data.columns[1:]))

# Treemap nested by region, then by age/gender label; tile size and colour both follow 'value'
fig = px.treemap(
    data_frame=melted_data,
    path=['Region', 'variable'],
    values='value',
    color='value',
    color_continuous_scale='Viridis',
    title='TreeMap of Enrollment by Region, Age Group, and Gender in 2022',
)

# Remove the side/bottom margins and leave room for the title
fig.update_layout(margin={'l': 0, 'r': 0, 'b': 0, 't': 40})
fig.show()
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
# Age bands to plot; each has a '<band>Boys' and a '<band>Girls' column
age_groups = ['6years', '7years', '8years', '9years', '10years', '11years', '12years', '13years']
genders = ['Boys', 'Girls']

# One stacked panel per age band, all sharing the same x-axis
fig, axes = plt.subplots(nrows=len(age_groups), ncols=1, figsize=(10, 8), sharex=True)

for ax, age_group in zip(axes, age_groups):
    # Labels are set before plotting so seaborn keeps them instead of adding its own
    ax.set(
        title=f"Histogram of Enrollment for {age_group} - Both Genders in 2022",
        xlabel="Enrollment",
        ylabel="Frequency",
    )
    # Per-school enrollment for this age band, boys and girls combined
    enrollment_data = df[f"{age_group}Boys"].add(df[f"{age_group}Girls"])
    sns.histplot(enrollment_data, bins=20, kde=True, color='skyblue', ax=ax)

# Tighten the spacing between panels and render
plt.tight_layout()
plt.show()
C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead. C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead. C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead. C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead. C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. 
Use isinstance(dtype, CategoricalDtype) instead C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead. C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead. C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead. C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
# Age bands and genders that make up the '<band><gender>' column names
age_groups = ['6years', '7years', '8years', '9years', '10years', '11years', '12years', '13years']
genders = ['Boys', 'Girls']

# One panel per region, laid out on a grid at most three panels wide
regions = df['Region'].unique()
n_cols = max(1, min(len(regions), 3))
n_rows = max(1, -(-len(regions) // n_cols))  # ceiling division; at least one row

# squeeze=False always yields a 2-D array of Axes, so flatten() also works when
# the grid degenerates to a single panel (one region)
fig, axes = plt.subplots(nrows=n_rows, ncols=n_cols, figsize=(15, 10), squeeze=False)
axes = axes.flatten()

# Every age/gender column that contributes to a school's plotted enrollment
enrollment_columns = [f"{age_group}{gender}" for age_group in age_groups for gender in genders]

for ax, region in zip(axes, regions):
    # Labels are set before plotting so seaborn keeps them instead of adding its own
    ax.set_title(f"Histogram of Enrollment in {region} - Both Genders in 2022")
    ax.set_xlabel("Enrollment")
    ax.set_ylabel("Frequency")
    # Per-school enrollment (ages 6-13, both genders) for the schools of this region
    region_data = df[df['Region'] == region]
    enrollment_data = region_data[enrollment_columns].sum(axis=1)
    sns.histplot(enrollment_data, bins=20, kde=True, color='skyblue', ax=ax)

# Hide grid cells that have no region (region count not a multiple of the grid width)
for unused_ax in axes[len(regions):]:
    unused_ax.set_visible(False)

# Tighten the spacing between panels and render
plt.tight_layout()
plt.show()
C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead. C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead. C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead. C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead. C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. 
Use isinstance(dtype, CategoricalDtype) instead C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead. C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead. C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead. C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead. C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. 
Convert inf values to NaN before operating instead. C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead. C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead. C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead. C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead. C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. 
Use isinstance(dtype, CategoricalDtype) instead C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead. C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead. C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead. C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead. C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. 
Convert inf values to NaN before operating instead. C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead. C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead. C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead. C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead. C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. 
Use isinstance(dtype, CategoricalDtype) instead C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead. C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead. C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead. C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
# Sum 'TotalBoys' and 'TotalGirls' per region.
# The columns are still spelled as in the workbook at this point ('Region',
# 'TotalBoys', 'TotalGirls'); they are only lower-cased further down, right before
# the choropleth merge, so lower-case keys here raise KeyError on a top-to-bottom run.
total_enrollment_by_region = df.groupby('Region')[['TotalBoys', 'TotalGirls']].sum().reset_index()

plt.figure(figsize=(12, 6))

# One x position per region; the two bars of a region sit side by side around it
num_regions = len(total_enrollment_by_region)
x = np.arange(num_regions)
bar_width = 0.35

# Grouped bars: boys shifted half a bar to the left, girls half a bar to the right
plt.bar(x - bar_width / 2, total_enrollment_by_region['TotalBoys'], bar_width, label='TotalBoys')
plt.bar(x + bar_width / 2, total_enrollment_by_region['TotalGirls'], bar_width, label='TotalGirls')

# Label each group with its region name
plt.xticks(x, total_enrollment_by_region['Region'], rotation=90)

plt.xlabel('Region')
plt.ylabel('Total Enrollment')
plt.title('Total Enrollment by Region in 2022 (Boys vs. Girls)')
plt.legend()

# Keep the rotated region labels inside the figure, then show the plot
plt.tight_layout()
plt.show()
<matplotlib.legend.Legend at 0x28ba611e560>
# Install geopandas (used by the choropleth map) into the interpreter that is
# running this code. A bare `pip install ...` line is only understood by IPython's
# automagic and is a SyntaxError in plain Python; going through `sys.executable -m pip`
# is valid everywhere and targets the active environment.
import subprocess
import sys

subprocess.check_call([sys.executable, "-m", "pip", "install", "geopandas"])
Requirement already satisfied: geopandas in c:\users\administrator\anaconda3\lib\site-packages (0.14.0) Requirement already satisfied: fiona>=1.8.21 in c:\users\administrator\anaconda3\lib\site-packages (from geopandas) (1.9.4.post1) Requirement already satisfied: pyproj>=3.3.0 in c:\users\administrator\anaconda3\lib\site-packages (from geopandas) (3.6.1) Requirement already satisfied: packaging in c:\users\administrator\anaconda3\lib\site-packages (from geopandas) (22.0) Requirement already satisfied: shapely>=1.8.0 in c:\users\administrator\anaconda3\lib\site-packages (from geopandas) (2.0.1) Requirement already satisfied: pandas>=1.4.0 in c:\users\administrator\anaconda3\lib\site-packages (from geopandas) (2.1.1) Requirement already satisfied: click-plugins>=1.0 in c:\users\administrator\anaconda3\lib\site-packages (from fiona>=1.8.21->geopandas) (1.1.1) Requirement already satisfied: cligj>=0.5 in c:\users\administrator\anaconda3\lib\site-packages (from fiona>=1.8.21->geopandas) (0.7.2) Requirement already satisfied: click~=8.0 in c:\users\administrator\anaconda3\lib\site-packages (from fiona>=1.8.21->geopandas) (8.0.4) Requirement already satisfied: certifi in c:\users\administrator\anaconda3\lib\site-packages (from fiona>=1.8.21->geopandas) (2023.5.7) Requirement already satisfied: attrs>=19.2.0 in c:\users\administrator\anaconda3\lib\site-packages (from fiona>=1.8.21->geopandas) (22.1.0) Requirement already satisfied: six in c:\users\administrator\anaconda3\lib\site-packages (from fiona>=1.8.21->geopandas) (1.16.0) Requirement already satisfied: tzdata>=2022.1 in c:\users\administrator\anaconda3\lib\site-packages (from pandas>=1.4.0->geopandas) (2023.3) Requirement already satisfied: pytz>=2020.1 in c:\users\administrator\anaconda3\lib\site-packages (from pandas>=1.4.0->geopandas) (2022.7) Requirement already satisfied: numpy>=1.22.4 in c:\users\administrator\anaconda3\lib\site-packages (from pandas>=1.4.0->geopandas) (1.23.5) Requirement already satisfied: 
python-dateutil>=2.8.2 in c:\users\administrator\anaconda3\lib\site-packages (from pandas>=1.4.0->geopandas) (2.8.2) Requirement already satisfied: colorama in c:\users\administrator\anaconda3\lib\site-packages (from click~=8.0->fiona>=1.8.21->geopandas) (0.4.6) Note: you may need to restart the kernel to use updated packages.
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
# Choropleth of total 2022 enrollment per region.
# Expects the enrollment DataFrame `df` (one row per school) to be loaded already.

# Load the shapefile for Tanzanian districts and town councils.
shapefile_path = "C:/Users/Administrator/Downloads/Districts_Shapefiles_2019/Districts and TC as 2020.shp"
tanzania_districts = gpd.read_file(shapefile_path)

# Normalise the dataset's column names (strip surrounding whitespace, lowercase).
# NOTE: this renames the columns of `df` in place, so code that runs after this
# point must use the lowercase names ('region', 'council', 'totalenrollment', ...).
df.columns = df.columns.str.strip().str.lower()

# Column in the shapefile that corresponds to 'region' in the dataset.
shapefile_column_name = 'Region_Nam'  # Replace with the actual name in your shapefile

# `df` holds one row per school. Merging it directly onto the district polygons
# would be a many-to-many join (every polygon repeated once per school in its
# region) and the map would be coloured by per-school values. Collapse to one
# total per region first so the plotted value really is the regional total.
region_totals = df.groupby('region', as_index=False)['totalenrollment'].sum()

# Attach the regional totals to every district polygon of that region.
# The join is an exact string match; presumably the region spellings/case in the
# shapefile agree with the dataset -- TODO confirm, otherwise regions silently
# drop out of the inner join.
merged_data = tanzania_districts.merge(
    region_totals,
    left_on=shapefile_column_name,
    right_on='region',
    how='inner',
)

# Create a choropleth map
fig, ax = plt.subplots(figsize=(12, 8))
merged_data.plot(
    column='totalenrollment',
    cmap='YlGnBu',
    linewidth=0.8,
    ax=ax,
    edgecolor='0.8',
    legend=True,
)
ax.set_title('Total Enrollment by Region in 2022')
plt.show()
import pandas as pd
import matplotlib.pyplot as plt
# Bar charts of total enrollment (boys + girls) per council, one subplot per age group.
# Expects the enrollment DataFrame `df` to be loaded already, with column names
# stripped and lowercased (e.g. 'council', '6yearsboys', '6yearsgirls').

# Age groups to visualise; each one has a '<group>boys' and a '<group>girls' column.
age_groups = [
    '6years',
    '7years',
    '8years',
    '9years',
    '10years',
    '11years',
    '12years',
    '13years',
    'above13years',
]

# Flat list of the underlying columns (boys then girls for each age group).
age_group_columns = [
    f'{age_group}{sex}' for age_group in age_groups for sex in ('boys', 'girls')
]

# Sum every age/sex column per council once, instead of re-grouping inside the loop.
council_sums = df.groupby('council')[age_group_columns].sum()

# One subplot per age group, stacked vertically.
fig, axes = plt.subplots(len(age_groups), 1, figsize=(12, 4 * len(age_groups)))

for ax, age_group in zip(axes, age_groups):
    # Total enrollment for the age group is boys + girls. Both columns are looked
    # up explicitly by their lowercase names so the sum cannot silently degrade
    # into "one column added to itself".
    total_enrollment = council_sums[f'{age_group}boys'] + council_sums[f'{age_group}girls']

    # Plot total enrollment by council for the age group
    ax.bar(total_enrollment.index, total_enrollment.values)
    ax.set_title(f'Total Enrollment by Council for {age_group}')
    ax.set_xlabel('Council')
    ax.set_ylabel('Total Enrollment')

    # Rotate x-axis labels for better readability
    ax.tick_params(axis='x', rotation=90)

# Use tight_layout to improve subplot spacing
plt.tight_layout()

# Show or save the subplots
# plt.savefig('age_group_enrollment_subplots.png')  # Save as an image file
plt.show()  # Display the subplots (comment this line if you want to save only)
plt.close()  # Close the figure to free up memory