Analysis of Enrollment in Government and Non-Government Primary Schools in Tanzania by Age and Gender in 2022

Raw dataset obtained from: https://www.tamisemi.go.tz/storage/app/epr42022/Enrollment%20in%20Government%20and%20Non_Government%20Primary%20Schools%20by%20Age%20and%20Sex_2022.xlsx

Data cleaned, prepared, and analyzed by Moiz A. Rashid - Data Analyst & Database Administrator on 06/10/2023

In [77]:
# Step 1: Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Use seaborn's "whitegrid" theme for the figures drawn in this notebook.
sns.set_style(style="whitegrid")

# Read the enrollment workbook into the notebook-wide `df` frame.
# The path is relative, so the workbook must sit in the working directory.
file_path = "Enrollment in Government and Non_Government Primary Schools by Age and Sex_2022-1.xlsx"
df = pd.read_excel(io=file_path)

# Preview the first five rows as the cell's rich output.
df.head(n=5)
Out[77]:
Region Council Ward School RegNo Ownership Below6YearsBoys Below6YearsGirls 6yearsBoys 6yearsGirls ... 11yearsGirls 12yearsBoys 12yearsGirls 13yearsBoys 13yearsGirls Above13yearsBoys Above13yearsGirls TotalBoys TotalGirls TotalEnrollment
0 Arusha Arusha Bangata BANGATA EM.1865 Government 1 0 23 21 ... 25 28 14 17 12 29 15 248 193 441
1 Arusha Arusha Bangata ENGIKARETI EM.14567 Government 0 0 25 32 ... 30 28 24 39 25 30 26 207 220 427
2 Arusha Arusha Bangata IKIRWA EM.15402 Non-Government 0 0 6 1 ... 9 8 7 4 2 2 0 81 70 151
3 Arusha Arusha Bangata MIDAWE EM.4582 Government 0 0 29 26 ... 40 23 24 9 11 34 32 247 241 488
4 Arusha Arusha Bangata SASI EM.3409 Government 7 9 22 25 ... 20 26 31 16 26 11 8 180 214 394

5 rows × 29 columns

In [78]:
# Rows per ownership category, most frequent first
ownership_counts = df['Ownership'].value_counts()

# Legend entries show the absolute count next to each category name
legend_labels = [f'{label} ({count})' for label, count in ownership_counts.items()]

# Draw the pie on an explicit Axes instead of the implicit pyplot state
fig, ax = plt.subplots(figsize=(8, 8))
ax.pie(
    ownership_counts,
    labels=ownership_counts.index,
    autopct='%1.1f%%',
    startangle=140,
    shadow=True,
)
ax.set_title('Distribution of School Ownership in 2022')
ax.legend(legend_labels, loc='upper right')
ax.axis('equal')  # equal aspect ratio keeps the pie circular

plt.show()
In [3]:
# Overall enrollment per gender, summed over every row of `df`
total_boys = df['TotalBoys'].sum()
total_girls = df['TotalGirls'].sum()

# Two-row frame: one record per gender with its total
totals_data = pd.DataFrame(
    [('Total Boys', total_boys), ('Total Girls', total_girls)],
    columns=['Gender', 'Count'],
)

# Legend entries pair each gender label with its absolute total
legend_labels = [
    f'{label}: {count}'
    for label, count in totals_data.itertuples(index=False, name=None)
]

# Draw the pie on an explicit Axes
fig, ax = plt.subplots(figsize=(8, 8))
ax.pie(
    totals_data['Count'],
    labels=totals_data['Gender'],
    autopct='%1.1f%%',
    startangle=140,
    shadow=True,
)
ax.set_title('Distribution of Total Boys and Total Girls in 2022')
ax.legend(legend_labels, loc='upper right')
ax.axis('equal')  # equal aspect ratio keeps the pie circular

plt.show()
In [73]:
import pandas as pd
import matplotlib.pyplot as plt

# `df` is the enrollment frame loaded in the first cell of this notebook.

# Count the distinct school names per region, largest region first.
# The loaded frame uses capitalised column names ('Region', 'School'); the
# lowercase spellings used here before raise KeyError on a fresh kernel.
# NOTE(review): school names may repeat within a region; 'RegNo' looks like a
# per-school identifier and may be the better key to count — confirm.
schools_per_region = (
    df.groupby('Region')['School']
    .nunique()
    .rename('SchoolCount')
    .reset_index()
    .sort_values(by='SchoolCount', ascending=False)
)

# Bar chart of the number of schools per region
fig, ax = plt.subplots(figsize=(12, 6))
ax.bar(schools_per_region['Region'], schools_per_region['SchoolCount'])
ax.set_title('Number of Schools in Each Region')
ax.set_xlabel('Region')
ax.set_ylabel('Number of Schools')
ax.tick_params(axis='x', labelrotation=90)  # vertical labels so region names do not overlap

fig.tight_layout()
plt.show()
In [43]:
import pandas as pd
import matplotlib.pyplot as plt

# `df` is the enrollment frame loaded in the first cell of this notebook.

# Sum 'TotalEnrollment' per region, largest region first.
# The loaded frame uses capitalised column names ('Region', 'TotalEnrollment');
# the lowercase spellings used here before raise KeyError on a fresh kernel.
total_enrollment_by_region = (
    df.groupby('Region')['TotalEnrollment']
    .sum()
    .reset_index()
    .sort_values(by='TotalEnrollment', ascending=False)
)

# Bar chart of total enrollment per region
fig, ax = plt.subplots(figsize=(12, 6))
ax.bar(total_enrollment_by_region['Region'], total_enrollment_by_region['TotalEnrollment'])
ax.set_xlabel('Region')
ax.set_ylabel('Total Enrollment')
ax.set_title('Total Enrollment by Region in 2022')
ax.tick_params(axis='x', labelrotation=90)  # vertical labels so region names do not overlap
fig.tight_layout()

plt.show()
In [6]:
# Recompute the per-row total as boys + girls and write it back onto `df`
df['TotalEnrollment'] = df['TotalBoys'].add(df['TotalGirls'])

# Per-region roll-up: summed enrollment and the count of non-null 'School' entries
region_summary = (
    df.groupby('Region')
    .agg(TotalEnrollment=('TotalEnrollment', 'sum'), School=('School', 'count'))
    .reset_index()
)

# Enrollment ratio = total enrollment divided by the number of schools in the region
region_summary['EnrollmentRatio'] = region_summary['TotalEnrollment'].div(region_summary['School'])

# Highest ratio first so the bars read left to right in descending order
region_summary_sorted = region_summary.sort_values(by='EnrollmentRatio', ascending=False)

# Bar chart of the enrollment ratio per region
fig, ax = plt.subplots(figsize=(12, 6))
sns.barplot(data=region_summary_sorted, x='Region', y='EnrollmentRatio', palette="Set3", ax=ax)
ax.set_title("Enrollment Ratio by Region in 2022")
ax.set_xlabel("Region")
ax.set_ylabel("Enrollment Ratio (Total Enrollment per School)")
plt.setp(ax.get_xticklabels(), rotation=45, ha="right")
plt.show()
C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
  if pd.api.types.is_categorical_dtype(vector):
C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
  if pd.api.types.is_categorical_dtype(vector):
C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
  if pd.api.types.is_categorical_dtype(vector):
In [7]:
import pandas as pd
import matplotlib.pyplot as plt

# Age brackets exactly as they prefix the '<bracket>Boys' / '<bracket>Girls' columns
age_groups = ['Below6Years', '6years', '7years', '8years', '9years', '10years', '11years', '12years', '13years', 'Above13years']

# Enrollment summed over all rows, one value per age bracket and gender.
# Comprehensions keep the per-bracket sums local, so the notebook-level
# `total_boys` / `total_girls` (the overall totals computed for the gender pie
# chart) are no longer overwritten with the last bracket's figures.
boys_enrollment = [df[f'{age_group}Boys'].sum() for age_group in age_groups]
girls_enrollment = [df[f'{age_group}Girls'].sum() for age_group in age_groups]

# Stacked bars: boys at the bottom, girls stacked on top of them
fig, ax = plt.subplots(figsize=(12, 8))
ax.bar(age_groups, boys_enrollment, label='Boys', color='dodgerblue')
ax.bar(age_groups, girls_enrollment, bottom=boys_enrollment, label='Girls', color='lightcoral')

# Titles, axis labels and legend
ax.set_title("Distribution of Students by Age Group and Gender in 2022")
ax.set_xlabel("Age Group")
ax.set_ylabel("Total Enrollment")
ax.legend(title='Gender')

# Slanted x-axis labels for readability
plt.setp(ax.get_xticklabels(), rotation=45, ha="right")

plt.show()
In [8]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# `df` is the enrollment frame loaded in the first cell of this notebook.

# Boys/Girls enrollment columns for ages 6 through 13, paired by age
age_group_columns = [
    f'{age}years{gender}'
    for age in range(6, 14)
    for gender in ('Boys', 'Girls')
]

# Long format: 'Region' plus one row per (source row, age/gender column)
plot_data = df.melt(
    id_vars=['Region'],
    value_vars=age_group_columns,
    var_name='AgeGroupGender',
    value_name='Enrollment',
)

# Split the combined column name into a numeric age and a gender label
plot_data['AgeGroup'] = plot_data['AgeGroupGender'].str.split('years').str[0].astype('int64')
plot_data['Gender'] = np.where(
    plot_data['AgeGroupGender'].str.contains('Boys', regex=False), 'Boys', 'Girls'
)

# Violin plot of enrollment per age, split by gender
fig, ax = plt.subplots(figsize=(12, 8))
sns.violinplot(
    data=plot_data,
    x='AgeGroup',
    y='Enrollment',
    hue='Gender',
    palette={'Boys': 'dodgerblue', 'Girls': 'lightcoral'},
    ax=ax,
)
ax.set_title("Distribution of Enrollment by Age Group and Gender in 2022")
ax.set_xlabel("Age Group")
ax.set_ylabel("Enrollment")
ax.legend(title='Gender')
plt.setp(ax.get_xticklabels(), rotation=45, ha="right")
plt.show()
C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
  if pd.api.types.is_categorical_dtype(vector):
C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
  if pd.api.types.is_categorical_dtype(vector):
C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
  if pd.api.types.is_categorical_dtype(vector):
C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
  if pd.api.types.is_categorical_dtype(vector):
In [10]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# `df` is the enrollment frame loaded in the first cell of this notebook.

# Ages 6-13 for both genders
age_groups = ['6years', '7years', '8years', '9years', '10years', '11years', '12years', '13years']
genders = ['Boys', 'Girls']

# Source column -> display label, e.g. '6yearsBoys' -> 'Boys - 6years'
column_labels = {
    f"{age_group}{gender}": f"{gender} - {age_group}"
    for age_group in age_groups
    for gender in genders
}

# 'Region' plus the relabelled age/gender columns
plot_data = df[['Region', *column_labels]].rename(columns=column_labels)

# Long format: every non-'Region' column becomes a ('variable', 'value') pair
melted_data = plot_data.melt(id_vars=['Region'])

# One box per age/gender label
fig, ax = plt.subplots(figsize=(12, 8))
sns.boxplot(data=melted_data, x='variable', y='value', palette='Set2', ax=ax)
ax.set_title("Box Plot of Enrollment by Age Group and Gender in 2022")
ax.set_xlabel("Age Group and Gender")
ax.set_ylabel("Enrollment")
plt.setp(ax.get_xticklabels(), rotation=45, ha="right")
plt.show()
C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1498: FutureWarning:

is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead

C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1498: FutureWarning:

is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead

C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1498: FutureWarning:

is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead

In [9]:
import pandas as pd
import plotly.express as px

# `df` is the enrollment frame loaded in the first cell of this notebook.

# Ages 6-13 for both genders
age_groups = ['6years', '7years', '8years', '9years', '10years', '11years', '12years', '13years']
genders = ['Boys', 'Girls']

# Source column -> display label, e.g. '6yearsBoys' -> 'Boys - 6years'
column_labels = {
    f"{age_group}{gender}": f"{gender} - {age_group}"
    for age_group in age_groups
    for gender in genders
}

# 'Region' plus the relabelled age/gender columns
plot_data = df[['Region', *column_labels]].rename(columns=column_labels)

# Long format: every non-'Region' column becomes a ('variable', 'value') pair
melted_data = plot_data.melt(id_vars=['Region'])

# Treemap nested by region, then by age/gender label; tile size and colour
# are both driven by the 'value' column
fig = px.treemap(
    melted_data,
    path=['Region', 'variable'],
    values='value',
    color='value',
    color_continuous_scale='Viridis',
    title='TreeMap of Enrollment by Region, Age Group, and Gender in 2022',
)

# Remove the side/bottom margins, leaving room at the top for the title
fig.update_layout(margin={'l': 0, 'r': 0, 'b': 0, 't': 40})

fig.show()
In [11]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# `df` is the enrollment frame loaded in the first cell of this notebook.

# Ages 6-13 for both genders
age_groups = ['6years', '7years', '8years', '9years', '10years', '11years', '12years', '13years']
genders = ['Boys', 'Girls']

# One stacked panel per age group, all sharing the x-axis
fig, axes = plt.subplots(nrows=len(age_groups), ncols=1, figsize=(10, 8), sharex=True)

for ax, age_group in zip(axes, age_groups):
    # Label the panel first, then draw the histogram onto it
    ax.set(
        title=f"Histogram of Enrollment for {age_group} - Both Genders in 2022",
        xlabel="Enrollment",
        ylabel="Frequency",
    )

    # Boys + girls enrollment per row for this age group
    enrollment_data = df[f"{age_group}Boys"].add(df[f"{age_group}Girls"])

    sns.histplot(enrollment_data, bins=20, kde=True, color='skyblue', ax=ax)

# Let matplotlib space the panels so titles and labels do not collide
fig.tight_layout()

plt.show()
C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1498: FutureWarning:

is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead

C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning:

use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.

C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1498: FutureWarning:

is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead

C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning:

use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.

C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1498: FutureWarning:

is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead

C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning:

use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.

C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1498: FutureWarning:

is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead

C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning:

use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.

C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1498: FutureWarning:

is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead

C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning:

use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.

C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1498: FutureWarning:

is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead

C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning:

use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.

C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1498: FutureWarning:

is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead

C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning:

use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.

C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1498: FutureWarning:

is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead

C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning:

use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.

In [12]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# `df` is the enrollment frame loaded in the first cell of this notebook.

# Ages 6-13 for both genders
age_groups = ['6years', '7years', '8years', '9years', '10years', '11years', '12years', '13years']
genders = ['Boys', 'Girls']

# Every Boys/Girls column for the age groups above; built once and reused for each region
enrollment_columns = [f"{age_group}{gender}" for gender in genders for age_group in age_groups]

# One panel per distinct region
regions = df['Region'].unique()

# Up to three panels per row, with enough rows to fit every region
n_cols = max(1, min(len(regions), 3))
n_rows = max(1, -(-len(regions) // n_cols))  # ceiling division
# squeeze=False always returns a 2-D array of Axes, so flatten() also works
# when there is only a single region (plain subplots(1, 1) returns a bare Axes)
fig, axes = plt.subplots(nrows=n_rows, ncols=n_cols, figsize=(15, 10), squeeze=False)

# Flatten the 2-D grid for simple pairing with the regions
axes = axes.flatten()

# Plot one histogram per region
for ax, region in zip(axes, regions):
    ax.set_title(f"Histogram of Enrollment in {region} - Both Genders in 2022")
    ax.set_xlabel("Enrollment")
    ax.set_ylabel("Frequency")

    # Per-row enrollment (boys + girls, ages 6-13) for the rows of this region
    region_data = df[df['Region'] == region]
    enrollment_data = region_data[enrollment_columns].sum(axis=1)

    sns.histplot(enrollment_data, bins=20, kde=True, color='skyblue', ax=ax)

# Hide grid slots left over when the region count does not fill the last row
for unused_ax in axes[len(regions):]:
    unused_ax.set_visible(False)

# Adjust spacing between subplots
fig.tight_layout()

plt.show()
C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1498: FutureWarning:

is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead

C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning:

use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.

C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1498: FutureWarning:

is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead

C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning:

use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.

C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1498: FutureWarning:

is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead

C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning:

use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.

C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1498: FutureWarning:

is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead

C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning:

use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.

C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1498: FutureWarning:

is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead

C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning:

use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.

C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1498: FutureWarning:

is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead

C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning:

use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.

C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1498: FutureWarning:

is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead

C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning:

use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.

C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1498: FutureWarning:

is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead

C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning:

use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.

C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1498: FutureWarning:

is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead

C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning:

use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.

C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1498: FutureWarning:

is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead

C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning:

use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.

C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1498: FutureWarning:

is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead

C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning:

use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.

C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1498: FutureWarning:

is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead

C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning:

use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.

C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1498: FutureWarning:

is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead

C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning:

use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.

C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1498: FutureWarning:

is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead

C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning:

use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.

C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1498: FutureWarning:

is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead

C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning:

use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.

C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1498: FutureWarning:

is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead

C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning:

use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.

C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1498: FutureWarning:

is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead

C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning:

use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.

C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1498: FutureWarning:

is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead

C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning:

use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.

C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1498: FutureWarning:

is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead

C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning:

use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.

C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1498: FutureWarning:

is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead

C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning:

use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.

C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1498: FutureWarning:

is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead

C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning:

use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.

C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1498: FutureWarning:

is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead

C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning:

use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.

C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1498: FutureWarning:

is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead

C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning:

use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.

C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1498: FutureWarning:

is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead

C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning:

use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.

C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1498: FutureWarning:

is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead

C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning:

use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.

C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1498: FutureWarning:

is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead

C:\Users\Administrator\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning:

use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.

In [55]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Assuming you've already loaded your dataset into the 'df' DataFrame
# df = pd.read_excel("Enrollment in Government and Non-Government Primary Schools by Age and Sex_2022-1.xlsx")

# Grouped bar chart: total boys vs. total girls enrolled in each region.
#
# Column names are normalised (stripped + lowercased) on a local copy, so this
# cell works both on the freshly loaded frame (whose columns are 'Region',
# 'TotalBoys', ...) and on a frame whose columns have already been lowercased
# in place by another cell. `df` itself is not modified here.
enrollment = df.rename(columns=lambda name: str(name).strip().lower())

# Sum 'totalboys' and 'totalgirls' over all schools of each region
total_enrollment_by_region = (
    enrollment.groupby('region')[['totalboys', 'totalgirls']]
    .sum()
    .reset_index()
)

# One x position per region; the boys/girls bars sit half a bar width to the
# left/right of that position so the pair is centred on the tick.
num_regions = len(total_enrollment_by_region)
x = np.arange(num_regions)
bar_width = 0.35

fig, ax = plt.subplots(figsize=(12, 6))
ax.bar(x - bar_width/2, total_enrollment_by_region['totalboys'], bar_width, label='TotalBoys')
ax.bar(x + bar_width/2, total_enrollment_by_region['totalgirls'], bar_width, label='TotalGirls')

# Label each group with its region name
ax.set_xticks(x)
ax.set_xticklabels(total_enrollment_by_region['region'], rotation=90)

# Set labels and title
ax.set_xlabel('Region')
ax.set_ylabel('Total Enrollment')
ax.set_title('Total Enrollment by Region in 2022 (Boys vs. Girls)')

# Add a legend
ax.legend()

# Show the chart (also keeps the Legend repr from becoming the cell output)
plt.show()
Out[55]:
<matplotlib.legend.Legend at 0x28ba611e560>
In [13]:
pip install geopandas
Requirement already satisfied: geopandas in c:\users\administrator\anaconda3\lib\site-packages (0.14.0)
Requirement already satisfied: fiona>=1.8.21 in c:\users\administrator\anaconda3\lib\site-packages (from geopandas) (1.9.4.post1)
Requirement already satisfied: pyproj>=3.3.0 in c:\users\administrator\anaconda3\lib\site-packages (from geopandas) (3.6.1)
Requirement already satisfied: packaging in c:\users\administrator\anaconda3\lib\site-packages (from geopandas) (22.0)
Requirement already satisfied: shapely>=1.8.0 in c:\users\administrator\anaconda3\lib\site-packages (from geopandas) (2.0.1)
Requirement already satisfied: pandas>=1.4.0 in c:\users\administrator\anaconda3\lib\site-packages (from geopandas) (2.1.1)
Requirement already satisfied: click-plugins>=1.0 in c:\users\administrator\anaconda3\lib\site-packages (from fiona>=1.8.21->geopandas) (1.1.1)
Requirement already satisfied: cligj>=0.5 in c:\users\administrator\anaconda3\lib\site-packages (from fiona>=1.8.21->geopandas) (0.7.2)
Requirement already satisfied: click~=8.0 in c:\users\administrator\anaconda3\lib\site-packages (from fiona>=1.8.21->geopandas) (8.0.4)
Requirement already satisfied: certifi in c:\users\administrator\anaconda3\lib\site-packages (from fiona>=1.8.21->geopandas) (2023.5.7)
Requirement already satisfied: attrs>=19.2.0 in c:\users\administrator\anaconda3\lib\site-packages (from fiona>=1.8.21->geopandas) (22.1.0)
Requirement already satisfied: six in c:\users\administrator\anaconda3\lib\site-packages (from fiona>=1.8.21->geopandas) (1.16.0)
Requirement already satisfied: tzdata>=2022.1 in c:\users\administrator\anaconda3\lib\site-packages (from pandas>=1.4.0->geopandas) (2023.3)
Requirement already satisfied: pytz>=2020.1 in c:\users\administrator\anaconda3\lib\site-packages (from pandas>=1.4.0->geopandas) (2022.7)
Requirement already satisfied: numpy>=1.22.4 in c:\users\administrator\anaconda3\lib\site-packages (from pandas>=1.4.0->geopandas) (1.23.5)
Requirement already satisfied: python-dateutil>=2.8.2 in c:\users\administrator\anaconda3\lib\site-packages (from pandas>=1.4.0->geopandas) (2.8.2)
Requirement already satisfied: colorama in c:\users\administrator\anaconda3\lib\site-packages (from click~=8.0->fiona>=1.8.21->geopandas) (0.4.6)
Note: you may need to restart the kernel to use updated packages.
In [31]:
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt

# Load your dataset (assuming you've already loaded it)
# df = pd.read_excel("Enrollment in Government and Non-Government Primary Schools by Age and Sex_2022-1.xlsx")

# Choropleth of total 2022 enrollment per region, drawn on district polygons.

# Load the shapefile for Tanzanian districts and towns
# NOTE(review): absolute path on the author's machine — adjust to where the
# shapefile lives in your environment.
shapefile_path = "C:/Users/Administrator/Downloads/Districts_Shapefiles_2019/Districts and TC as 2020.shp"
tanzania_districts = gpd.read_file(shapefile_path)

# Strip surrounding spaces from the column names and lowercase them.
# This renames df's columns in place; it is kept that way because other cells
# in this notebook look columns up by lowercase names ('region', 'council', ...).
df.columns = df.columns.str.strip().str.lower()

# Column in the shapefile that corresponds to 'region' in the enrollment data
shapefile_column_name = 'Region_Nam'  # Replace with the actual name in your shapefile

# df has one row per school, so collapse it to one row per region first.
# Merging the school rows directly would repeat every district polygon once per
# school in its region and colour the map by a single school's enrollment
# instead of the regional total announced in the title.
enrollment_by_region = df.groupby('region', as_index=False)['totalenrollment'].sum()

# The inner merge silently drops regions whose name has no exact match in the
# shapefile, so report them instead of letting them vanish from the map.
unmatched_regions = set(enrollment_by_region['region']) - set(tanzania_districts[shapefile_column_name])
if unmatched_regions:
    print(
        "Regions without a matching shapefile entry (not drawn): "
        f"{sorted(map(str, unmatched_regions))}"
    )

# Attach the regional total to every district of that region.
# validate='many_to_one' raises if the right-hand side has duplicate regions.
merged_data = tanzania_districts.merge(
    enrollment_by_region,
    left_on=shapefile_column_name,
    right_on='region',
    how='inner',
    validate='many_to_one',
)

# Create a choropleth map
fig, ax = plt.subplots(figsize=(12, 8))
merged_data.plot(column='totalenrollment', cmap='YlGnBu', linewidth=0.8, ax=ax, edgecolor='0.8', legend=True)
ax.set_title('Total Enrollment by Region in 2022')
plt.show()
In [70]:
import pandas as pd
import matplotlib.pyplot as plt

# Assuming you've already loaded your dataset into the 'df' DataFrame
# df = pd.read_excel("Enrollment in Government and Non-Government Primary Schools by Age and Sex_2022-1.xlsx")

# One bar chart per age group: total enrollment (boys + girls) by council.

# Age groups to visualize; each has a '<group>boys' and a '<group>girls' column
age_groups = [
    '6years',
    '7years',
    '8years',
    '9years',
    '10years',
    '11years',
    '12years',
    '13years',
    'above13years',
]

# The boys/girls column names of those groups, in boys-then-girls order per
# group (same contents and order as the previous hand-written list).
age_group_columns = [
    f'{age_group}{sex}' for age_group in age_groups for sex in ('boys', 'girls')
]

# Normalise column names on a local copy so the lookups below work whether or
# not df's columns have already been lowercased by another cell.
enrollment = df.rename(columns=lambda name: str(name).strip().lower())

# One subplot per age group, stacked vertically
fig, axes = plt.subplots(len(age_groups), 1, figsize=(12, 4 * len(age_groups)))

for ax, age_group in zip(axes, age_groups):
    boys_column = f'{age_group}boys'
    girls_column = f'{age_group}girls'

    # Sum both sexes per council. Both columns must be selected here: the total
    # is boys + girls, not twice a single-sex column.
    council_data = (
        enrollment.groupby('council')[[boys_column, girls_column]]
        .sum()
        .reset_index()
    )
    council_data['TotalEnrollment'] = council_data[boys_column] + council_data[girls_column]

    # Plot total enrollment by council for this age group
    ax.bar(council_data['council'], council_data['TotalEnrollment'])
    ax.set_title(f'Total Enrollment by Council for {age_group}')
    ax.set_xlabel('Council')
    ax.set_ylabel('Total Enrollment')

    # Rotate x-axis labels for better readability
    ax.tick_params(axis='x', rotation=90)

# Adjust subplot spacing once, after every panel has been drawn
fig.tight_layout()

# Show or save the subplots
#plt.savefig('age_group_enrollment_subplots.png')  # Save as an image file
plt.show()  # Display the subplots (comment this line if you want to save only)
plt.close(fig)  # Close the figure to free up memory
In [ ]: