In [ ]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
In [ ]:
# Colab-only step: attach Google Drive to the runtime's filesystem so files
# under '/content/gdrive' can be read by later cells.
from google.colab import drive
drive.mount('/content/gdrive')
Mounted at /content/gdrive
In [ ]:
# Location of the Cryptosporidium/Giardia wildlife CSV on the mounted Drive.
DATA_PATH = '/content/gdrive/MyDrive/Polar Region Project/pr_disease/cryptosporidiumGiardia_wildlife_arctic_vanHemert.csv'

# Read the whole file into the DataFrame used by every later cell.
df = pd.read_csv(DATA_PATH)
In [ ]:
# Report the table dimensions as a (rows, columns) tuple.
print((len(df.index), len(df.columns)))
(233, 11)
In [ ]:
# Preview the first five records (head() defaults to n=5).
df.head()
Out[ ]:
field_ID | lab_ID | species | common_name | date | latitude | longitude | age_class | sex | cryptosporidium | giardia | |
---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1AFSH001 | ARFO-001 | Vulpes lagopus | Arctic fox | 2014-05-06 | 71.1 | -157.1 | Adult | M | Negative | Negative |
1 | 1AFSH002 | ARFO-003 | Vulpes lagopus | Arctic fox | 2014-05-06 | 71.1 | -157.1 | Adult | M | Negative | Negative |
2 | 1AFSH004 | ARFO-006 | Vulpes lagopus | Arctic fox | 2014-05-06 | 71.1 | -157.1 | Adult | M | Negative | Negative |
3 | 2AFSH003 | ARFO-010 | Vulpes lagopus | Arctic fox | 2014-05-07 | 71.0 | -157.2 | Adult | M | Negative | Negative |
4 | 2AFSH004 | ARFO-012 | Vulpes lagopus | Arctic fox | 2014-05-07 | 71.3 | -156.5 | Adult | M | Negative | Negative |
In [ ]:
# Number of missing values in each column.
df.isna().sum()
Out[ ]:
0 | |
---|---|
field_ID | 0 |
lab_ID | 0 |
species | 0 |
common_name | 0 |
date | 0 |
latitude | 0 |
longitude | 0 |
age_class | 0 |
sex | 0 |
cryptosporidium | 0 |
giardia | 0 |
Distribution of positive cases for Cryptosporidium and Giardia
In [ ]:
# Count of Cryptosporidium test results across all rows of df.
# The plot gets its own single-axes figure: reserving the left slot of a
# 1x2 grid and then calling plt.show() in this same cell leaves the right
# half of a 14x6 canvas permanently blank.
fig, ax = plt.subplots(figsize=(7, 6))
sns.countplot(
    x='cryptosporidium',
    hue='cryptosporidium',  # colour each bar by its own category
    data=df,
    palette='coolwarm',
    ax=ax,
)
ax.set_title('Cryptosporidium Positive/Negative Distribution')
plt.show()
In [ ]:
# Count of Giardia test results across all rows of df.
# A fresh figure/axes pair is created explicitly: any earlier figure has
# already been shown, so plt.subplot(1, 2, 2) here would start a new
# default-sized figure and draw only in its right half.
fig, ax = plt.subplots(figsize=(7, 6))
sns.countplot(
    x='giardia',
    hue='giardia',  # colour each bar by its own category
    data=df,
    palette='viridis',
    ax=ax,
)
ax.set_title('Giardia Positive/Negative Distribution')
plt.show()
Positive cases by species
In [ ]:
# Cryptosporidium results broken down by species (one bar group per species,
# one bar per result value).
fig, ax = plt.subplots(figsize=(12, 8))
sns.countplot(
    data=df,
    x='species',
    hue='cryptosporidium',
    palette='coolwarm',
    ax=ax,
)
ax.set_title('Cryptosporidium Positive/Negative by Species')
# Tilt the species names so neighbouring labels do not overlap.
ax.tick_params(axis='x', labelrotation=45)
plt.show()
In [ ]:
# Giardia results broken down by species (one bar group per species,
# one bar per result value).
# An explicit 12x8 figure is created so this chart matches the size of the
# Cryptosporidium-by-species chart instead of falling back to the smaller
# default canvas, where the rotated species labels are cramped.
fig, ax = plt.subplots(figsize=(12, 8))
sns.countplot(
    x='species',
    hue='giardia',
    data=df,
    palette='viridis',
    ax=ax,
)
ax.set_title('Giardia Positive/Negative by Species')
# Tilt the species names so neighbouring labels do not overlap.
ax.tick_params(axis='x', labelrotation=45)
plt.show()
In [ ]:
# Geographic scatter of the sampling locations: longitude on x, latitude on y.
fig, ax = plt.subplots(figsize=(10, 8))
sns.scatterplot(
    data=df,
    x='longitude',
    y='latitude',
    hue='cryptosporidium',  # colour encodes the Cryptosporidium result
    style='giardia',        # marker shape encodes the Giardia result
    palette='tab10',
    ax=ax,
)
ax.set_title('Geographic Distribution of Cryptosporidium and Giardia')
ax.set_xlabel('Longitude')
ax.set_ylabel('Latitude')
# Anchor the legend just outside the right edge so it does not cover points.
ax.legend(title='Legend', bbox_to_anchor=(1.05, 1), loc='upper left')
plt.show()