import pandas as pd
# Raw CSV of the Palmer Penguins data set, hosted in the PIC16B course repo.
url = (
    "https://raw.githubusercontent.com/PhilChodrow/PIC16B/"
    "master/datasets/palmer_penguins.csv"
)

# Fetch the file over HTTP and parse it into a DataFrame.
penguins = pd.read_csv(url)

# Preview the first five rows (rendered as the cell output in a notebook).
penguins.head()
studyName | Sample Number | Species | Region | Island | Stage | Individual ID | Clutch Completion | Date Egg | Culmen Length (mm) | Culmen Depth (mm) | Flipper Length (mm) | Body Mass (g) | Sex | Delta 15 N (o/oo) | Delta 13 C (o/oo) | Comments | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | PAL0708 | 1 | Adelie Penguin (Pygoscelis adeliae) | Anvers | Torgersen | Adult, 1 Egg Stage | N1A1 | Yes | 11/11/07 | 39.1 | 18.7 | 181.0 | 3750.0 | MALE | NaN | NaN | Not enough blood for isotopes. |
1 | PAL0708 | 2 | Adelie Penguin (Pygoscelis adeliae) | Anvers | Torgersen | Adult, 1 Egg Stage | N1A2 | Yes | 11/11/07 | 39.5 | 17.4 | 186.0 | 3800.0 | FEMALE | 8.94956 | -24.69454 | NaN |
2 | PAL0708 | 3 | Adelie Penguin (Pygoscelis adeliae) | Anvers | Torgersen | Adult, 1 Egg Stage | N2A1 | Yes | 11/16/07 | 40.3 | 18.0 | 195.0 | 3250.0 | FEMALE | 8.36821 | -25.33302 | NaN |
3 | PAL0708 | 4 | Adelie Penguin (Pygoscelis adeliae) | Anvers | Torgersen | Adult, 1 Egg Stage | N2A2 | Yes | 11/16/07 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | Adult not sampled. |
4 | PAL0708 | 5 | Adelie Penguin (Pygoscelis adeliae) | Anvers | Torgersen | Adult, 1 Egg Stage | N3A1 | Yes | 11/16/07 | 36.7 | 19.3 | 193.0 | 3450.0 | FEMALE | 8.76651 | -25.32426 | NaN |
# List the distinct labels recorded in the "Sex" column (missing values included).
print(penguins["Sex"].unique())
['MALE' 'FEMALE' nan '.']
# List the distinct labels recorded in the "Stage" column.
print(penguins["Stage"].unique())
['Adult, 1 Egg Stage']
import seaborn as sns
# Drop rows that are missing any value the analysis below relies on.
# `dropna` returns a new DataFrame rather than modifying in place, so the
# result has to be assigned. It is restricted to the columns actually used:
# an unrestricted dropna would also discard every row whose "Comments" field
# is empty.
penguins = penguins.dropna(subset = ["Sex", "Body Mass (g)", "Date Egg"])

# Remove rows whose "Sex" is the literal "." (presumably a data-entry
# placeholder). `.copy()` makes the filtered frame independent of the
# original so the column assignments below do not trigger pandas'
# SettingWithCopyWarning.
penguins = penguins[penguins["Sex"] != "."].copy()

# "Date Egg" is formatted month/day/two-digit-year; split it into three
# string columns.
penguins[["Month Egg", "Day Egg", "Year Egg"]] = penguins["Date Egg"].str.split("/", expand = True)

# Expand the two-digit year to four digits (assumes all years are 20xx).
penguins["Year Egg"] = "20" + penguins["Year Egg"]

# Build a timestamp from "YYYY-MM"; the day defaults to the first of the month.
penguins["Year-Month Egg"] = pd.to_datetime(penguins["Year Egg"] + "-" + penguins["Month Egg"])
penguins["Year-Month Egg"].head()
0 2007-11-01 1 2007-11-01 2 2007-11-01 3 2007-11-01 4 2007-11-01 Name: Year-Month Egg, dtype: datetime64[ns]
# Mean body mass for every (sex, egg year-month) combination.
mass_by_sex_and_month = penguins.groupby(["Sex", "Year-Month Egg"])[["Body Mass (g)"]]

# Turn the group keys back into ordinary columns so they can be used as
# plotting variables.
averages = mass_by_sex_and_month.mean().reset_index()
averages.head()
Sex | Year-Month Egg | Body Mass (g) | |
---|---|---|---|
0 | FEMALE | 2007-11-01 | 3792.187500 |
1 | FEMALE | 2007-12-01 | 4283.333333 |
2 | FEMALE | 2008-11-01 | 3887.500000 |
3 | FEMALE | 2009-11-01 | 3839.732143 |
4 | FEMALE | 2009-12-01 | 4837.500000 |
# Shrink the default fonts so the date tick labels fit.
sns.set(font_scale = 0.75)

# `sns.lineplot` returns the matplotlib Axes it drew on; keep that handle.
# Chaining `.set(...)` directly onto the call would instead bind `plot` to
# the return value of `Axes.set`, which has no `get_figure` method.
plot = sns.lineplot(data = averages,
                    x = "Year-Month Egg",
                    y = "Body Mass (g)",
                    hue = "Sex")
plot.set(title = "Body Mass of Female and Male Penguins Over Time")

# `savefig` needs a destination path.
# NOTE(review): the file name below is a placeholder choice; adjust as needed.
fig = plot.get_figure()
fig.savefig("penguin_body_mass.png")
#penguins = penguins.set_index(keys = ["Species", "Island"])
#penguins.head()
--------------------------------------------------------------------------- AttributeError Traceback (most recent call last) /var/folders/65/b6mdxbvd3hzfbdzc9x8l5mwm0000gn/T/ipykernel_42904/509637267.py in <module> 5 hue = "Sex").set(title = "Body Mass of Female and Male Penguins Over Time") 6 ----> 7 fig = plot.get_figure() 8 fig.savefig() 9 AttributeError: 'list' object has no attribute 'get_figure'