6-1: Consider the Titanic dataset.
In [1]:
Copied!
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Load the Titanic dataset from the comma-separated file under ../data.
titanic = pd.read_csv(
    filepath_or_buffer='../data/titanic.csv',
    sep=",",
)
import pandas as pd import numpy as np import matplotlib.pyplot as plt titanic = pd.read_csv('../data/titanic.csv', sep=",")
6-2: Display the scatter plot of age and fare in the Titanic dataset.
In [2]:
Copied!
# Scatter plot of the Age column against the Fare column.
x = titanic.Age
y = titanic.Fare

# Explicit figure/axes instead of the implicit pyplot state, so the labels
# below are attached to exactly this plot.
fig, ax = plt.subplots()
ax.scatter(x, y)
# Label the axes and title the figure so it can be read on its own.
ax.set(xlabel='Age', ylabel='Fare', title='Age vs. Fare (Titanic)')
plt.show(block=False)
x = titanic.Age y = titanic.Fare plt.scatter(x, y) plt.show(block=False)
6-3: Plot a histogram of the age variable in the Titanic dataset.
In [9]:
Copied!
# Histogram of the Age column, normalised to a density (density=True).
# Explicit figure/axes so the labels are attached to exactly this plot.
fig, ax = plt.subplots()
ax.set(xlabel='Age', ylabel='Density', title='Distribution of Age (Titanic)')
# Keep the hist call as the last expression: the cell then still displays the
# (bin heights, bin edges, BarContainer) tuple it returns.
ax.hist(titanic['Age'], density=True)
plt.hist(titanic['Age'], density=True)
Out[9]:
(array([0.00950368, 0.00809572, 0.03115094, 0.02974299, 0.02076729,
0.01231958, 0.00791973, 0.00422386, 0.00158395, 0.00035199]),
array([ 0.42 , 8.378, 16.336, 24.294, 32.252, 40.21 , 48.168, 56.126,
64.084, 72.042, 80. ]),
<BarContainer object of 10 artists>)
6-4: Plot a bivariate distribution of age and fare using kernel density estimation (KDE) in the Titanic dataset.
In [3]:
Copied!
# Standard-library import first, then third-party.
import warnings

import seaborn as sns

# Configure seaborn's plotting defaults for the cells that follow.
sns.set(color_codes=True)

# Install a blanket "ignore" filter: no warnings are shown from here on.
# NOTE(review): this also hides deprecation notices; consider narrowing it.
warnings.filterwarnings(action="ignore")
import seaborn as sns sns.set(color_codes=True) import warnings warnings.filterwarnings("ignore")
In [4]:
Copied!
# Bivariate kernel density estimate of Age (x) and Fare (y).
# The call is the cell's last expression, so the returned Axes is displayed.
sns.kdeplot(
    x='Age',
    y='Fare',
    data=titanic,
)
sns.kdeplot(data=titanic, x='Age', y='Fare')
Out[4]:
<Axes: xlabel='Age', ylabel='Fare'>