Seaborn Introduction

Seaborn Introduction

In [1]:
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
In [2]:
def listAttr(obj, search = None):
    """Return the public attribute names of ``obj``, optionally filtered.

    Parameters
    ----------
    obj : any
        Object (module, class, instance, ...) to inspect with ``dir()``.
    search : str, optional
        Substring to look for. Matching is case-insensitive, so ``"grid"``
        finds both ``axisgrid`` and ``FacetGrid``. When omitted or empty,
        every public name is returned.

    Returns
    -------
    list of str
        Names from ``dir(obj)`` that do not start with an underscore, in the
        order ``dir()`` yields them, restricted to those containing
        ``search`` when it is given.
    """
    public = [item for item in dir(obj) if not item.startswith("_")]

    if not search:
        return public

    # Lower-case BOTH sides: lowering only the query would make mixed-case
    # names such as "FacetGrid" impossible to match.
    needle = search.lower()
    return [item for item in public if needle in item.lower()]
In [3]:
listAttr(sns)
Out[3]:
['FacetGrid',
 'JointGrid',
 'PairGrid',
 'algorithms',
 'axes_style',
 'axisgrid',
 'barplot',
 'blend_palette',
 'boxenplot',
 'boxplot',
 'categorical',
 'catplot',
 'choose_colorbrewer_palette',
 'choose_cubehelix_palette',
 'choose_dark_palette',
 'choose_diverging_palette',
 'choose_light_palette',
 'clustermap',
 'cm',
 'color_palette',
 'colors',
 'countplot',
 'crayon_palette',
 'crayons',
 'cubehelix_palette',
 'dark_palette',
 'desaturate',
 'despine',
 'displot',
 'distplot',
 'distributions',
 'diverging_palette',
 'dogplot',
 'ecdfplot',
 'external',
 'get_data_home',
 'get_dataset_names',
 'heatmap',
 'histplot',
 'hls_palette',
 'husl_palette',
 'jointplot',
 'kdeplot',
 'light_palette',
 'lineplot',
 'lmplot',
 'load_dataset',
 'matrix',
 'miscplot',
 'move_legend',
 'mpl',
 'mpl_palette',
 'pairplot',
 'palettes',
 'palplot',
 'plotting_context',
 'pointplot',
 'rcmod',
 'regplot',
 'regression',
 'relational',
 'relplot',
 'reset_defaults',
 'reset_orig',
 'residplot',
 'rugplot',
 'saturate',
 'scatterplot',
 'set',
 'set_color_codes',
 'set_context',
 'set_hls_values',
 'set_palette',
 'set_style',
 'set_theme',
 'stripplot',
 'swarmplot',
 'utils',
 'violinplot',
 'widgets',
 'xkcd_palette',
 'xkcd_rgb']
In [4]:
listAttr(sns, "load_dataset")
Out[4]:
['load_dataset']

Load an example dataset from the online repository

In [5]:
sns.get_dataset_names() 
Out[5]:
['anagrams',
 'anscombe',
 'attention',
 'brain_networks',
 'car_crashes',
 'diamonds',
 'dots',
 'dowjones',
 'exercise',
 'flights',
 'fmri',
 'geyser',
 'glue',
 'healthexp',
 'iris',
 'mpg',
 'penguins',
 'planets',
 'seaice',
 'taxis',
 'tips',
 'titanic']

Load dataset

In [6]:
tips = sns.load_dataset('tips')
tips
Out[6]:
total_bill tip sex smoker day time size
0 16.99 1.01 Female No Sun Dinner 2
1 10.34 1.66 Male No Sun Dinner 3
2 21.01 3.50 Male No Sun Dinner 3
3 23.68 3.31 Male No Sun Dinner 2
4 24.59 3.61 Female No Sun Dinner 4
... ... ... ... ... ... ... ...
239 29.03 5.92 Male No Sat Dinner 3
240 27.18 2.00 Female Yes Sat Dinner 2
241 22.67 2.00 Male Yes Sat Dinner 2
242 17.82 1.75 Male No Sat Dinner 2
243 18.78 3.00 Female No Thur Dinner 2

244 rows × 7 columns

In [7]:
# set_theme() is the preferred entry point; sns.set() is only an alias kept
# for backward compatibility. color_codes=True remaps matplotlib's shorthand
# color codes (e.g. "b", "g", "r") to the active seaborn palette.
sns.set_theme(color_codes=True)
In [8]:
ax = sns.scatterplot(x = 'total_bill', y = 'tip', data = tips)
In [9]:
# Switch the axes style to "ticks" before drawing the bar plot.
sns.set_style('ticks') 
# NOTE(review): total_bill is numeric, and barplot treats x as categorical, so
# this presumably draws one bar per distinct bill amount — confirm that is
# intended (a categorical column such as "day" may have been meant).
ax = sns.barplot(x="total_bill", y="tip", data=tips)
In [10]:
ax = sns.barplot(x = "total_bill", y = "tip", data = tips)
In [11]:
ax = sns.scatterplot(x="total_bill", y="tip", hue="day", data=tips)
In [12]:
ax = sns.scatterplot(x="total_bill", y="tip", hue="day", style="time", data=tips)
help(sns.lmplot)

Use lmplot() to enhance a scatterplot with a fitted linear regression model (and its uncertainty):

In [13]:
sns.lmplot(x="total_bill", y="tip", data=tips)
Out[13]:
<seaborn.axisgrid.FacetGrid at 0x18fc29ba5e0>
In [14]:
sns.lmplot(x = "total_bill", y = "tip", data = tips, hue = "time")
Out[14]:
<seaborn.axisgrid.FacetGrid at 0x18fc2a75b20>
In [15]:
sns.lmplot(x = "total_bill", y = "tip", data = tips, hue="day")
Out[15]:
<seaborn.axisgrid.FacetGrid at 0x18fc2a96820>

Specialized categorical plots

help(sns.catplot)
In [16]:
sns.catplot(x="day", y="total_bill", hue="smoker", kind="swarm", data=tips);
In [17]:
tips.query("size != 3")
Out[17]:
total_bill tip sex smoker day time size
0 16.99 1.01 Female No Sun Dinner 2
3 23.68 3.31 Male No Sun Dinner 2
4 24.59 3.61 Female No Sun Dinner 4
5 25.29 4.71 Male No Sun Dinner 4
6 8.77 2.00 Male No Sun Dinner 2
... ... ... ... ... ... ... ...
237 32.83 1.17 Male Yes Sat Dinner 2
240 27.18 2.00 Female Yes Sat Dinner 2
241 22.67 2.00 Male Yes Sat Dinner 2
242 17.82 1.75 Male No Sat Dinner 2
243 18.78 3.00 Female No Thur Dinner 2

206 rows × 7 columns

In [18]:
sns.catplot(x="size", y="total_bill", kind="swarm",
            data=tips.query("size != 3"));
C:\ProgramData\Miniconda3\lib\site-packages\seaborn\categorical.py:3540: UserWarning: 9.6% of the points cannot be placed; you may want to decrease the size of the markers or use stripplot.
  warnings.warn(msg, UserWarning)
In [19]:
sns.catplot(x="day", y="total_bill", hue="smoker", kind="violin", data=tips);
In [20]:
sns.catplot(x="day", y="total_bill", hue="smoker",
            kind="bar", data=tips);
In [21]:
g = sns.catplot(x = "total_bill", y = "day",  hue="time", kind = 'box', legend=False, data = tips)
g.add_legend(title = "Meal")
Out[21]:
<seaborn.axisgrid.FacetGrid at 0x18fc1b7fe80>
In [22]:
# Horizontal box plot of bill totals per day, split by meal time. The automatic
# legend is switched off so that a titled legend can be added explicitly.
g = sns.catplot(x="total_bill", y="day", hue="time", kind="box", legend=False, data=tips)
g.add_legend(title="Meal")
# `figure` is the preferred accessor for the underlying matplotlib Figure;
# the older `fig` attribute is deprecated.
g.figure.set_size_inches(10.5, 5.5)
# Label only the x axis; the day names on the y axis speak for themselves.
g.set_axis_labels("Total bill ($)", "")
Out[22]:
<seaborn.axisgrid.FacetGrid at 0x18fc1d1d1c0>
In [23]:
g = sns.catplot(x="total_bill", y="day", hue="time",
                height=3.5, aspect=1.5,
                kind="boxen", legend=False, data=tips);
In [24]:
# Horizontal box plot of bill totals per day, split by meal time, followed by
# presentation tweaks (legend, labels, limits, spines, size, ticks).
g = sns.catplot(x="total_bill", y="day", hue="time",
                height=3.5, aspect=1.5,
                kind="box", legend=False, data=tips)
g.add_legend(title="Meal")
g.set_axis_labels("Total bill ($)", "")
# NOTE(review): the spelled-out day names are applied positionally, so this
# assumes the "day" categories are ordered Thur, Fri, Sat, Sun — confirm.
g.set(xlim=(0, 60), yticklabels=["Thursday", "Friday", "Saturday", "Sunday"])
g.despine(trim=True)
# `figure` is the preferred accessor for the underlying matplotlib Figure;
# the older `fig` attribute is deprecated.
g.figure.set_size_inches(6.5, 3.5)
# Minor ticks halfway between the default major ticks on the bill axis.
g.ax.set_xticks([5, 15, 25, 35, 45, 55], minor=True)
# Trailing semicolon suppresses the list repr returned by plt.setp.
plt.setp(g.ax.get_yticklabels(), rotation=30);

Histograms

help(sns.distplot)
In [25]:
# distplot() is deprecated; histplot() is its axes-level replacement.
# kde=True with stat="density" reproduces distplot's default look: a
# density-normalised histogram with a kernel density curve on top.
# cut=3 extends the curve past the data range the way distplot did.
ax = sns.histplot(tips['total_bill'], kde=True, stat="density", kde_kws=dict(cut=3))

https://www.khanacademy.org/math/statistics-probability/displaying-describing-data/quantitative-data-graphs/a/histograms-review

To build a histogram, first "bin" (or "bucket") the range of values — that is, divide the entire range of values into a series of intervals — and then count how many values fall into each interval.

In [26]:
# distplot() is deprecated; histplot() is its axes-level replacement.
# With no KDE requested, a plain count histogram with 20 bins matches the
# original distplot(..., bins=20, kde=False) call.
ax = sns.histplot(tips['total_bill'], bins=20)
In [27]:
# KDE (kernel density estimation) overlays a smooth estimate of the shape of
# the distribution. It is left off here (histplot's default), so only the
# count histogram is drawn. distplot() is deprecated; histplot() replaces it.
# Note that histplot picks its default bin count with a different rule than
# distplot did, so the bars may not line up exactly with the old figure.
ax = sns.histplot(tips['total_bill'])
In [28]:
tips.time.unique()
Out[28]:
['Dinner', 'Lunch']
Categories (2, object): ['Lunch', 'Dinner']
The plot below shows the relationships among five variables in the tips dataset. Three are numeric, and two are categorical. Two numeric variables (total_bill and tip) determine the position of each point on the axes, and the third (size) determines the size of each point. One categorical variable (time) splits the dataset across two different axes (facets), and the other (smoker) determines the color and shape of each point.
In [29]:
sns.relplot(x="total_bill", y="tip", col="time",
            hue="smoker", style="smoker", size="size",
            data=tips)
Out[29]:
<seaborn.axisgrid.FacetGrid at 0x18fc3c9e0d0>
In [30]:
sns.relplot(x="total_bill", y="tip", col="time",
            hue="smoker", style="smoker", size="size", kind="line", data=tips)
Out[30]:
<seaborn.axisgrid.FacetGrid at 0x18fc2ba33d0>

Machine Learning

  1. Deal Banking Marketing Campaign Dataset With Machine Learning

TensorFlow

  1. Difference Between Scalar, Vector, Matrix and Tensor
  2. TensorFlow Deep Learning Model With IRIS Dataset
  3. Sequence to Sequence Learning With Neural Networks To Perform Number Addition
  4. Image Classification Model MobileNet V2 from TensorFlow Hub
  5. Step by Step Intent Recognition With BERT
  6. Sentiment Analysis for Hotel Reviews With NLTK and Keras
  7. Simple Sequence Prediction With LSTM
  8. Image Classification With ResNet50 Model
  9. Predict Amazon Inc Stock Price with Machine Learning
  10. Predict Diabetes With Machine Learning Algorithms
  11. TensorFlow Build Custom Convolutional Neural Network With MNIST Dataset
  12. Deal Banking Marketing Campaign Dataset With Machine Learning

PySpark

  1. How to Parallelize and Distribute Collection in PySpark
  2. Role of StringIndexer and Pipelines in PySpark ML Feature - Part 1
  3. Role of OneHotEncoder and Pipelines in PySpark ML Feature - Part 2
  4. Feature Transformer VectorAssembler in PySpark ML Feature - Part 3
  5. Logistic Regression in PySpark (ML Feature) with Breast Cancer Data Set

PyTorch

  1. Build the Neural Network with PyTorch
  2. Image Classification with PyTorch
  3. Twitter Sentiment Classification In PyTorch
  4. Training an Image Classifier in Pytorch

Natural Language Processing

  1. Spelling Correction Of The Text Data In Natural Language Processing
  2. Handling Text For Machine Learning
  3. Extracting Text From PDF File in Python Using PyPDF2
  4. How to Collect Data Using Twitter API V2 For Natural Language Processing
  5. Converting Text to Features in Natural Language Processing
  6. Extract A Noun Phrase For A Sentence In Natural Language Processing