from bokeh.io import output_notebook

# Configure Bokeh once, up front, so that the show() calls further down
# display their plots inline in the notebook.
output_notebook()  # render plots inline in notebook


# import the relevant libraries
import pandas as pd


# Load the life-expectancy table and keep only the country name and the
# 2007 column, which is the single year shown in the dot plots.
file = "../data/csv_files/life_expectancy.csv"
df = pd.read_csv(file)
df = df.loc[:, ["country", "2007"]]

# Countries of the Americas to plot.
# Each name must match the "country" column exactly: isin() drops any name
# it cannot find without raising, so a misspelling silently loses that
# country. "El Salvador" was previously spelled "El_Salvador" and therefore
# never matched; it now uses a space like the other multi-word names.
americas = (
    "Argentina",
    "Bolivia",
    "Brazil",
    "Canada",
    "Chile",
    "Colombia",
    "Costa Rica",
    "Cuba",
    "Dominican Republic",
    "Ecuador",
    "El Salvador",
    "Guatemala",
    "Haiti",
    "Honduras",
    "Jamaica",
    "Mexico",
    "Nicaragua",
    "Panama",
    "Paraguay",
    "Peru",
    "Puerto Rico",
    "Trinidad and Tobago",
    "United States",
    "Uruguay",
    "Venezuela",
)

# Keep only the selected countries and renumber the rows from 0.
df = df[df["country"].isin(americas)].reset_index(drop=True)

# Expose the 2007 value under the name "years", rounded to whole years.
df = df.rename(columns={"2007": "years"})
df["years"] = df["years"].round()

df  # notebook display of the prepared table


from bokeh.plotting import figure, show


# Dot plot with the countries listed alphabetically

# The y-axis factors are taken from the "country" column in row order, so
# the row order chosen here is the order of the categories on the axis.
# The rows are sorted Z -> A.
df = df.sort_values(by="country", ascending=False)

# Figure with a categorical y-axis (one factor per country) and a width
# that follows the available screen space.
p = figure(
    title="Figure 6.13 Life expectancy",
    x_axis_label="life expectancy (years)",
    y_range=df["country"],
    height=400,
    sizing_mode="stretch_width",
)

# One marker per country, positioned at its life expectancy.
p.scatter(source=df, x="years", y="country", size=8)

# Axis clean-up: hide both axis lines and the minor ticks on the x-axis.
p.yaxis.axis_line_color = None
p.xaxis.axis_line_color = None
p.xaxis.minor_tick_out = 0


show(p)  # render the figure


# Dot plot with the countries ordered by life expectancy

# Ascending sort on "years"; the resulting row order becomes the order of
# the categorical factors on the y-axis.
df = df.sort_values(by="years", ascending=True)

# Same figure set-up as the alphabetical plot, only the factor order differs.
p = figure(
    title="Figure 6.11 Life expectancy",
    x_axis_label="life expectancy (years)",
    y_range=df["country"],
    height=400,
    sizing_mode="stretch_width",
)

# One marker per country.
p.scatter(source=df, x="years", y="country", size=8)

# Hide both axis lines and the x-axis minor ticks.
p.yaxis.axis_line_color = None
p.xaxis.axis_line_color = None
p.xaxis.minor_tick_out = 0

show(p)


# Load the internet-usage table (the file is not UTF-8, hence the explicit
# encoding).
file = "../data/csv_files/Internet_user.csv"
df = pd.read_csv(file, encoding="ISO-8859-1")

# Countries to show in the heatmap. The order is significant: the plotting
# code uses this tuple (reversed) as the categorical y-axis range.
countries = (
    "Iceland",
    "Norway",
    "United Kingdom",
    "Japan",
    "Canada",
    "Germany",
    "New Zealand",
    "France",
    "Israel",
    "United States",
    "Argentina",
    "Chile",
    "Italy",
    "Brazil",
    "Mexico",
    "South Africa",
    "China",
    "Algeria",
    "India",
    "Kenya",
)

# Restrict to the selected countries, treat missing values as 0, and drop
# the descriptive columns so that only "country" and the per-year columns
# remain.
df = (
    df.loc[df["country"].isin(countries)]
    .reset_index(drop=True)
    .fillna(0)
    .drop(columns=["country_code", "indicator", "indicator_code"])
)

# Reshape from wide (one column per year) to long (one row per
# country/year pair). After stack() the former column labels sit in an
# unnamed index level, which reset_index() exposes as "level_1".
df = (
    df.set_index("country")
    .stack()
    .to_frame("percentage")
    .reset_index()
    .rename(columns={"level_1": "year"})
    .fillna(0)
)

# The year labels come from the CSV header as text; make them integers.
df["year"] = df["year"].astype(int)

df


# import relevant libraries
from bokeh.transform import transform
from bokeh.models import ColorBar, LinearColorMapper, FixedTicker


# Heatmap of internet adoption: one row per country, one cell per year

# Colour scale spanning the observed range of the "percentage" column.
mapper = LinearColorMapper(
    palette="Magma256",
    low=min(df["percentage"]),
    high=max(df["percentage"]),
)

# Figure without a toolbar; the country axis sits on the right-hand side
# and lists the countries in the reverse of their order in `countries`.
p = figure(
    title="Figure 6.15 Internet adoption over time",
    y_range=countries[::-1],
    y_axis_location="right",
    toolbar_location=None,
    height=400,
)

# One rectangle per (year, country) row, filled according to the colour
# scale and outlined in white.
# NOTE(review): the data appears to be yearly, so width=2 makes neighbouring
# cells overlap - confirm that this is intended.
p.rect(
    source=df,
    x="year",
    y="country",
    width=2,
    height=1,
    fill_color=transform("percentage", mapper),
    line_color="white",
)


# Axis styling

# x-axis: label only every fifth year, show the span 1993-2016, and hide
# the major tick marks.
p.xaxis.ticker = [1995, 2000, 2005, 2010, 2015]
p.x_range.start = 1993
p.x_range.end = 2016
p.xaxis.major_tick_out = 0
p.xaxis.major_tick_line_color = None

# y-axis: no axis line and no tick marks, only the country labels.
p.yaxis.axis_line_color = None
p.yaxis.major_tick_line_color = None
p.yaxis.major_tick_out = 0
p.yaxis.minor_tick_out = 0

# Colour bar sharing the heatmap's colour scale, with ticks at fixed
# positions.
color_bar = ColorBar(
    color_mapper=mapper,
    title="internet users / 100 people",
    title_text_font_style="normal",
    ticker=FixedTicker(ticks=[0, 25, 50, 75, 100]),
    major_tick_line_color=None,
    location=(0, 0),
    width=300,
    height=20,
)

# Place the colour bar above the plot area.
p.add_layout(color_bar, "above")


show(p)

	country	year	percentage
0	Argentina	1990	0.000000
1	Argentina	1991	0.000000
2	Argentina	1992	0.002993
3	Argentina	1993	0.029527
4	Argentina	1994	0.043706
...	...	...	...
535	South Africa	2012	41.000000
536	South Africa	2013	46.500000
537	South Africa	2014	49.000000
538	South Africa	2015	51.919116
539	South Africa	2016	54.000000

Visualize amounts with dot plots and heatmaps

Dot plots

Data preparation

Plotting

Heatmap

Data preparation

Plotting

	country	years
0	Argentina	75.0
1	Bolivia	70.0
2	Brazil	73.0
3	Canada	81.0
4	Chile	78.0
5	Colombia	77.0
6	Costa Rica	80.0
7	Cuba	78.0
8	Dominican Republic	73.0
9	Ecuador	74.0
10	Guatemala	70.0
11	Honduras	71.0
12	Haiti	60.0
13	Jamaica	76.0
14	Mexico	76.0
15	Nicaragua	74.0
16	Panama	78.0
17	Peru	78.0
18	Paraguay	76.0
19	Trinidad and Tobago	73.0
20	Uruguay	76.0
21	United States	78.0
22	Venezuela	75.0