rainclouds

"Raincloud" plots are a combination of a (half) violin plot, box plot and scatter plots. The three together can make an appealing and informative visual, particularly for large N datasets.

rainclouds!(ax, category_labels, data_array; plot_boxplots=true, plot_clouds=true, kwargs...)              

Plot a violin (/histogram), boxplot and individual data points with appropriate spacing between each.

Arguments

  • ax : Axis used to place all these plots onto.

  • category_labels : Typically Vector{String} with a label for each element in data_array

  • data_array : Typically a Vector{Float64} holding the data points to plot.

Keywords

  • gap=0.2 : Distance between elements of x-axis.

  • side=:left : Can take values of :left , :right , determines where the violin plot will be, relative to the scatter points

  • dodge : vector of Integer (length of data) of grouping variable to create multiple side-by-side boxes at the same x position

  • dodge_gap = 0.03 : spacing between dodged boxes

  • n_dodge : the number of categories to dodge (defaults to maximum(dodge))

  • color : a single color, or a vector of colors, one for each point

Violin/Histogram Plot Specific Keywords

  • clouds=violin : [violin, hist, nothing] to show cloud plots either as violin or histogram plot, or no cloud plot.

  • hist_bins=30 : if clouds=hist , this passes down the number of bins to the histogram call.

  • cloud_width=1.0 : Determines size of violin plot. Corresponds to the width keyword arg in violin.

  • orientation=:vertical : orientation of rainclouds ( :vertical or :horizontal )

  • violin_limits=(-Inf, Inf) : specify values to trim the violin . Can be a Tuple or a Function (e.g. violin_limits=extrema )

Box Plot Specific Keywords

  • plot_boxplots=true : Boolean to show boxplots to summarize distribution of data.

  • boxplot_width=0.1 : Width of the boxplot in category x-axis absolute terms.

  • center_boxplot=true : Determines whether or not to have the boxplot be centered in the category.

  • whiskerwidth=0.5 : The width of the Q1, Q3 whisker in the boxplot. Value as a portion of the boxplot_width .

  • strokewidth=1.0 : Determines the stroke width for the outline of the boxplot.

  • show_median=true : Determines whether or not to draw a line showing the median value in the boxplot.

  • boxplot_nudge=0.075 : Determines the distance away the boxplot should be placed from the center line when center_boxplot is false . This is the value used to recenter the boxplot.

  • show_boxplot_outliers : show outliers in the boxplot as points (usually confusing when paired with the scatter plot, so the default is to not show them)

Scatter Plot Specific Keywords

  • side_nudge : Defaults to 0.02 if plot_boxplots is true, otherwise 0.075.

  • jitter_width=0.05 : Determines the width of the scatter-plot bar in category x-axis absolute terms.

  • markersize=2 : Size of marker used for the scatter plot.

Axis General Keywords

  • title

  • xlabel

  • ylabel

using CairoMakie
using Random
using Makie: rand_localized

####
#### Below is used for testing the plotting functionality.
####

"""
    mockup_distribution(N)

Return a vector of `N` mock samples drawn from one distribution shape picked at random
among "Single Mode", "Double Mode", "Random Exp" and "Uniform".

# Arguments
- `N`: number of samples to generate (an integer).

# Returns
A vector of `N` floating-point samples. Its shape depends on the category drawn:
- "Single Mode": scaled and shifted `randn` samples.
- "Double Mode": two scaled and shifted `randn` clusters, concatenated.
- "Random Exp": `randexp(N)`.
- "Uniform": `N` draws of `rand_localized(lower, upper)`.

The random parameters come from `rand_localized` (presumably a draw between the two
given bounds — confirm against its definition).
"""
function mockup_distribution(N)
    all_possible_labels = ["Single Mode", "Double Mode", "Random Exp", "Uniform"]
    category_type = rand(all_possible_labels)

    if category_type == "Single Mode"
        random_mean = rand_localized(0, 8)
        random_spread_coef = rand_localized(0.3, 1)
        data_points = random_spread_coef * randn(N) .+ random_mean

    elseif category_type == "Double Mode"
        # Split N into two cluster sizes that always sum to exactly N. Rounding N/2
        # twice over- or under-shoots by one sample whenever N is odd.
        first_count = N ÷ 2
        second_count = N - first_count

        random_mean = rand_localized(0, 8)
        random_spread_coef = rand_localized(0.3, 1)
        data_points = random_spread_coef * randn(first_count) .+ random_mean

        # Fresh mean and spread for the second cluster.
        random_mean = rand_localized(0, 8)
        random_spread_coef = rand_localized(0.3, 1)
        data_points = vcat(
            data_points, random_spread_coef * randn(second_count) .+ random_mean
        )

    elseif category_type == "Random Exp"
        data_points = randexp(N)

    elseif category_type == "Uniform"
        # Named `lower`/`upper` so the Base functions `min`/`max` are not shadowed.
        lower = rand_localized(0, 4)
        upper = lower + rand_localized(0.5, 4)
        data_points = [rand_localized(lower, upper) for _ in 1:N]

    else
        # Defensive: only reachable if `all_possible_labels` gains an unhandled entry.
        error("Unidentified category.")
    end

    return data_points
end

"""
    mockup_categories_and_data_array(num_categories; N = 500)

Build flat, parallel vectors of category labels and mock samples.

# Arguments
- `num_categories`: number of categories to generate. Labels are the letters
  `"A"`, `"B"`, ...; the count is capped at the 26 letters of `'A':'Z'`.

# Keywords
- `N = 500`: number of samples requested per category from `mockup_distribution`.

# Returns
A tuple `(category_labels, data_array)` where `category_labels::Vector{String}` holds one
label per sample and `data_array::Vector{Float64}` holds the samples, in the same order.
"""
function mockup_categories_and_data_array(num_categories; N = 500)
    category_labels = String[]
    data_array = Float64[]

    for category_label in string.(('A':'Z')[1:min(num_categories, end)])
        data_points = mockup_distribution(N)

        # Repeat the label once per sample actually returned, so the two output
        # vectors always have the same length.
        append!(category_labels, fill(category_label, length(data_points)))
        append!(data_array, data_points)
    end
    return category_labels, data_array
end

# Three mock categories shared by the first five examples.
category_labels, data_array = mockup_categories_and_data_array(3)

colors = Makie.wong_colors()

# One colour per sample: each label is mapped to the palette entry at the position of
# its first appearance among the unique labels. `colors` is read when the helper is called.
colors_by_category(labels) = colors[indexin(labels, unique(labels))]

# Histogram clouds (50 bins), no boxplots.
rainclouds(
    category_labels, data_array;
    xlabel = "Categories of Distributions", ylabel = "Samples", title = "My Title",
    plot_boxplots = false, cloud_width = 0.5, clouds = hist, hist_bins = 50,
    color = colors_by_category(category_labels),
)

# Horizontal orientation: the axis labels are swapped accordingly.
rainclouds(
    category_labels, data_array;
    ylabel = "Categories of Distributions",
    xlabel = "Samples", title = "My Title",
    orientation = :horizontal,
    plot_boxplots = true, cloud_width = 0.5, clouds = hist,
    color = colors_by_category(category_labels),
)

# Histogram clouds with boxplots.
rainclouds(
    category_labels, data_array;
    xlabel = "Categories of Distributions",
    ylabel = "Samples", title = "My Title",
    plot_boxplots = true, cloud_width = 0.5, clouds = hist,
    color = colors_by_category(category_labels),
)

# Default clouds on the right side, with `extrema` passed as `violin_limits`.
rainclouds(
    category_labels, data_array;
    xlabel = "Categories of Distributions", ylabel = "Samples", title = "My Title",
    plot_boxplots = true, cloud_width = 0.5, side = :right,
    violin_limits = extrema, color = colors_by_category(category_labels),
)

# Same as above without passing `violin_limits`.
rainclouds(
    category_labels, data_array;
    xlabel = "Categories of Distributions", ylabel = "Samples", title = "My Title",
    plot_boxplots = true, cloud_width = 0.5, side = :right,
    color = colors_by_category(category_labels),
)

# Six categories, kept under separate names so the three-category data stays available.
more_category_labels, more_data_array = mockup_categories_and_data_array(6)

rainclouds(
    more_category_labels, more_data_array;
    xlabel = "Categories of Distributions", ylabel = "Samples", title = "My Title",
    plot_boxplots = true, cloud_width = 0.5,
    color = colors_by_category(more_category_labels),
)

# Six categories again, this time replacing `category_labels` / `data_array`.
category_labels, data_array = mockup_categories_and_data_array(6)
rainclouds(
    category_labels, data_array;
    xlabel = "Categories of Distributions",
    ylabel = "Samples", title = "My Title",
    plot_boxplots = true, cloud_width = 0.5,
    color = colors_by_category(category_labels),
)

Four of these, across 3 distributions: left and right examples, with and without box plot.

fig = Figure(resolution = (800*2, 600*5))
colors = [Makie.wong_colors(); Makie.wong_colors()]

# Per-sample colours: each label is mapped to the palette entry at the position of its
# first appearance among the unique labels. A fresh vector is built for every call.
pick_colors = labels -> colors[indexin(labels, unique(labels))]

category_labels, data_array = mockup_categories_and_data_array(3)

# 2x2 grid on the same data: columns switch the cloud side, rows toggle the boxplot.
for (row, col, cloud_side, with_boxplots, panel_title) in (
    (1, 1, :left, true, "Left Side, with Box Plot"),
    (2, 1, :left, false, "Left Side, without Box Plot"),
    (1, 2, :right, true, "Right Side, with Box Plot"),
    (2, 2, :right, false, "Right Side, without Box Plot"),
)
    rainclouds!(
        Axis(fig[row, col]), category_labels, data_array;
        title = panel_title,
        side = cloud_side,
        plot_boxplots = with_boxplots,
        color = pick_colors(category_labels),
    )
end

# Plots with more categories
# dist_between_categories (0.6, 1.0)
# with and without clouds
# NOTE(review): `dist_between_categories` is not among the keywords documented for
# rainclouds (the keyword list has `gap` instead) — confirm whether it is still honoured.

category_labels, data_array = mockup_categories_and_data_array(12)

# Full-width rows 3-5: each spec adds its own keywords between `plot_boxplots` and `color`.
for (row, panel_title, extra_kwargs) in (
    (
        3,
        "More categories. Default spacing.",
        (dist_between_categories = 1.0,),
    ),
    (
        4,
        "More categories. Adjust space. (smaller cloud widths and smaller category distances)",
        (cloud_width = 0.3, dist_between_categories = 0.5),
    ),
    (
        5,
        "More categories. Adjust space. No clouds.",
        (clouds = nothing, dist_between_categories = 0.5),
    ),
)
    rainclouds!(
        Axis(fig[row, 1:2]), category_labels, data_array;
        title = panel_title,
        plot_boxplots = true,
        extra_kwargs...,
        color = pick_colors(category_labels),
    )
end

# Row 0 sits above the first panel row and spans all columns.
supertitle = Label(
    fig[0, :], "Cloud Plot Testing (Scatter, Violin, Boxplot)", textsize = 30
)
fig