Demographics Responses

This section should help us get a handle on who is interested and what parts they are interested in.

from config import survey, default_color
import matplotlib.pyplot as plt
import pandas as pd
import altair as alt

Basic Demographic Questions

First, let's look at the raw data for all the demographic questions.

Employer

# Horizontal bar chart: number of survey rows per `employed_by` value,
# with the categories ordered by descending count.
alt.Chart(survey, title="Employer?").mark_bar().encode(
    x=alt.X("count()", title="Number of people"),
    y=alt.Y("employed_by:N", sort="-x", title=""),
    tooltip=alt.Tooltip("count():N"),
)

Rank

# Horizontal bar chart: number of survey rows per `job_rank` value,
# with the categories ordered by descending count.
alt.Chart(survey, title="Rank").mark_bar().encode(
    x=alt.X("count()", title="Number of people"),
    y=alt.Y("job_rank:N", sort="-x", title=""),
    tooltip=alt.Tooltip("count():N"),
)

Travel

# Horizontal bar chart: number of survey rows per `travel` value,
# with the categories ordered by descending count.
alt.Chart(survey, title="Travel Type").mark_bar().encode(
    x=alt.X("count()", title="Number of people"),
    y=alt.Y("travel:N", sort="-x", title=""),
    tooltip=alt.Tooltip("count():N"),
)

Management

# Horizontal bar chart: number of survey rows per `management` value,
# with the categories ordered by descending count.
alt.Chart(survey, title="Snowmass Management").mark_bar().encode(
    x=alt.X("count()", title="Number of people"),
    y=alt.Y("management:N", sort="-x", title=""),
    tooltip=alt.Tooltip("count():N"),
)

Primary Frontier Interest

While the survey question says Primary, people were able to respond with multiple frontiers. And they did!

# Bar chart of how many survey rows selected 1, 2, 3, ... frontiers.
# `f_len` is computed per row as the length of `frontier_all`
# (presumably the list of frontiers that row selected -- confirm in config).
(
    alt.Chart(survey, title="Number of Primary Frontiers")
    .transform_calculate(f_len="length(datum.frontier_all)")
    .mark_bar()
    .encode(
        x=alt.X("count()", title="Number of people"),
        y=alt.Y("f_len:N"),
        tooltip=alt.Tooltip("count():N"),
    )
)
# Bar chart of how often each frontier was selected. The flatten transform
# expands every element of `frontier_all` into its own row, so a survey row
# listing several frontiers contributes once to each of them.
(
    alt.Chart(survey, title="Frontiers Selected")
    .transform_flatten(flatten=["frontier_all"])
    .mark_bar()
    .encode(
        x=alt.X("count()", title="Number of people"),
        y=alt.Y("frontier_all:N", sort="-x"),
        tooltip=alt.Tooltip("count():N"),
    )
)

Cross Frontier Interest

For the purposes of cross-frontier discussions, it would be good to understand the correlations between the primary selected frontiers.

# Pairwise correlations between the per-frontier survey columns, reshaped
# into long form: one row per (frontier1, frontier2) pair.
cor_data = (
    survey.loc[:, [
        'frontier_EF',
        'frontier_CF',
        'frontier_NF',
        'frontier_TF',
        'frontier_CompF',
        'frontier_RF',
        'frontier_IF',
        'frontier_AF',
        'frontier_CommF',
        'frontier_UF',
    ]]
    # Shorten the column names to the bare frontier abbreviations so they
    # read well as labels.
    .rename(columns={
        'frontier_EF': 'EF', 'frontier_CF': 'CF',
        'frontier_NF': 'NF', 'frontier_TF': 'TF',
        'frontier_CompF': 'CompF', 'frontier_RF': 'RF',
        'frontier_IF': 'IF', 'frontier_AF': 'AF',
        'frontier_CommF': 'CommF', 'frontier_UF': 'UF',
    })
    .corr()
    # Matrix -> long form; the two index levels and the value column get
    # their default names here and are renamed right after.
    .stack()
    .reset_index()
    .rename(columns={
        0: 'correlation',
        'level_0': 'frontier1',
        'level_1': 'frontier2',
    })
)
# Two-decimal text rendering of each correlation, used as a display label.
cor_data['correlation_label'] = cor_data['correlation'].map(
    lambda value: '{:.2f}'.format(value)
)

# Shared 800x800 canvas with one cell per (frontier1, frontier2) pair;
# both layers below inherit these axes.
base = alt.Chart(cor_data, height=800, width=800).encode(
    y=alt.Y('frontier2:O', title=""),
    x=alt.X('frontier1:O', title=""),
)

# Heatmap layer: each cell is shaded by its correlation value.
cor_plot = base.mark_rect().encode(color='correlation:Q')

# Label layer: print the two-decimal correlation in every cell. The text is
# white where the correlation exceeds 0.5 and black otherwise, to keep the
# labels readable against the cell color.
text = base.mark_text().encode(
    color=alt.condition(
        alt.datum.correlation > 0.5,
        alt.value('white'),
        alt.value('black'),
    ),
    text='correlation_label',
)

# '+' layers the charts: rectangles first, labels drawn on top of them.
cor_plot + text

And a list of the correlations, sorted from most negative to most positive - which might help with scheduling conflicts!

def sorted_string(row):
    """Return the two frontier names of *row* joined by '-' in sorted order.

    *row* is indexed with the keys 'frontier1' and 'frontier2'. Sorting the
    two names first makes the label independent of which key holds which
    name, so (A, B) and (B, A) produce the same string.
    """
    pair = [row['frontier1'], row['frontier2']]
    pair.sort()
    return '-'.join(pair)

# Give every row an order-independent pair label so that (A, B) and (B, A)
# share the same key.
cor_data['frontier-both'] = cor_data.apply(sorted_string, axis=1)
# Keep the first row for each unordered pair and list the pairs from the
# lowest to the highest correlation.
(
    cor_data
    .drop_duplicates('frontier-both')
    .sort_values('correlation')
    [['frontier-both', 'correlation_label']]
)
frontier-both correlation_label
2 EF-NF -0.25
1 CF-EF -0.23
36 IF-TF -0.22
17 AF-CF -0.20
37 AF-TF -0.13
9 EF-UF -0.11
14 CF-CompF -0.11
24 CompF-NF -0.09
39 TF-UF -0.09
15 CF-RF -0.08
27 AF-NF -0.07
47 AF-CompF -0.04
5 EF-RF -0.04
49 CompF-UF -0.04
58 CommF-RF -0.03
57 AF-RF -0.03
38 CommF-TF -0.02
79 AF-UF -0.02
12 CF-NF -0.02
16 CF-IF -0.02
8 CommF-EF -0.02
78 AF-CommF -0.01
18 CF-CommF -0.01
68 CommF-IF -0.01
45 CompF-RF -0.00
46 CompF-IF -0.00
34 CompF-TF 0.01
28 CommF-NF 0.05
23 NF-TF 0.05
19 CF-UF 0.07
4 CompF-EF 0.08
89 CommF-UF 0.09
35 RF-TF 0.09
6 EF-IF 0.09
26 IF-NF 0.10
56 IF-RF 0.10
67 AF-IF 0.11
59 RF-UF 0.11
3 EF-TF 0.11
7 AF-EF 0.12
13 CF-TF 0.16
48 CommF-CompF 0.18
25 NF-RF 0.18
29 NF-UF 0.20
69 IF-UF 0.20
88 CommF-CommF 1.00
77 AF-AF 1.00
0 EF-EF 1.00
33 TF-TF 1.00
55 RF-RF 1.00
44 CompF-CompF 1.00
22 NF-NF 1.00
11 CF-CF 1.00
66 IF-IF 1.00
99 UF-UF 1.00