# Demographics Responses

This section should help us get a handle on who is interested and which parts they are interested in.

In [1]:
from config import survey, default_color
import matplotlib.pyplot as plt
import pandas as pd
import altair as alt

## Basic Demographic Questions

First, let's look at the raw data for all the demographic questions.

### Employer

In [2]:
# Horizontal bar chart of respondents per employer, bars ordered by descending count.
employer_y = alt.Y("employed_by:N", sort='-x', title="")
employer_x = alt.X("count()", title="Number of people")
(
    alt.Chart(survey, title="Employer?")
    .mark_bar()
    .encode(y=employer_y, x=employer_x, tooltip=alt.Tooltip("count():N"))
)

### Rank

In [3]:
# Horizontal bar chart of respondents per job rank, bars ordered by descending count.
rank_y = alt.Y("job_rank:N", sort='-x', title="")
rank_x = alt.X("count()", title="Number of people")
(
    alt.Chart(survey, title="Rank")
    .mark_bar()
    .encode(y=rank_y, x=rank_x, tooltip=alt.Tooltip("count():N"))
)

### Travel

In [4]:
# Horizontal bar chart of respondents per travel answer, bars ordered by descending count.
travel_y = alt.Y("travel:N", sort='-x', title="")
travel_x = alt.X("count()", title="Number of people")
(
    alt.Chart(survey, title="Travel Type")
    .mark_bar()
    .encode(y=travel_y, x=travel_x, tooltip=alt.Tooltip("count():N"))
)

### Management

In [5]:
# Horizontal bar chart of respondents per management answer, bars ordered by descending count.
management_y = alt.Y("management:N", sort='-x', title="")
management_x = alt.X("count()", title="Number of people")
(
    alt.Chart(survey, title="Snowmass Management")
    .mark_bar()
    .encode(y=management_y, x=management_x, tooltip=alt.Tooltip("count():N"))
)

### Primary Frontier Interest

While the survey question says _Primary_, people were able to respond with multiple frontiers. And they did!

In [6]:
# Bar chart of how many respondents selected 1, 2, 3, ... frontiers.
(alt.Chart(data=survey, title="Number of Primary Frontiers")
    .mark_bar()
    # f_len is computed in the chart itself with the Vega expression length().
    # Assumes frontier_all holds a list of selected frontiers per respondent - TODO confirm.
    .transform_calculate(
        f_len='length(datum.frontier_all)'
    )
    .encode(
        # Explicit title so the axis does not fall back to the internal field name "f_len".
        y=alt.Y(shorthand="f_len:N", title="Frontiers selected"),
        x=alt.X(shorthand="count()",title="Number of people"),
        tooltip=alt.Tooltip("count():N"),
    )
)

In [7]:
# Bar chart of how often each frontier was selected, bars ordered by descending count.
(alt.Chart(data=survey, title="Frontiers Selected")
    .mark_bar()
    # Flatten frontier_all so that every selected frontier becomes its own row;
    # a respondent who picked several frontiers is counted once per frontier.
    .transform_flatten(
        flatten=['frontier_all'],
    )
    .encode(
        # Empty title, matching the other categorical charts: the chart title already
        # names the axis, and the raw column name "frontier_all" is not reader-friendly.
        y=alt.Y(shorthand="frontier_all:N", sort="-x", title=""),
        x=alt.X(shorthand="count()",title="Number of people"),
        tooltip=alt.Tooltip("count():N"),
    )
)

## Cross Frontier Interest

For the purposes of cross-frontier discussions, it would be good to understand the correlations between the primary selected frontiers.

In [8]:
# Survey column -> short frontier label. The mapping's insertion order also fixes
# the column order fed into the correlation matrix.
frontier_labels = {
    'frontier_EF': 'EF',
    'frontier_CF': 'CF',
    'frontier_NF': 'NF',
    'frontier_TF': 'TF',
    'frontier_CompF': "CompF",
    'frontier_RF': "RF",
    'frontier_IF': "IF",
    'frontier_AF': "AF",
    'frontier_CommF': "CommF",
    'frontier_UF': "UF",
}

# Pairwise correlation matrix between the frontier columns, labelled with the short names.
frontier_corr = survey[list(frontier_labels)].rename(columns=frontier_labels).corr()

# Long format: one row per (frontier1, frontier2) pair, plus a two-decimal text
# version of the coefficient for use as a label.
cor_data = (
    frontier_corr
    .stack()
    .reset_index()
    .rename(columns={'level_0': 'frontier1', 'level_1': 'frontier2', 0: 'correlation'})
    .assign(correlation_label=lambda pairs: pairs['correlation'].map('{:.2f}'.format))
)

# Axes shared by both layers: frontier1 on x, frontier2 on y, no axis titles.
frontier_axes = dict(
    x=alt.X('frontier1:O', title=""),
    y=alt.Y('frontier2:O', title=""),
)
base = alt.Chart(cor_data, width=800, height=800).encode(**frontier_axes)

# Heatmap layer: each cell is coloured by its correlation value.
cor_plot = base.mark_rect().encode(color='correlation:Q')

# Label layer: prints the formatted coefficient in every cell. For readability the
# label is white where the correlation exceeds 0.5 and black everywhere else.
label_color = alt.condition(
    alt.datum.correlation > 0.5,
    alt.value('white'),
    alt.value('black'),
)
text = base.mark_text().encode(text='correlation_label', color=label_color)

# Overlay the labels on the heatmap (same result as `cor_plot + text`).
alt.layer(cor_plot, text)

Below is a list of the frontier pairs sorted by correlation, most negative first, which might help with scheduling conflicts!

In [9]:
def sorted_string(row):
    """Build an order-independent key for the pair of frontiers in ``row``.

    Reads ``row['frontier1']`` and ``row['frontier2']``, sorts the two names and
    joins them with ``'-'``, so (A, B) and (B, A) both yield the same string.
    """
    pair = [row['frontier1'], row['frontier2']]
    pair.sort()
    return '-'.join(pair)

# Tag every row with its order-independent pair key, so that (A, B) and (B, A)
# share the same value in 'frontier-both'.
cor_data['frontier-both'] = cor_data.apply(sorted_string, axis=1)

# Keep the first row seen for each pair key, then list the pairs by ascending correlation.
unique_pairs = cor_data.drop_duplicates('frontier-both')
unique_pairs.sort_values('correlation')[['frontier-both','correlation_label']]

Unnamed: 0,frontier-both,correlation_label
2,EF-NF,-0.25
1,CF-EF,-0.23
36,IF-TF,-0.22
17,AF-CF,-0.2
37,AF-TF,-0.13
9,EF-UF,-0.11
14,CF-CompF,-0.11
24,CompF-NF,-0.09
39,TF-UF,-0.09
15,CF-RF,-0.08
