New coders use AI more in their work but also see it as a threat

Contribution to #TidyTuesday analysing data from the Stack Overflow Developer Survey 2024. Analysis and charts created with Python; site made with Quarto.

Setup

To start, we’ll load the libraries and data required for the analysis. Feel free to skip the boring bit …

Code

#load libraries
import pandas as pd
import matplotlib.pyplot as plt 
from matplotlib.gridspec import GridSpec
import numpy as np
from matplotlib import font_manager
import matplotlib as mpl
import matplotlib.cm as cm

# load data: the survey responses, then the question/level/label crosswalk
df = pd.read_csv("stackoverflow_survey_single_response.csv")
label_dict = pd.read_csv("qname_levels_single_response_crosswalk.csv")

# set fonts: one FontProperties object per text role, built from the font files
title_font, legend_font, label_font = (
    font_manager.FontProperties(fname=font_file)
    for font_file in (
        'C:\\Windows\\Fonts\\UbuntuMono-B.ttf',
        'C:\\Windows\\Fonts\\Roboto-Regular.ttf',
        'C:\\Windows\\Fonts\\UbuntuMono-R.ttf',
    )
)

# set colormap shared by all charts
cmap = mpl.colormaps['spring_r']

Now we’ll clean the data and derive additional variables.

Code

# map labels to questions
# For each coded question, build a level -> label lookup from the crosswalk
# rows whose "qname" matches, and store the readable label in "<column>_l".
columns = ["age", "ai_sent", "ai_select", "ai_acc", "ai_complex", "ai_threat"]
for column in columns:
    _filter = label_dict["qname"] == column
    # named `level_to_label` (not `map`) so the builtin map() is not shadowed
    level_to_label = dict(zip(label_dict[_filter]["level"], label_dict[_filter]["label"]))
    df[column + "_l"] = df[column].map(level_to_label)

#create column based on binned values and add labels
# Bin edges in years; the last bin collects everything above 30 (up to 100).
_bins = [0, 5, 10, 15, 20, 25, 30, 100]
# "1-5", "6-10", ..., "26-30" for the six 5-year bins, plus ">30" for the last one
labels = [str(i+1)+"-"+str(i+5) for i in _bins[0:-2]] + [">30"]
# pd.cut is called with its default settings; values outside the bins become NaN
df['years_code_bin'] = pd.cut(df['years_code'], bins=_bins, labels=labels)
df['years_code_pro_bin'] = pd.cut(df['years_code_pro'], bins=_bins, labels=labels)

Who uses AI?

Code

#data -------------------------
# share (in %) of every "ai_select_l" answer within each age group; keep "Yes"
answer_share = df.groupby(["age","age_l"])["ai_select_l"].value_counts(normalize=True)
pivot = (answer_share.unstack() * 100)["Yes"].to_frame().reset_index()
# age code 8 is recoded to 0 so that it sorts first
# (presumably the youngest group - verify against the crosswalk)
pivot = pivot.assign(age=pivot["age"].replace(8, 0)).sort_values(by="age")

#chart setup ------------
cols = {
    "bg": "None",
    "text": "#D6DEEC",
    "title": "#32A6FE",
    "grid": "#2C3952",
    "subtitle": "#8EA1C4",
}
fig, ax = plt.subplots(figsize=(6,5), dpi=82)
fig.set_facecolor(cols["bg"])

#colormap ---------------
norm = mpl.colors.Normalize(vmin=0, vmax=1)
m = cm.ScalarMappable(norm=norm, cmap=cmap)

#plot data --------------------
bar_colour = m.to_rgba(1)
ax.barh(pivot["age_l"], pivot["Yes"], height=0.55, color=bar_colour, zorder=3)

#format axes
ax.set_facecolor(cols["bg"])
ax.invert_yaxis()
for side in ("top", "right", "bottom"):
    ax.spines[side].set_visible(False)
ax.spines["left"].set(color=cols["grid"], linewidth=1)
ax.grid(visible=True, axis='x', zorder=2, color=cols["grid"], linewidth=0.5, alpha=0.4)
ax.tick_params(axis='both', bottom=False, left=False, pad=10)
# text properties shared by the tick labels on both axes
tick_style = dict(fontproperties=label_font, fontsize=14, color=cols["text"])
x_ticks = ax.get_xticks()
ax.set_xticks(x_ticks, [f"{int(tick)}%" for tick in x_ticks], **tick_style)
ax.set_yticks(ax.get_yticks(), pivot["age_l"], **tick_style)

#title and footer --------------
heading_x = -0.19
plt.figtext(heading_x, 1.15, "Young developers use AI more in their work",
            fontproperties=title_font, fontsize=22, ha="left", color=cols["title"])
plt.figtext(heading_x, 1.03,
            "Proportion of developers who currently use AI in their development\nprocess by age",
            fontproperties=label_font, fontsize=16, ha="left", color=cols["subtitle"],
            va="center", linespacing=1.8)
plt.figtext(0.4, -0.09,
            "Data: Stackoverflow Developer Survey 2024   |   Design: Lisa Hornung",
            fontsize=11, ha="center", fontproperties=label_font, alpha=0.8,
            color=cols["subtitle"])
plt.show()

Code

#data--------------------------------------
# "Yes" share (in %) of "ai_select_l" per experience bin:
# pivot is binned on years_code, pivot2 on years_code_pro
pivot, pivot2 = (
    (df.groupby([bin_column], observed=True)["ai_select_l"].value_counts(normalize=True).unstack() * 100)["Yes"]
    .to_frame()
    .reset_index()
    for bin_column in ("years_code_bin", "years_code_pro_bin")
)

# Chart setup ------------
cols = {
    "bg": "None",
    "text": "#D6DEEC",
    "title": "#32A6FE",
    "grid": "#2C3952",
    "subtitle": "#8EA1C4",
}
fig, ax = plt.subplots(figsize=(6,6), dpi=82)
fig.set_facecolor(cols["bg"])

# colormap ---------------
norm = mpl.colors.Normalize(vmin=0, vmax=1)
m = cm.ScalarMappable(norm=norm, cmap=cmap)

# plot data --------------------
# the two series sit side by side: one bar half a height above, one below each row
bar_height = 0.3
offset = bar_height / 2
ax.barh(pivot.index - offset, pivot["Yes"], height=bar_height, color=m.to_rgba(0.95), zorder=3)
ax.barh(pivot2.index + offset, pivot2["Yes"], height=bar_height, color=m.to_rgba(0.15), zorder=3)

# format axes
ax.set_facecolor(cols["bg"])
ax.invert_yaxis()
for side in ("top", "right", "bottom"):
    ax.spines[side].set_visible(False)
ax.spines["left"].set(color=cols["grid"], linewidth=1)
ax.grid(visible=True, axis='x', zorder=2, color=cols["grid"], linewidth=0.5, alpha=0.4)
ax.tick_params(axis='both', bottom=False, left=False, pad=10)
# text properties shared by the tick labels on both axes
tick_style = dict(fontproperties=label_font, fontsize=14, color=cols["text"])
x_ticks = ax.get_xticks()
ax.set_xticks(x_ticks, [f"{int(tick)}%" for tick in x_ticks], **tick_style)
ax.set_yticks(pivot.index, [f"{bin_label} years" for bin_label in pivot["years_code_bin"]], **tick_style)

#legend -------------------------------
# hand-made legend: two coloured dots on a hidden axes, each followed by its label
lg = fig.add_axes([0.13,0.9,0.48,0.05])
kw = dict(marker='o', s=150, alpha=0.9, linewidths=0.45, ec="black")
marker_x = [2, 3]
lg.scatter(x=marker_x, y=[0.8, 0.8], color=[m.to_rgba(0.95), m.to_rgba(0.15)], **kw, clip_on=False)
labels = ["Coding incl education", "Professional coding"]
for x_pos, label in zip(marker_x, labels):
    lg.text(x_pos + 0.07, 0.8, label, fontsize=14, ha="left", va="center",
            fontproperties=label_font, color=cols["text"])
lg.axis("off")

# title and footer --------------
heading_x = -0.19
plt.figtext(heading_x, 1.18, "AI usage decreases with coding experience",
            fontproperties=title_font, fontsize=22, ha="left", color=cols["title"])
plt.figtext(heading_x, 1.07,
            "Proportion of developers who currently use AI in their\ndevelopment process by years coding",
            fontproperties=label_font, fontsize=16, ha="left", color=cols["subtitle"],
            va="center", linespacing=1.8)
plt.figtext(0.4, -0.05,
            "Data: Stackoverflow Developer Survey 2024   |   Design: Lisa Hornung",
            fontsize=11, ha="center", fontproperties=label_font, alpha=0.8,
            color=cols["subtitle"])
plt.show()

How do developers feel about using AI?

Code

#data--------------------------------------
# rank the stances 1-5 in the order they should be stacked and listed in the legend
ai_sent_r = dict(zip(
    ["Very favorable", "Favorable", "Indifferent", "Unfavorable", "Very unfavorable"],
    range(1, 6),
))
df["ai_sent_r"] = df["ai_sent_l"].map(ai_sent_r)
# drop "Unsure" answers, then compute the share (in %) of each rank per experience bin
not_unsure = df[df["ai_sent_l"] != "Unsure"]
stance_share = not_unsure.groupby(["years_code_bin"], observed=True)["ai_sent_r"].value_counts(normalize=True)
pivot = stance_share.unstack() * 100

# Chart setup ------------
cols = {
    "bg": "None",
    "text": "#D6DEEC",
    "title": "#32A6FE",
    "grid": "#2C3952",
    "subtitle": "#8EA1C4",
}
fig, ax = plt.subplots(figsize=(8,5), dpi=82)
fig.set_facecolor(cols["bg"])

#colors
# one colour per stance rank, sampled from the shared colormap
norm = mpl.colors.Normalize(vmin=0, vmax=1)
m = cm.ScalarMappable(norm=norm, cmap=cmap)
color_list = m.to_rgba([0.95, 0.65, 0.5, 0.3, 0.05])

#create stacked bar chart
pivot.plot(kind='barh', stacked=True, color=color_list, ax=ax, zorder=3, width=0.6, legend=False)

# format axes
ax.set_facecolor(cols["bg"])
ax.invert_yaxis()
for side in ("top", "right", "bottom"):
    ax.spines[side].set_visible(False)
ax.spines["left"].set(color=cols["grid"], linewidth=1)
ax.grid(visible=True, axis='x', zorder=2, color=cols["grid"], linewidth=0.5, alpha=0.4)
ax.tick_params(axis='both', bottom=False, left=False, pad=10)
# text properties shared by the tick labels on both axes
tick_style = dict(fontproperties=label_font, fontsize=14, color=cols["text"])
x_ticks = ax.get_xticks()
ax.set_xticks(x_ticks, [f"{int(tick)}%" for tick in x_ticks], **tick_style)
ax.set_xlim(0, 100)
ax.set_yticks(np.arange(len(pivot)), [f"{bin_label} years" for bin_label in pivot.index.astype(str)], **tick_style)
ax.set_ylabel("")

#legend -------------------------------
# text-only legend: each stance name is written in the colour of its bar segment
lg = fig.add_axes([0.04,0.93,0.68,0.05])
labels = list(ai_sent_r)
for x_pos, label, label_colour in zip([1, 2.2, 3, 4, 5], labels, color_list):
    lg.text(x_pos, 0.8, label, fontsize=14, ha="left", va="center",
            fontproperties=title_font, color=label_colour)
lg.set_xlim(0.9, 5.1)
lg.axis("off")

# title and footer --------------
heading_x = -0.05
plt.figtext(heading_x, 1.27, "New coders are more favourable towards using AI",
            fontproperties=title_font, fontsize=22, ha="left", color=cols["title"])
plt.figtext(heading_x, 1.15,
            "Proportion of developers and their stance towards using AI in their\ndevelopment process by years coding",
            fontproperties=label_font, fontsize=16, ha="left", color=cols["subtitle"],
            va="center", linespacing=1.8)
plt.figtext(0.4, -0.09,
            "Data: Stackoverflow Developer Survey 2024   |   Design: Lisa Hornung",
            fontsize=11, ha="center", fontproperties=label_font, alpha=0.8,
            color=cols["subtitle"])

plt.show()

Do developers fear for their jobs?

Younger developers and those with less coding experience are more likely to believe AI is a threat to their jobs. But even those aged 35 or older can feel their jobs are at risk when they’ve been coding for fewer years than their peers in the same age group.

Code

# data ------------------------
# Three views of the share (in %) of respondents answering "Yes" to
# "ai_threat_l": by age x years-coding bin (scatter), by age (side bars)
# and by years-coding bin (top bars).

# by age + years coding 
# filter to remove values where sample size < 100
# (each rule flags one age group / years-coding range with code_filter = 1)
df.loc[(df["age_l"] =="Under 18 years old") & (df["years_code"]>10), "code_filter"] = 1
df.loc[(df["age_l"] =="18-24 years old") & (df["years_code"]>10), "code_filter"] = 1
df.loc[(df["age_l"] =="25-34 years old") & (df["years_code"]>20), "code_filter"] = 1
df.loc[(df["age_l"] =="35-44 years old") & (df["years_code"]<6), "code_filter"] = 1
df.loc[(df["age_l"] =="35-44 years old") & (df["years_code"]>25), "code_filter"] = 1
df.loc[(df["age_l"] =="45-54 years old") & (df["years_code"]<21), "code_filter"] = 1
df.loc[(df["age_l"] =="55-64 years old") & (df["years_code"]<21), "code_filter"] = 1
# keep unflagged rows; three age groups are dropped from the scatter entirely
_filter = ((df["code_filter"] != 1) & (~df["age_l"].isin(["65 years or older", "Prefer not to say", "55-64 years old"])))
pivot_scatter = df[_filter].groupby(["age", "years_code_bin"], observed=True)["ai_threat_l"].value_counts(normalize=True).unstack() *100
pivot_scatter = pivot_scatter["Yes"].to_frame().reset_index()
# age code 8 is recoded to 0 so it is drawn on the first row of the y axis
pivot_scatter["age"] = pivot_scatter["age"].replace(8,0)

# by age
_filter = ((~df["age_l"].isin(["65 years or older", "Prefer not to say",])))
pivot_bary = df[_filter].groupby(["age", "age_l"], observed=True)["ai_threat_l"].value_counts(normalize=True).unstack() *100
pivot_bary = pivot_bary["Yes"].to_frame().reset_index()
pivot_bary["age"] = pivot_bary["age"].replace(8,0)
pivot_bary = pivot_bary.sort_values(by="age")

# by years coding
_filter = ((~df["age_l"].isin(["65 years or older", "Prefer not to say",])))
pivot_barx = df[_filter].groupby(["years_code_bin"], observed=True)["ai_threat_l"].value_counts(normalize=True).unstack()*100
pivot_barx = pivot_barx["Yes"].to_frame().reset_index()

# colormap -----------------------------------------
# one colour scale (8% to 17%) shared by the scatter, both bar charts and the colorbar
norm = mpl.colors.Normalize(vmin=8, vmax=17)
m = cm.ScalarMappable(norm=norm, cmap=cmap)

# Chart setup -------------------------------------------------------
# 5x5 grid: scatter in the lower-left 4x4 cells, bars by years coding on top,
# bars by age on the right
fig = plt.figure(figsize=(8,6), dpi=82)
gs = GridSpec(5, 5)
ax_scatter = fig.add_subplot(gs[1:5, 0:4])
ax_barx = fig.add_subplot(gs[0,0:4])
ax_bary = fig.add_subplot(gs[1:5, 4])
fig.subplots_adjust(wspace=0.5, hspace=0.8)
cols = {"bg": "None", "text": "#D6DEEC","title": "#32A6FE", "grid": "#2C3952", "subtitle": "#8EA1C4"}
fig.set_facecolor(cols["bg"])

# plot data --------------------
# bubble size and colour both encode the "Yes" share
# NOTE(review): the x positions come from matplotlib's categorical axis, so the
# scatter only lines up with the top bars if the bins appear in the same order
# in pivot_scatter and pivot_barx - confirm
ax_scatter.scatter(pivot_scatter["years_code_bin"],pivot_scatter["age"], s=[i*80 for i in pivot_scatter["Yes"]],
                   c=pivot_scatter["Yes"], cmap=cmap,clip_on=False, vmin=8, vmax=17, zorder=3)
ax_bary.barh(pivot_bary["age"], pivot_bary["Yes"], height=0.4, color=m.to_rgba(pivot_bary["Yes"]), zorder=3)
ax_barx.bar(pivot_barx["years_code_bin"],pivot_barx["Yes"], width=0.4, color=m.to_rgba(pivot_barx["Yes"]) , zorder=3)

# format axes --------------------
# identical limits on the shared dimension keep the marginal bars aligned with the scatter
for ax in [ax_scatter, ax_bary]:
    ax.set_ylim(ymin=-0.5, ymax=5.5)
    ax.invert_yaxis()
    ax.set_facecolor(cols["bg"])
for ax in [ax_scatter, ax_barx]:
    ax.set_xlim(xmin=-0.5, xmax=6.5)
    ax.set_facecolor(cols["bg"])
ax_bary.set_xlim(xmin=0,xmax=20)
ax_barx.set_ylim(ymin=0,ymax=20)

#spines
for pos in ["top", "right"]:
    ax_barx.spines[pos].set_visible(False)
    ax_bary.spines[pos].set_visible(False)
    ax_scatter.spines[pos].set_visible(False)
ax_bary.spines["bottom"].set_visible(False)
ax_barx.spines["left"].set_visible(False)
ax_barx.spines["bottom"].set(color=cols["grid"], linewidth=1)
ax_bary.spines["left"].set(color=cols["grid"], linewidth=1)
ax_scatter.spines["left"].set(color=cols["grid"], linewidth=1)
ax_scatter.spines["bottom"].set(color=cols["grid"], linewidth=1)

# ticks and grids
# the marginal bar charts carry no tick labels; only the scatter is labelled
ax_barx.tick_params(axis='both', bottom=False, left=False, labelbottom=False,pad=10, labelleft=False, )
ax_bary.tick_params(axis='both', left=False,bottom=False, labelleft=False,pad=10,  labelbottom=False, )
ax_scatter.grid(visible=True, axis='both', zorder=2, color=cols["grid"], linewidth=0.5, alpha=0.4)
ax_scatter.tick_params(axis='both', left=False, bottom=False,pad=10 )
# six y positions (0-5) labelled with the age groups in pivot_bary's sorted order
ax_scatter.set_yticks(np.arange(0,6,1), pivot_bary["age_l"], fontproperties=label_font, fontsize=14, color=cols["text"])
ax_scatter.set_xticks(ax_scatter.get_xticks(), ax_scatter.get_xticklabels(), fontproperties=label_font, fontsize=14, color=cols["text"])
ax_scatter.set_xlabel("Years coding", labelpad=15, fontproperties=title_font, fontsize=14, color=cols["text"])
ax_scatter.set_ylabel("Age", labelpad=15, fontproperties=title_font, fontsize=12, color=cols["text"])

# colorbar ------------------
cbaxes = fig.add_axes([0.68, 1.03, 0.18, 0.02]) #axes to hold colorbar
# Build the colorbar from the same norm as the plotted data and derive the tick
# positions and labels from it, so the legend always matches the colour scale.
cmappable = cm.ScalarMappable(norm=norm, cmap=cmap)
cb_ticks = [norm.vmin, (norm.vmin + norm.vmax) / 2, norm.vmax]
cb = plt.colorbar(cmappable, cax=cbaxes, anchor=(1,1.2), orientation='horizontal', drawedges=False, ticks=cb_ticks)
cb.set_ticks(ticks=cb_ticks, labels=[f"{tick:g}%" for tick in cb_ticks], color=cols["text"], fontproperties=legend_font, fontsize=12)
cb.outline.set_visible(False)
cbaxes.tick_params(color=cols["subtitle"])

# title and footer --------------
plt.figtext(-0.17,1.15, "Less experienced developers see AI more as a threat", fontproperties = title_font, fontsize=22, ha="left", color=cols["title"])
plt.figtext(-0.17,1.03, "Proportion of developers who believe AI is a threat\nto their current job by age and years of coding ", fontproperties = label_font, 
            fontsize=16, ha="left",color=cols["subtitle"] , va="center", linespacing=1.8)
plt.figtext(0.4,-0.1, "Data: Stackoverflow Developer Survey 2024   |   Design: Lisa Hornung", fontsize=11,  ha="center",fontproperties = label_font,  alpha=0.8, color=cols["subtitle"])

plt.show()

Note

Groups with fewer than 100 respondents were excluded from the last chart (threat by age and years coding).

Made with 💙 by Lisa Hornung. Follow me on