New coders use AI more in their work but also see it as a threat
Contribution to #TidyTuesday analysing data from the Stack Overflow Developer Survey 2024. Analysis and charts created with Python; site made with Quarto.
Setup
To start, we’ll need to load all the libraries and data we need. Feel free to skip the boring bit …
Code
# load libraries
import matplotlib as mpl
import matplotlib.cm as cm
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from matplotlib import font_manager
from matplotlib.gridspec import GridSpec

# load data: the survey responses and the crosswalk table of answer labels
label_dict = pd.read_csv("qname_levels_single_response_crosswalk.csv")
df = pd.read_csv("stackoverflow_survey_single_response.csv")

# set fonts (absolute paths to the font files in the Windows font folder)
label_font = font_manager.FontProperties(
    fname='C:\\Windows\\Fonts\\UbuntuMono-R.ttf'
)
legend_font = font_manager.FontProperties(
    fname='C:\\Windows\\Fonts\\Roboto-Regular.ttf'
)
title_font = font_manager.FontProperties(
    fname='C:\\Windows\\Fonts\\UbuntuMono-B.ttf'
)

# set colormap (reversed "spring"), used by every chart in this analysis
cmap = mpl.colormaps['spring_r']
Now we’ll clean the data and derive additional variables.
Code
# map labels to questions
# For every coded question, the crosswalk rows whose "qname" matches give a
# level -> label lookup. The labelled answers are written to a new
# "<column>_l" column next to the raw codes.
columns = ["age", "ai_sent", "ai_select", "ai_acc", "ai_complex", "ai_threat"]
for column in columns:
    crosswalk_rows = label_dict[label_dict["qname"] == column]
    # Deliberately not called `map`, so the builtin map() is not shadowed.
    level_to_label = dict(
        zip(crosswalk_rows["level"], crosswalk_rows["label"])
    )
    df[column + "_l"] = df[column].map(level_to_label)

# create column based on binned values and add labels
# pd.cut uses right-closed intervals by default, so (0, 5] is labelled "1-5",
# (5, 10] is "6-10", ... and the last interval (30, 100] is ">30". Values
# outside the bin edges (and missing values) become NaN.
_bins = [0, 5, 10, 15, 20, 25, 30, 100]
labels = [
    f"{lower + 1}-{upper}" for lower, upper in zip(_bins[:-2], _bins[1:-1])
] + [f">{_bins[-2]}"]
df['years_code_bin'] = pd.cut(df['years_code'], bins=_bins, labels=labels)
df['years_code_pro_bin'] = pd.cut(
    df['years_code_pro'], bins=_bins, labels=labels
)
Who uses AI?
Code
# data -------------------------
# Share (in %) of each "ai_select_l" answer within every age group; only the
# "Yes" column is kept for the chart.
answer_shares = (
    df.groupby(["age", "age_l"])["ai_select_l"]
    .value_counts(normalize=True)
    .unstack()
    * 100
)
pivot = answer_shares["Yes"].to_frame().reset_index()
# Recode age level 8 as 0 before sorting (presumably so that this group is
# listed first -- confirm against the crosswalk file).
pivot["age"] = pivot["age"].replace(8, 0)
pivot = pivot.sort_values(by="age")

# chart setup ------------
cols = {
    "bg": "None",
    "text": "#D6DEEC",
    "title": "#32A6FE",
    "grid": "#2C3952",
    "subtitle": "#8EA1C4",
}
fig, ax = plt.subplots(figsize=(6, 5), dpi=82)
fig.set_facecolor(cols["bg"])

# colormap ---------------
norm = mpl.colors.Normalize(vmin=0, vmax=1)
m = cm.ScalarMappable(norm=norm, cmap=cmap)

# plot data --------------------
bar_colour = m.to_rgba(1)
ax.barh(
    pivot["age_l"], pivot["Yes"], height=0.55, color=bar_colour, zorder=3
)

# format axes
ax.set_facecolor(cols["bg"])
ax.invert_yaxis()
# Keep only the left spine, drawn in the grid colour.
for side, spine in ax.spines.items():
    if side == "left":
        spine.set(color=cols["grid"], linewidth=1)
    else:
        spine.set_visible(False)
ax.grid(
    visible=True, axis='x', zorder=2,
    color=cols["grid"], linewidth=0.5, alpha=0.4,
)
ax.tick_params(axis='both', bottom=False, left=False, pad=10)
tick_style = dict(fontproperties=label_font, fontsize=14, color=cols["text"])
x_ticks = ax.get_xticks()
ax.set_xticks(x_ticks, [f"{int(tick)}%" for tick in x_ticks], **tick_style)
ax.set_yticks(ax.get_yticks(), pivot["age_l"], **tick_style)

# title and footer --------------
plt.figtext(
    -0.19, 1.15,
    "Young developers use AI more in their work",
    fontproperties=title_font, fontsize=22, ha="left", color=cols["title"],
)
plt.figtext(
    -0.19, 1.03,
    "Proportion of developers who currently use AI in their development\nprocess by age",
    fontproperties=label_font, fontsize=16, ha="left",
    color=cols["subtitle"], va="center", linespacing=1.8,
)
plt.figtext(
    0.4, -0.09,
    "Data: Stackoverflow Developer Survey 2024 | Design: Lisa Hornung",
    fontsize=11, ha="center", fontproperties=label_font,
    alpha=0.8, color=cols["subtitle"],
)

plt.show()
Code
# data--------------------------------------
def _yes_share(bin_column):
    """Return the % of "Yes" answers to ai_select_l per level of bin_column."""
    shares = (
        df.groupby([bin_column], observed=True)["ai_select_l"]
        .value_counts(normalize=True)
        .unstack()
        * 100
    )
    return shares["Yes"].to_frame().reset_index()


pivot = _yes_share("years_code_bin")
pivot2 = _yes_share("years_code_pro_bin")

# Chart setup ------------
cols = {
    "bg": "None",
    "text": "#D6DEEC",
    "title": "#32A6FE",
    "grid": "#2C3952",
    "subtitle": "#8EA1C4",
}
fig, ax = plt.subplots(figsize=(6, 6), dpi=82)
fig.set_facecolor(cols["bg"])

# colormap ---------------
norm = mpl.colors.Normalize(vmin=0, vmax=1)
m = cm.ScalarMappable(norm=norm, cmap=cmap)
education_colour = m.to_rgba(0.95)
professional_colour = m.to_rgba(0.15)

# plot data --------------------
# Two bars per experience band, offset half a bar height above / below the
# band's position so they sit side by side.
bar_height = 0.3
ax.barh(
    pivot.index - bar_height / 2, pivot["Yes"],
    height=bar_height, color=education_colour, zorder=3,
)
ax.barh(
    pivot2.index + bar_height / 2, pivot2["Yes"],
    height=bar_height, color=professional_colour, zorder=3,
)

# format axes
ax.set_facecolor(cols["bg"])
ax.invert_yaxis()
# Keep only the left spine, drawn in the grid colour.
for side, spine in ax.spines.items():
    if side == "left":
        spine.set(color=cols["grid"], linewidth=1)
    else:
        spine.set_visible(False)
ax.grid(
    visible=True, axis='x', zorder=2,
    color=cols["grid"], linewidth=0.5, alpha=0.4,
)
ax.tick_params(axis='both', bottom=False, left=False, pad=10)
tick_style = dict(fontproperties=label_font, fontsize=14, color=cols["text"])
x_ticks = ax.get_xticks()
ax.set_xticks(x_ticks, [f"{int(tick)}%" for tick in x_ticks], **tick_style)
ax.set_yticks(
    pivot.index,
    [band + " years" for band in pivot["years_code_bin"]],
    **tick_style,
)

# legend -------------------------------
# A small invisible axes above the plot holding one marker + label per series.
lg = fig.add_axes([0.13, 0.9, 0.48, 0.05])
kw = dict(marker='o', s=150, alpha=0.9, linewidths=0.45, ec="black")
lg.scatter(
    x=[2, 3], y=[0.8, 0.8],
    color=[education_colour, professional_colour],
    **kw, clip_on=False,
)
labels = ["Coding incl education", "Professional coding"]
for x_pos, label in zip([2, 3], labels):
    lg.text(
        x_pos + 0.07, 0.8, label,
        fontsize=14, ha="left", va="center",
        fontproperties=label_font, color=cols["text"],
    )
lg.axis("off")

# title and footer --------------
plt.figtext(
    -0.19, 1.18,
    "AI usage decreases with coding experience",
    fontproperties=title_font, fontsize=22, ha="left", color=cols["title"],
)
plt.figtext(
    -0.19, 1.07,
    "Proportion of developers who currently use AI in their\ndevelopment process by years coding",
    fontproperties=label_font, fontsize=16, ha="left",
    color=cols["subtitle"], va="center", linespacing=1.8,
)
plt.figtext(
    0.4, -0.05,
    "Data: Stackoverflow Developer Survey 2024 | Design: Lisa Hornung",
    fontsize=11, ha="center", fontproperties=label_font,
    alpha=0.8, color=cols["subtitle"],
)

plt.show()
How do developers feel about using AI?
Code
# data--------------------------------------
# Rank the stance labels from most (1) to least (5) favourable so the stacked
# segments are ordered by sentiment.
ai_sent_r = {
    "Very favorable": 1,
    "Favorable": 2,
    "Indifferent": 3,
    "Unfavorable": 4,
    "Very unfavorable": 5,
}
df["ai_sent_r"] = df["ai_sent_l"].map(ai_sent_r)

# Share (in %) of each stance per experience band, leaving out "Unsure".
decided = df[df["ai_sent_l"] != "Unsure"]
pivot = (
    decided.groupby(["years_code_bin"], observed=True)["ai_sent_r"]
    .value_counts(normalize=True)
    .unstack()
    * 100
)

# Chart setup ------------
cols = {
    "bg": "None",
    "text": "#D6DEEC",
    "title": "#32A6FE",
    "grid": "#2C3952",
    "subtitle": "#8EA1C4",
}
fig, ax = plt.subplots(figsize=(8, 5), dpi=82)
fig.set_facecolor(cols["bg"])

# colors: one colormap sample per stance, in rank order
norm = mpl.colors.Normalize(vmin=0, vmax=1)
m = cm.ScalarMappable(norm=norm, cmap=cmap)
color_list = m.to_rgba([0.95, 0.65, 0.5, 0.3, 0.05])

# create stacked bar chart
pivot.plot(
    kind='barh', stacked=True, color=color_list, ax=ax,
    zorder=3, width=0.6, legend=False,
)

# format axes
ax.set_facecolor(cols["bg"])
ax.invert_yaxis()
# Keep only the left spine, drawn in the grid colour.
for side, spine in ax.spines.items():
    if side == "left":
        spine.set(color=cols["grid"], linewidth=1)
    else:
        spine.set_visible(False)
ax.grid(
    visible=True, axis='x', zorder=2,
    color=cols["grid"], linewidth=0.5, alpha=0.4,
)
ax.tick_params(axis='both', bottom=False, left=False, pad=10)
tick_style = dict(fontproperties=label_font, fontsize=14, color=cols["text"])
x_ticks = ax.get_xticks()
ax.set_xticks(x_ticks, [f"{int(tick)}%" for tick in x_ticks], **tick_style)
ax.set_xlim(xmin=0, xmax=100)
ax.set_yticks(
    np.arange(len(pivot)),
    [f"{band} years" for band in pivot.index.astype(str)],
    **tick_style,
)
ax.set_ylabel("")

# legend -------------------------------
# Text-only legend: each stance label is written in its own segment colour.
lg = fig.add_axes([0.04, 0.93, 0.68, 0.05])
labels = list(ai_sent_r)
for x_pos, label, colour in zip([1, 2.2, 3, 4, 5], labels, color_list):
    lg.text(
        x_pos, 0.8, label,
        fontsize=14, ha="left", va="center",
        fontproperties=title_font, color=colour,
    )
lg.set_xlim(xmin=0.9, xmax=5.1)
lg.axis("off")

# title and footer --------------
plt.figtext(
    -0.05, 1.27,
    "New coders are more favourable towards using AI",
    fontproperties=title_font, fontsize=22, ha="left", color=cols["title"],
)
plt.figtext(
    -0.05, 1.15,
    "Proportion of developers and their stance towards using AI in their\ndevelopment process by years coding",
    fontproperties=label_font, fontsize=16, ha="left",
    color=cols["subtitle"], va="center", linespacing=1.8,
)
plt.figtext(
    0.4, -0.09,
    "Data: Stackoverflow Developer Survey 2024 | Design: Lisa Hornung",
    fontsize=11, ha="center", fontproperties=label_font,
    alpha=0.8, color=cols["subtitle"],
)

plt.show()
Do developers fear for their jobs?
Younger developers and those with less coding experience are more likely to believe AI is a threat to their jobs. But even developers aged 35 or older can feel their jobs are at risk when they have been coding for fewer years than their peers in the same age group.
Code
# data ------------------------
# by age + years coding
# Flag the age / years-coding combinations where the sample size is < 100 so
# they can be left out of the scatter plot.
years_coding = df["years_code"]
sparse_groups = [
    ("Under 18 years old", years_coding > 10),
    ("18-24 years old", years_coding > 10),
    ("25-34 years old", years_coding > 20),
    ("35-44 years old", years_coding < 6),
    ("35-44 years old", years_coding > 25),
    ("45-54 years old", years_coding < 21),
    ("55-64 years old", years_coding < 21),
]
for age_label, too_few in sparse_groups:
    df.loc[(df["age_l"] == age_label) & too_few, "code_filter"] = 1

excluded_ages = ["65 years or older", "Prefer not to say"]


def _threat_share(rows, group_columns):
    """Return the % of "Yes" answers to ai_threat_l per group of rows."""
    shares = (
        rows.groupby(group_columns, observed=True)["ai_threat_l"]
        .value_counts(normalize=True)
        .unstack()
        * 100
    )
    return shares["Yes"].to_frame().reset_index()


# Scatter data: unflagged rows only; the 55-64 group is dropped entirely here.
_filter = (df["code_filter"] != 1) & ~df["age_l"].isin(
    excluded_ages + ["55-64 years old"]
)
pivot_scatter = _threat_share(df[_filter], ["age", "years_code_bin"])
# Age level 8 is recoded to 0 (presumably so that group is plotted first --
# confirm against the crosswalk file).
pivot_scatter["age"] = pivot_scatter["age"].replace(8, 0)

# by age
_filter = ~df["age_l"].isin(excluded_ages)
pivot_bary = _threat_share(df[_filter], ["age", "age_l"])
pivot_bary["age"] = pivot_bary["age"].replace(8, 0)
pivot_bary = pivot_bary.sort_values(by="age")

# by years coding (same respondents as the age bars)
pivot_barx = _threat_share(df[_filter], ["years_code_bin"])

# colormap -----------------------------------------
# One colour scale for the scatter, both bar charts and the colorbar, so the
# legend always describes the colours that are actually drawn.
threat_min, threat_max = 8, 17
norm = mpl.colors.Normalize(vmin=threat_min, vmax=threat_max)
m = cm.ScalarMappable(norm=norm, cmap=cmap)

# Chart setup -------------------------------------------------------
# 5x5 grid: scatter in the lower-left 4x4 cells, marginal bar charts along
# the top row and the right-hand column.
fig = plt.figure(figsize=(8, 6), dpi=82)
gs = GridSpec(5, 5)
ax_scatter = fig.add_subplot(gs[1:5, 0:4])
ax_barx = fig.add_subplot(gs[0, 0:4])
ax_bary = fig.add_subplot(gs[1:5, 4])
fig.subplots_adjust(wspace=0.5, hspace=0.8)
cols = {
    "bg": "None",
    "text": "#D6DEEC",
    "title": "#32A6FE",
    "grid": "#2C3952",
    "subtitle": "#8EA1C4",
}
fig.set_facecolor(cols["bg"])

# plot data --------------------
# Marker size and marker colour both encode the "Yes" share.
ax_scatter.scatter(
    pivot_scatter["years_code_bin"], pivot_scatter["age"],
    s=[share * 80 for share in pivot_scatter["Yes"]],
    c=pivot_scatter["Yes"], cmap=cmap, clip_on=False,
    vmin=threat_min, vmax=threat_max, zorder=3,
)
ax_bary.barh(
    pivot_bary["age"], pivot_bary["Yes"], height=0.4,
    color=m.to_rgba(pivot_bary["Yes"]), zorder=3,
)
ax_barx.bar(
    pivot_barx["years_code_bin"], pivot_barx["Yes"], width=0.4,
    color=m.to_rgba(pivot_barx["Yes"]), zorder=3,
)

# format axes --------------------
# The marginal charts share the scatter's category limits so bars line up
# with the rows / columns of the scatter.
for ax in [ax_scatter, ax_bary]:
    ax.set_ylim(ymin=-0.5, ymax=5.5)
    ax.invert_yaxis()
    ax.set_facecolor(cols["bg"])
for ax in [ax_scatter, ax_barx]:
    ax.set_xlim(xmin=-0.5, xmax=6.5)
    ax.set_facecolor(cols["bg"])
ax_bary.set_xlim(xmin=0, xmax=20)
ax_barx.set_ylim(ymin=0, ymax=20)

# spines
for pos in ["top", "right"]:
    for ax in [ax_barx, ax_bary, ax_scatter]:
        ax.spines[pos].set_visible(False)
ax_bary.spines["bottom"].set_visible(False)
ax_barx.spines["left"].set_visible(False)
ax_barx.spines["bottom"].set(color=cols["grid"], linewidth=1)
ax_bary.spines["left"].set(color=cols["grid"], linewidth=1)
ax_scatter.spines["left"].set(color=cols["grid"], linewidth=1)
ax_scatter.spines["bottom"].set(color=cols["grid"], linewidth=1)

# ticks and grids
ax_barx.tick_params(
    axis='both', bottom=False, left=False,
    labelbottom=False, pad=10, labelleft=False,
)
ax_bary.tick_params(
    axis='both', left=False, bottom=False,
    labelleft=False, pad=10, labelbottom=False,
)
ax_scatter.grid(
    visible=True, axis='both', zorder=2,
    color=cols["grid"], linewidth=0.5, alpha=0.4,
)
ax_scatter.tick_params(axis='both', left=False, bottom=False, pad=10)
ax_scatter.set_yticks(
    np.arange(0, 6, 1), pivot_bary["age_l"],
    fontproperties=label_font, fontsize=14, color=cols["text"],
)
ax_scatter.set_xticks(
    ax_scatter.get_xticks(), ax_scatter.get_xticklabels(),
    fontproperties=label_font, fontsize=14, color=cols["text"],
)
ax_scatter.set_xlabel(
    "Years coding", labelpad=15,
    fontproperties=title_font, fontsize=14, color=cols["text"],
)
ax_scatter.set_ylabel(
    "Age", labelpad=15,
    fontproperties=title_font, fontsize=12, color=cols["text"],
)

# colorbar ------------------
# Drawn from the same norm as the data. Previously it used a separate 0-1
# norm with hand-typed labels ("7%", "12%", "17%") that did not match the
# 8-17 colour range. Tick labels are now derived from the norm limits.
cbaxes = fig.add_axes([0.68, 1.03, 0.18, 0.02])  # axes to hold colorbar
cmappable = cm.ScalarMappable(norm=norm, cmap=cmap)
cb_ticks = [threat_min, (threat_min + threat_max) / 2, threat_max]
cb = plt.colorbar(
    cmappable, cax=cbaxes, anchor=(1, 1.2), orientation='horizontal',
    drawedges=False, ticks=cb_ticks,
)
cb.set_ticks(
    ticks=cb_ticks, labels=[f"{tick:g}%" for tick in cb_ticks],
    color=cols["text"], fontproperties=legend_font, fontsize=12,
)
cb.outline.set_visible(False)
cbaxes.tick_params(color=cols["subtitle"])

# title and footer --------------
plt.figtext(
    -0.17, 1.15,
    "Less experienced developers see AI more as a threat",
    fontproperties=title_font, fontsize=22, ha="left", color=cols["title"],
)
plt.figtext(
    -0.17, 1.03,
    "Proportion of developers who believe AI is a threat\nto their current job by age and years of coding ",
    fontproperties=label_font, fontsize=16, ha="left",
    color=cols["subtitle"], va="center", linespacing=1.8,
)
plt.figtext(
    0.4, -0.1,
    "Data: Stackoverflow Developer Survey 2024 | Design: Lisa Hornung",
    fontsize=11, ha="center", fontproperties=label_font,
    alpha=0.8, color=cols["subtitle"],
)

plt.show()
Note
Groups with fewer than 100 respondents were excluded from the last chart (threat by age and years coding).