贡献¶
# Load data related to open-source contributions *other* than NumPy
fname = "data/2020/numpy_survey_results.tsv"
# One field per survey column, in file order.
column_names = [
    'contributed', 'projects', 'projects_other', 'contr_type',
    'contr_type_other', 'regular', 'how_got_started', 'how_got_started_other',
    'interested', 'limitations'
]
# Every answer is stored as text (up to 1024 characters per field).
nonnumpy_contributions_dtype = np.dtype({
    "names": column_names,
    "formats": ['<U1024'] * len(column_names),
})
# Read exactly one column per dtype field, starting at column 31.
# The previous range(31, 42) requested 11 columns for a 10-field dtype;
# np.loadtxt requires the number of `usecols` to match the number of fields
# of a structured dtype. Deriving the stop from len(column_names) keeps the
# two in sync (columns 31..40).
# NOTE(review): older NumPy releases appear to have silently ignored the
# surplus trailing column, so the data read here should be unchanged -
# confirm against the TSV header.
ossdata = np.loadtxt(
    fname, delimiter='\t', skiprows=3, dtype=nonnumpy_contributions_dtype,
    usecols=range(31, 31 + len(column_names)), comments=None
)
# Load data related to NumPy contributions
# One field per survey column, in file order.
column_names = [
    'contributed', 'contr_type', 'contr_type_other', 'regular',
    'how_got_started', 'how_got_started_other', 'motivations',
    'motivations_other', 'continue', 'limitations', 'limitations_other',
    'interested', 'interests', 'interests_other'
]
# Every answer is stored as text (up to 1024 characters per field).
numpy_contributions_dtype = np.dtype({
    "names": column_names,
    "formats": ['<U1024'] * len(column_names),
})
# Read exactly one column per dtype field, starting at column 42.
# The previous range(42, 57) requested 15 columns for a 14-field dtype;
# np.loadtxt requires the number of `usecols` to match the number of fields
# of a structured dtype. Deriving the stop from len(column_names) keeps the
# two in sync (columns 42..55).
# NOTE(review): older NumPy releases appear to have silently ignored the
# surplus trailing column, so the data read here should be unchanged -
# confirm against the TSV header.
npdata = np.loadtxt(
    fname, delimiter='\t', skiprows=3, dtype=numpy_contributions_dtype,
    usecols=range(42, 42 + len(column_names)), comments=None
)
assert npdata.shape[0] == ossdata.shape[0]  # Sanity check on data
# Boolean row selectors reused by the rest of the analysis: respondents who
# answered 'Yes' to having contributed to NumPy / to other OSS projects.
np_contributors_mask = (npdata['contributed'] == 'Yes')
oss_contributors_mask = (ossdata['contributed'] == 'Yes')

# Headline counts for OSS- and NumPy-specific contributions
num_respondents = len(npdata)
num_oss_contributors = np.sum(oss_contributors_mask)
num_np_contributors = np.sum(np_contributors_mask)
# Respondents who contributed to both NumPy and at least one other project
num_both_contributors = (oss_contributors_mask & np_contributors_mask).sum()

# Regular contributors, counted only among those who have contributed.
oss_contributor_regular = ossdata['regular'][oss_contributors_mask]
np_contributor_regular = npdata['regular'][np_contributors_mask]
num_oss_regular = (oss_contributor_regular == 'Yes').sum()
num_np_regular = (np_contributor_regular == 'Yes').sum()
# Links for report.
# Each row is (glue key, count, total the count is measured against).
# `gluval` is defined elsewhere in the project; it is handed the count and
# its reference total and its result is what gets glued.
report_links = (
    ('oss_contributors', num_oss_contributors, num_respondents),
    ('np_contributors', num_np_contributors, num_respondents),
    ('numpy_and_oss_contributors', num_both_contributors, num_np_contributors),
    ('oss_regulars', num_oss_regular, num_oss_contributors),
    ('np_regulars', num_np_regular, num_np_contributors),
)
for link_name, count, total in report_links:
    glue(link_name, gluval(count, total), display=False)
NumPy 和开源软件贡献者¶
在 1236 名调查参与者中,490 名(40%)至少为一个开源软件项目做过贡献,而 88 名(7%)专门为 NumPy 做过贡献。在开源软件贡献者中,174 名(36%)是至少一个开源软件项目的当前[1]定期贡献者,而在 NumPy 贡献者中,16 名(18%)定期为 NumPy 贡献。鉴于 NumPy 在科学 Python 生态系统中的核心地位,85 名(97%)的 NumPy 贡献者表示也为其他开源软件项目做出了贡献。
开源软件贡献¶
下图展示了在各种流行科学 Python 项目中工作的贡献者所占的比例。
# Remove less-popular projects (plus the empty and free-text "Other" entries)
projects_to_drop = (
    'Gensim', 'spaCy', '',
    'Other (please specify - use commas to separate multiple entries)'
)
fig, ax = plt.subplots(figsize=(12, 8))

# (bar offset, respondent selector, legend label) for the two groups drawn
# side by side. Both groups are tallied from the non-NumPy 'projects' column.
groups = (
    (0, oss_contributors_mask, 'Non-NumPy Contributors'),
    (1, np_contributors_mask, 'NumPy Contributors'),
)
for start_ind, mask, label in groups:
    # `flatten` is defined elsewhere - presumably it expands multi-answer
    # responses into individual project names; TODO confirm.
    project_data = flatten(ossdata['projects'][mask])
    labels, cnts = np.unique(project_data, return_counts=True)
    # Projects to drop from all datasets
    keep = ~np.isin(labels, projects_to_drop)
    labels, cnts = labels[keep], cnts[keep]
    # Plot: every group takes every second slot, shifted by its offset
    bar_positions = np.arange(start_ind, 2 * len(labels), 2)
    ax.barh(
        bar_positions,
        100 * cnts / mask.sum(),
        align='edge',
        label=label,
    )

# Ticks and names come from the last group drawn.
ax.set_yticks(bar_positions)
ax.set_yticklabels(labels)
ax.set_xlabel('Percentage of Contributors')
ax.legend()
fig.tight_layout()

贡献类型¶
我们还询问了人们以何种方式为开源软件项目做出贡献。74% 为 NumPy 贡献过的受访者贡献了源代码,48% 贡献了文档。虽然 71% 为其他开源软件项目贡献的受访者贡献了代码,但只有 36% 的人帮助过文档工作。
oss_contr_type = flatten(ossdata['contr_type'][oss_contributors_mask])
np_contr_type = flatten(npdata['contr_type'][np_contributors_mask])
fig, ax = plt.subplots(2, 1, figsize=(8, 12))
np_ax, oss_ax = ax[0], ax[1]

# Denominators for the percentages: number of contributors in each group
total_np = np_contributors_mask.sum()
total_oss = oss_contributors_mask.sum()

# NOTE: Unfortunately, the categories for the OSS & np contributions aren't
# the same, so direct comparison is more difficult.
# Handle each dataset separately.

# --- NumPy contributions (top panel) ---
labels, cnts = np.unique(np_contr_type, return_counts=True)
# Ignore duplicate categories from bad split (first entry of the sorted
# unique values)
labels, cnts = labels[1:], cnts[1:]
# Order bars by ascending count
order = np.argsort(cnts)
labels, cnts = labels[order], cnts[order]
ypos = np.arange(len(labels))
np_ax.set_title('NumPy Contributions')
np_ax.barh(ypos, 100 * cnts / total_np, align='center')
np_ax.set_yticks(ypos)
np_ax.set_yticklabels(labels)
np_ax.set_xlabel('Percentage of NumPy Contributors')

# Highlight code and docs contributions
code_contr = cnts[labels == 'Programming'][0]
doc_contr = cnts[labels == 'Writing documentation'][0]
glue(
    'pct_contrib_np_code',
    f"{100 * code_contr / total_np:2.0f}%",
    display=False,
)
glue(
    'pct_contrib_np_docs',
    f"{100 * doc_contr / total_np:2.0f}%",
    display=False,
)

# --- Other OSS contributions (bottom panel) ---
labels, cnts = np.unique(oss_contr_type, return_counts=True)
# Skip the first three sorted categories.
# NOTE(review): presumably split artifacts like in the NumPy data - confirm.
labels, cnts = labels[3:], cnts[3:]
# TODO: Remove these hacks when categories have been synchronized
labels[3] = 'Developing tutorials'
labels[-1] = 'Writing documentation'
order = np.argsort(cnts)
labels, cnts = labels[order], cnts[order]
ypos = np.arange(len(labels))
oss_ax.set_title('Other (non-NumPy) OSS Contributions')
oss_ax.barh(ypos, 100 * cnts / total_oss, align='center')
oss_ax.set_yticks(ypos)
oss_ax.set_yticklabels(labels)
oss_ax.set_xlabel('Percentage of OSS Contributors')

# Highlight code and docs contributions
code_contr = cnts[labels == 'Code maintenance and development'][0]
doc_contr = cnts[labels == 'Writing documentation'][0]
glue(
    'pct_contrib_oss_code',
    f"{100 * code_contr / total_oss:2.0f}%",
    display=False,
)
glue(
    'pct_contrib_oss_docs',
    f"{100 * doc_contr / total_oss:2.0f}%",
    display=False,
)
fig.tight_layout()

贡献者如何开始的?¶
我们还询问了贡献者最初是如何开始参与其所贡献的开源软件包的。
fig, ax = plt.subplots(figsize=(12, 8))

# (survey table, contributor selector, legend label) for each group; the
# enumerate index doubles as the bar offset so the groups sit side by side.
groups = (
    (ossdata, oss_contributors_mask, 'Non-NumPy Contributors'),
    (npdata, np_contributors_mask, 'NumPy Contributors'),
)
for start_ind, (survey, mask, label) in enumerate(groups):
    answered = survey['how_got_started'][mask]
    # Remove non-responses
    answered = answered[answered != '']
    labels, cnts = np.unique(flatten(answered), return_counts=True)
    # Plot: percentages are relative to the respondents who answered
    bar_positions = np.arange(start_ind, 2 * len(labels), 2)
    ax.barh(
        bar_positions,
        100 * cnts / len(answered),
        align='edge',
        label=label,
    )

# Ticks and names come from the last group drawn.
ax.set_yticks(bar_positions)
ax.set_yticklabels(labels)
ax.set_xlabel('Percentage of Contributors')
ax.legend()
fig.tight_layout()

动机¶
促进科学研究、推广开源文化以及回馈科学社区是受访者为 NumPy 做出贡献的前三大原因。
# NumPy data only, no need to apply np/oss masks
motivations = npdata['motivations']
# Keep only respondents who answered the question
motivations = motivations[motivations != '']

# Tally individual motivations and order them by ascending count
labels, cnts = np.unique(flatten(motivations), return_counts=True)
order = np.argsort(cnts)
labels, cnts = labels[order], cnts[order]

fig, ax = plt.subplots(figsize=(12, 6))
ypos = np.arange(len(labels))
ax.barh(ypos, 100 * cnts / len(motivations))
ax.set_yticks(ypos)
ax.set_yticklabels(labels)
ax.set_xlabel('Percentage of NumPy Contributors')
fig.tight_layout()

# Highlight top 3 (the last three entries after the ascending sort)
top_3_text = f"{labels[-3]}, {labels[-2]}, and {labels[-1]}"
glue('top_3_motivations', top_3_text, display=False)

留存¶
最后,我们询问了 NumPy 贡献者是否计划继续为 NumPy 贡献。16 名(100%)自认为是 NumPy 定期贡献者的调查参与者计划继续贡献。在 72 名不认为自己是定期贡献者的 NumPy 贡献者中,81% 计划继续贡献。
# Respondents who answered 'Yes' to continuing to contribute to NumPy
plans_to_continue = (npdata['continue'] == 'Yes')

# Self-identified regular NumPy contributors and their plans
np_regular_mask = (npdata['regular'] == 'Yes')
regular_continue = plans_to_continue[np_regular_mask]
glue(
    'regular_continue_pct',
    gluval(regular_continue.sum(), np_regular_mask.sum()),
    display=False
)

# NumPy contributors who do not consider themselves regular contributors
non_regular_contributor_mask = np_contributors_mask & ~np_regular_mask
num_nonregular = non_regular_contributor_mask.sum()
glue('num_nonregular_np_contributors', num_nonregular, display=False)

# Share of those non-regular contributors who plan to keep contributing
num_nonregular_continue = plans_to_continue[non_regular_contributor_mask].sum()
glue(
    'become_regular_pct',
    f'{100 * num_nonregular_continue / num_nonregular:1.0f}%',
    display=False
)
未贡献(或尚未贡献)的用户¶
我们还希望了解那些尚未直接为项目做出贡献的 NumPy(以及其他开源软件)用户的看法 — 他们是否普遍有贡献的意愿?对于希望回馈的用户来说,最大的障碍是什么?在 1236 名调查受访者中,547 名(44%)表示从未为任何开源软件项目贡献过,920 名(74%)表示从未专门为 NumPy 贡献过。
# Respondents who answered 'No' to having contributed (to other OSS projects
# and to NumPy respectively), reported relative to all respondents.
num_oss_non_contributors = (ossdata['contributed'] == 'No').sum()
num_np_non_contributors = (npdata['contributed'] == 'No').sum()
for link_name, count in (
    ('oss_non_contributors', num_oss_non_contributors),
    ('np_non_contributors', num_np_non_contributors),
):
    glue(link_name, gluval(count, num_respondents), display=False)
贡献意愿¶
我们询问了这些受访者,他们是否对为他们使用的开源软件包做出贡献感兴趣。453 名(83%)表示他们普遍对为开源软件贡献感兴趣,而 627 名(68%)专门对 NumPy 感兴趣。
# Non-contributors who answered 'Yes' to being interested in contributing
oss_never_contributed = (ossdata['contributed'] == 'No')
np_never_contributed = (npdata['contributed'] == 'No')
oss_interested_mask = oss_never_contributed & (ossdata['interested'] == 'Yes')
np_interested_mask = np_never_contributed & (npdata['interested'] == 'Yes')

# Report each as a share of the corresponding non-contributor count
for link_name, interested, total in (
    ('oss_interested', oss_interested_mask, num_oss_non_contributors),
    ('np_interested', np_interested_mask, num_np_non_contributors),
):
    glue(link_name, gluval(interested.sum(), total), display=False)
贡献障碍¶
我们还询问了表示有兴趣贡献的受访者,有哪些最大的障碍阻止了他们这样做。
# Limitations reported by interested non-contributors (non-NumPy OSS data)
limitations = ossdata['limitations'][oss_interested_mask]
# Remove non-responses
limitations = limitations[limitations != '']

# Tally individual answers and order them by ascending count
data = flatten(limitations)
labels, cnts = np.unique(data, return_counts=True)
order = np.argsort(cnts)
labels, cnts = labels[order], cnts[order]

# Plot: percentages are relative to the respondents who answered
fig, ax = plt.subplots(figsize=(12, 8))
ypos = np.arange(len(labels))
ax.barh(ypos, 100 * cnts / len(limitations))
ax.set_yticks(ypos)
ax.set_yticklabels(labels)
ax.set_xlabel('Percentage of Contributors')
fig.tight_layout()

潜在贡献者的兴趣¶
在表示有兴趣为 NumPy 贡献的受访者中,大多数(75%)对贡献源代码感兴趣。第二受欢迎的类别是贡献文档,有 382 名(63%)受访者表示了兴趣。192 名(32%)的人对贡献叙述性文档(例如教程)和技术文档(例如参考指南)都感兴趣,而 105 名(17%)只对叙述性文档感兴趣,85 名(14%)只对技术文档感兴趣。
def _mentions(responses, option):
    """Return a boolean array marking the responses that contain `option`."""
    return np.array([option in answer for answer in responses], dtype=bool)


# Interests of non-contributors who said they would like to contribute to
# NumPy, with non-responses removed
interests = npdata['interests'][np_interested_mask]
interests = interests[interests != '']
num_answers = interests.shape[0]

data = flatten(interests)
labels, cnts = np.unique(data, return_counts=True)
# TODO: Remove these hacks when categories have been synchronized
labels, cnts = labels[2:], cnts[2:]
labels[3] = 'Narrative documentation (e.g. tutorials)'
labels[-1] = 'Technical documentation (e.g. docstrings)'
# Order bars by ascending count
order = np.argsort(cnts)
labels, cnts = labels[order], cnts[order]

fig, ax = plt.subplots(figsize=(12, 8))
ypos = np.arange(len(labels))
ax.barh(ypos, 100 * cnts / len(interests))
ax.set_yticks(ypos)
ax.set_yticklabels(labels)
ax.set_xlabel('Percentage of Contributors')
fig.tight_layout()

# Highlight top categories: the largest count after the ascending sort.
# NOTE(review): the glue key assumes that category is source code - confirm.
glue('pct_want_contrib_code', f"{100 * cnts[-1] / num_answers:2.0f}%", display=False)

# Analyze documentation categories based on individual responses
narr_doc_text = "Developing educational content & narrative documentation (e.g. tutorials)"
tech_doc_text = "Writing technical documentation (e.g. docstrings, user guide, reference guide)"
tech_doc_respondents_mask = _mentions(interests, tech_doc_text)
narr_doc_respondents_mask = _mentions(interests, narr_doc_text)

num_both = (tech_doc_respondents_mask & narr_doc_respondents_mask).sum()
num_either = (tech_doc_respondents_mask | narr_doc_respondents_mask).sum()
num_narr_only = (narr_doc_respondents_mask & ~tech_doc_respondents_mask).sum()
num_tech_only = (tech_doc_respondents_mask & ~narr_doc_respondents_mask).sum()

# Each count is reported relative to everyone who answered the question
for link_name, count in (
    ('interested_in_both_doc_types', num_both),
    ('interested_in_either_doc_type', num_either),
    ('interested_in_narr_doc_only', num_narr_only),
    ('interested_in_tech_doc_only', num_tech_only),
):
    glue(link_name, gluval(count, num_answers), display=False)

- 1
即,在调查时定期贡献。这不包括过去曾定期为项目贡献的贡献者。