solar-data-tools#

[1]:
import utils
import pandas as pd

from bokeh.plotting import figure, show
from bokeh.models import HoverTool
from bokeh.transform import jitter
from bokeh.io import output_notebook
[2]:
# Configure Bokeh to render inline in the notebook so the show() calls below
# display their figures in the cell output.
output_notebook()
Loading BokehJS ...

GitHub#

The contributor plots below summarize a subset of all community members that we call “contributors”. This subset is based on git commit authorship and thus excludes people who file bug reports, contribute to development discussions, and so on.

[3]:
# Star data for the repository, fetched through the project-local `utils` helper.
# NOTE(review): the structure of the returned object is not visible here; it is
# only handed to utils.plot_github_stars_timeseries in the next cell.
gh = utils.get_github_stars('slacgismo/solar-data-tools')
[4]:
# Build the stars figure with the project helper and display it.
p = utils.plot_github_stars_timeseries(gh)
show(p)
[5]:
# Fork data from the project helper, reshaped into a frame with a 'year'
# column (taken from the datetime-like index) and a 'forks' column.
fork_counts = utils.get_github_forks('slacgismo/solar-data-tools')
forks = pd.DataFrame(dict(year=fork_counts.index.year, forks=fork_counts))
[6]:
# Bar chart of the 'forks' column against 'year'; hovering a bar shows both values.
fork_tooltips = [("Cumulative forks", "@forks"), ("Year", "@year")]
p = figure(title="Cumulative forks", height=350, tooltips=fork_tooltips)
p.vbar(source=forks, x='year', top='forks', width=0.75)
p.yaxis.axis_label = 'Total forks'
show(p)
[7]:
# Pull-request data from the project helper, reshaped into a frame with a
# 'year' column (taken from the datetime-like index) and a 'prs' column.
pr_counts = utils.get_github_pull_requests('slacgismo/solar-data-tools')
prs = pd.DataFrame(dict(year=pr_counts.index.year, prs=pr_counts))
[8]:
# Bar chart of the 'prs' column against 'year'; hovering a bar shows both values.
pr_tooltips = [("Cumulative PRs", "@prs"), ("Year", "@year")]
p = figure(title="Cumulative PRs", height=350, tooltips=pr_tooltips)
p.vbar(source=prs, x='year', top='prs', width=0.75)
p.yaxis.axis_label = 'Total PRs'
show(p)
[9]:
# The project helper returns two objects, unpacked here: one used for the
# cumulative-contributors plot and one used for the per-year bar chart below.
# NOTE(review): their exact types are not visible here; the second is later
# treated as having an index that supports strftime — confirm against utils.
cumulative_contributors, annual_unique_contributors = utils.get_github_contributor_timeseries("slacgismo/solar-data-tools")
[10]:
# Build the cumulative-contributors figure with the project helper and display it.
p = utils.plot_github_contributors_timeseries(cumulative_contributors)
show(p)
[11]:
# Unique code contributors per year, shown as a categorical bar chart.
#
# The reshaping is done on a copy under a new name so this cell can be re-run:
# overwriting `annual_unique_contributors` with the reset-index frame made a
# second run fail, because the frame's new integer index has no `strftime`.
contributors_by_year = annual_unique_contributors.copy()
contributors_by_year.index = contributors_by_year.index.strftime('%Y')  # year labels as strings
contributors_by_year = contributors_by_year.reset_index()
contributors_by_year.columns = ['year', 'count']

# The string year labels double as the categorical x-axis factors.
p = figure(x_range=contributors_by_year['year'], height=350,
           tooltips=[("Year", "@year"), ("Unique Code Contributors", "@count")],
           title="Unique Code Contributors by Year")
p.vbar(x='year', top='count', width=0.75, source=contributors_by_year,
       line_color='white')
p.yaxis.axis_label = 'Unique Code Contributors'
show(p)
[12]:
# Contributor data for the repository from the project helper.
# NOTE(review): presumably avatar images, judging by the variable name and the
# mosaic built from it in the next cell — confirm against utils.
contributor_images = utils.get_github_contributors('slacgismo/solar-data-tools')
[13]:
# Assemble the contributor mosaic with the project helper; leaving `mosaic` as
# the cell's last expression makes the notebook display it.
mosaic = utils.make_github_contributors_mosaic(contributor_images)
mosaic
[13]:
../_images/project_solar-data-tools_14_0.png

ReadTheDocs#

[14]:
# Load the ReadTheDocs analytics table and build a "first to last" date label
# that the plot titles below reuse.
df = utils.get_rtd_analytics_data('solar-data-tools')
first_day = df['Date'].min()
last_day = df['Date'].max()
data_period = f"{first_day.strftime('%Y-%m-%d')} to {last_day.strftime('%Y-%m-%d')}"
[15]:
# Total page views per documentation version across the whole table.
views_per_version = df.groupby('Version')['Views'].sum()
total_by_version = views_per_version.reset_index()

version_tooltips = [("Version", "@Version"), ("Views", "@Views")]
p = figure(
    title=f"Page views by RTD version ({data_period})",
    x_range=total_by_version['Version'],
    height=350,
    tooltips=version_tooltips,
)
p.vbar(source=total_by_version, x='Version', top='Views', width=0.75,
       line_color='white')
# 3.14/2 is roughly a quarter turn: the version labels are drawn vertically.
p.xaxis.major_label_orientation = 3.14/2
p.yaxis.axis_label = 'Total page views'
show(p)
[16]:
# Daily view totals with one column per version; only 'stable' and 'latest'
# are kept, and days on which a version has no rows count as zero.
views_by_day = df.pivot_table(index='Date', columns='Version', values='Views', aggfunc='sum')
daily_views = views_by_day[['stable', 'latest']].fillna(0)

p = figure(title="Daily views by RTD version", x_axis_type="datetime", height=350)
hover_tool = HoverTool(
    tooltips=[('Date', '@x{%Y-%m-%d}'), ('Views', '@y')],
    formatters={'@x': 'datetime'},
    point_policy='snap_to_data',
)
p.add_tools(hover_tool)
days = daily_views.index
p.line(days, daily_views['stable'], legend_label='stable')
p.line(days, daily_views['latest'], legend_label='latest', color='#ff7f0e')
p.yaxis.axis_label = 'Daily page views'
show(p)
[17]:
# Annual page views for the 'stable' docs version.
#
# Views are grouped by calendar year directly instead of `resample('a')`:
# the 'a'/'A' year-end alias is deprecated as of pandas 2.2, while its
# replacement ('YE') is not available in older releases. Grouping on
# `.dt.year` gives the same calendar-year totals without a frequency alias.
stable_views = df.loc[df['Version'] == 'stable', ['Date', 'Views']]
annual = stable_views.groupby(stable_views['Date'].dt.year)['Views'].sum()
if not annual.empty:
    # resample() also produced a zero for any year without rows; keep that
    # by filling every year between the first and last one present.
    annual = annual.reindex(range(annual.index.min(), annual.index.max() + 1), fill_value=0)
views = pd.DataFrame({'year': annual.index, 'views': annual})

p = figure(height=350, tooltips=[("Annual page views", "@views"), ("Year", "@year")],
           title="Annual page views")
p.vbar(x='year', top='views', width=0.75, source=views)
p.yaxis.axis_label = 'Annual page views'
show(p)
[18]:
# Keep only 'stable' rows dated after 2022-03-30: that is v0.9.1 and forward,
# which is when all the URLs changed. Then total the views per URL path.
after_url_change = df['Date'] > '2022-03-30'
is_stable = df['Version'] == 'stable'
df2 = df.loc[after_url_change & is_stable, :].copy()
views_per_path = df2.groupby('Path')['Views'].sum()
pathviews = views_per_path.reset_index()
[19]:
# Horizontal bar chart of the n most-viewed URL paths.
# NOTE(review): `pathviews` only covers rows after 2022-03-30, while the title
# reports `data_period` for the full table — confirm the label is intended.
n = 20
ranked_paths = pathviews.sort_values('Views', ascending=False)
subset = ranked_paths.iloc[:n]
url_tooltips = [("URL", "@Path"), ("Views", "@Views")]
p = figure(
    title=f"Views by URL (Top {n}, {data_period})",
    y_range=subset['Path'],
    height=400,
    tooltips=url_tooltips,
)
p.hbar(source=subset, y='Path', right='Views', height=0.75,
       line_color='white')
p.xaxis.axis_label = 'Total page views'
show(p)
[20]:
# Tag every path with its first path segment ('Prefix'), then total the views
# of the two documentation sections of interest, sorted ascending.
prefixes = ['getting_started', 'reference']
pathviews['Prefix'] = pathviews['Path'].str.split("/").str[1]
in_section = pathviews['Prefix'].isin(prefixes)
section_totals = pathviews.loc[in_section, :].groupby('Prefix')['Views'].sum()
groupviews = section_totals.loc[prefixes].sort_values().reset_index()

section_tooltips = [("Section", "@Prefix"), ("Views", "@Views")]
p = figure(
    title=f"Page views by docs section ({data_period})",
    x_range=groupviews['Prefix'],
    height=350,
    tooltips=section_tooltips,
)
p.vbar(source=groupviews, x='Prefix', top='Views', width=0.75)
p.yaxis.axis_label = 'Total page views'
show(p)
[21]:
# One point per page in the selected sections, jittered horizontally within
# its section so that overlapping pages stay distinguishable.
subset = pathviews[pathviews['Prefix'].isin(prefixes)]

page_tooltips = [("URL", "@Path"), ("Views", "@Views")]
p = figure(
    title=f"Page views by docs page ({data_period})",
    x_range=prefixes,
    height=350,
    tooltips=page_tooltips,
)
jittered_x = jitter('Prefix', width=0.1, range=p.x_range, distribution='normal')
p.scatter(x=jittered_x, y='Views', source=subset)
p.yaxis.axis_label = 'Total page views'
show(p)
[22]:
# One horizontal bar chart per docs section with that section's most-viewed
# pages; paths ending in '/index.html' are left out.
top_k = n // 4  # number of pages shown per section
for prefix in prefixes:
    subset = pathviews.loc[pathviews['Prefix'] == prefix, :]
    subset = subset.loc[~subset['Path'].str.endswith('/index.html'), :]
    subset = subset.sort_values('Views', ascending=False).iloc[:top_k]
    # assign() returns a new frame, so the shortened label column is never
    # written into a slice of `pathviews` (avoids SettingWithCopyWarning).
    subset = subset.assign(
        Path_Clean=subset['Path'].str.replace('^/'+prefix+'/', '', regex=True)
    )

    height = (600 // 20) * len(subset)  # 30 px of figure height per bar
    # Only claim "Top k" when the list was actually truncated to k entries;
    # the label reports the number of bars shown (previously it printed n).
    extra = f"Top {top_k}, " if len(subset) == top_k else ''
    p = figure(y_range=subset['Path_Clean'], height=height, tooltips=[("URL", "@Path"), ("Views", "@Views")],
               title=f"{prefix}: Views by URL ({extra}{data_period})")
    p.hbar(y='Path_Clean', right='Views', source=subset, height=0.75,
           line_color='white')
    p.xaxis.axis_label = 'Total page views'
    show(p)