Gantt Charts Version 2 - Individual Workstreams

While some users like showing Gantt charts that allow overlapping dates (see community post here), many would prefer a visual that clearly shows individual work streams, especially in organizations where members are focused on one task at a given time.

 

The input to the Python editor? A table with 4 columns:

  • Project Name
  • Person - the individual working on the project
  • start_date - the day the individual begins working on the project
  • end_date - the day the individual stops working on the project

Now let's say we accidentally double booked one of our team members. This chart will give us a warning so we can go back and correct the team workflows!

 

Here's the Python Code that generates the above chart, complete with error handling.

# SQL output is imported as a pandas dataframe variable called "df"
# primary reference: https://sukhbinder.wordpress.com/2016/05/10/quick-gantt-chart-with-matplotlib/
# reference http://www.clowersresearch.com/main/gantt-charts-in-matplotlib/

#######################################################################
### LIBRARIES ###
#######################################################################

import pandas as pd
import datetime as dt
import matplotlib.dates as dates
import matplotlib.pyplot as plt
from matplotlib.dates import MONTHLY, WEEKLY, DateFormatter, rrulewrapper, RRuleLocator
from collections import OrderedDict
import numpy as np
import datetime as dt

#######################################################################
### HELPER FUNCTIONS ###
#######################################################################

# Function: Return distinct projects/phases
# Input: dataframe with a column indicating project names
# Output: dataframe of all distinct phases

def all_phases(df):
  phases=df['project'].drop_duplicates().values.tolist()
  return phases

# Function: prepare dataframe for gantt visualization
# Input: dataframe with a column for project, team, start_date, and end_date
# Output: dataframe with all columns required for main function

def df_gantt(df):

    df['project']=df['project'].astype('category')
    df['person']=df['person'].astype('category')
    df['end_date2']=dates.date2num(df['end_date'])
    df['start_date2']=dates.date2num(df['start_date'])
    df['duration']=df['end_date2']-df['start_date2']

    #assign colors for each series
    colors=['paleturquoise','lightskyblue','plum','mediumorchid','salmon','lightpink','sandybrown','gold']
    allprojects=df['project'].drop_duplicates().sort_values().reset_index(drop=True)
# #     allgrps=df['person'].drop_duplicates().sort_values().reset_index(drop=True)
    se=pd.DataFrame(colors[0:len(allprojects)])
    se.columns=['col']
    colormatrix=pd.concat([allprojects,se],axis=1)
    df = df.merge(colormatrix,on='project',how='inner').sort_values('person').reset_index(drop=True)
    return df

##########################################################################
### MAIN PLOT
##########################################################################

# Function: Create Gantt chart
# Input: dataframe with a column for project, team, start_date, and end_date, optional parameter showtoday (boolean) that shows a line marking the current date
# Output: matplotlib object of Gantt chart

def gantt_graph(df, showtoday):

  df=df_gantt(df)
  fig, ax = plt.subplots()

  #initialize variables
  j=''
  change=0
  ylocs=[]

  #plot chart
  for i in range(len(df['start_date2'])):
    if (j!=df['person'][i]):
      change=change+1
      j=df['person'][i]
      ylocs.append(0.5*change+0.5)
    ax.barh(0.5*change+0.5, df['duration'][i], left=df['start_date2'][i],height=0.4, align='center', color=df['col'][i], alpha = 1.0, label=df['person'][i])
    ax.annotate(df['project'][i],xy=(df['start_date2'][i]+0.5*df['duration'][i],0.5*change+0.5),horizontalalignment='center')

  #format x axis
  rule = rrulewrapper(WEEKLY, interval=1)
  ax.xaxis.set_major_locator(RRuleLocator(rule))
  ax.xaxis.set_major_formatter(DateFormatter("%b %d '%y"))
  plt.tick_params(
    axis='x',          # changes apply to the x-axis
    which='both',      # both major and minor ticks are affected
    top='off',      # ticks along the bottom edge are off
    bottom='on',
    rotation=0)         # ticks along the top edge are off

    #format y axis
  ax.invert_yaxis()
  ax.set_yticks(ylocs)
  ppl=df['person'].drop_duplicates().values.tolist()
  ax.set_yticklabels(ppl)
  plt.tick_params(
    axis='y',          # changes apply to the x-axis
    which='both',      # both major and minor ticks are affected
    left='off',      # ticks along the bottom edge are off
    right='off')        # ticks along the top edge are off


  #format border
  ax.spines['bottom'].set_linewidth(0.2)
  ax.spines['left'].set_linewidth(0.2)
  ax.spines['top'].set_linewidth(0.2)
  ax.spines['right'].set_linewidth(0.2)

  #plot a line showing current date
  if showtoday == True:
    plt.axvline(x=dt.date.today(),color='gray')

  return fig

# Function: provide error message handling for double-booked team memebers
# Input: 1. dataframe with a column for project, team, start_date, and end_date; 2. optional boolean parameter to show a line for the current date
# Output: matplotlib object of gantt chart (if no double booking cases) or error message (if any cases of double booking exist)

def workflow_errors(df, show_today=True):
  df_updated=df_gantt(df)
  errors=0
  for person in df_updated['person'].drop_duplicates().values.tolist():
    df2=df_updated[df_updated['person'] == person]
    df3=df2
    for i in range(len(df2['start_date2'])):
      for j in range(len(df3['start_date2'])):
        if (df2['end_date2'].iloc[i]>df3['start_date2'].iloc[j] and df2['end_date2'].iloc[i]<df3['end_date2'].iloc[j]):
          stringtime=df2['end_date'].iloc[i].strftime('%B %d, %Y')
          txt = person + ' has multiple workflows on/around ' + stringtime
          errors+=1
  if (errors==0):
    fig = gantt_graph(df, showtoday=show_today)
  else:
    fig = plt.figure()
    plt.axis('off')
    plt.text(.5, .5, txt, fontsize=15, color='black', ha='center')
  return fig

# # Use Periscope to visualize a dataframe or an image by passing data to periscope.output()
periscope.output(workflow_errors(df, show_today=True))
Reply Oldest first
  • Oldest first
  • Newest first
  • Active threads
  • Popular
Like1 Follow
  • 1 Likes
  • 9 mths agoLast active
  • 335Views
  • 1 Following