#Importing the libriaries needed
# Ignore the warnings
import warnings
warnings.filterwarnings('always')
warnings.filterwarnings('ignore')
# data visualisation and manipulation
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import style
import seaborn as sns
#import missingno as msno
#configure
# sets matplotlib to inline and displays graphs below the corressponding cell.
%matplotlib inline
style.use('fivethirtyeight')
sns.set(style='whitegrid',color_codes=True)
#import the necessary modelling algos.
#from sklearn.linear_model import LogisticRegression
#from sklearn.svm import LinearSVC
#from sklearn.svm import SVC
#from sklearn.neighbors import KNeighborsClassifier
#from sklearn.ensemble import RandomForestClassifier
#from sklearn.tree import DecisionTreeClassifier
#from sklearn.ensemble import GradientBoostingClassifier
#from sklearn.naive_bayes import GaussianNB
#model selection
#from sklearn.model_selection import train_test_split
#from sklearn.model_selection import KFold
#from sklearn.metrics import accuracy_score,precision_score,recall_score,confusion_matrix,roc_curve,roc_auc_score
#from sklearn.model_selection import GridSearchCV
#from imblearn.over_sampling import SMOTE
#preprocess.
#from sklearn.preprocessing import MinMaxScaler,StandardScaler,LabelEncoder,OneHotEncoder
# Common sklearn Model Helpers
#from sklearn import feature_selection
#from sklearn import model_selection
#from sklearn import metrics
# from sklearn.datasets import make_classification
# sklearn modules for performance metrics
#from sklearn.metrics import confusion_matrix, classification_report, precision_recall_curve
#from sklearn.metrics import auc, roc_auc_score, roc_curve, recall_score, log_loss
#from sklearn.metrics import f1_score, accuracy_score, roc_auc_score, make_scorer
#from sklearn.metrics import average_precision_score
# ann and dl libraraies
#from keras import backend as K
#from keras.models import Sequential
#from keras.layers import Dense
#from keras.optimizers import Adam,SGD,Adagrad,Adadelta,RMSprop
#from keras.utils import to_categorical
#import tensorflow as tf
import random as rn
This is a fictional dataset created by IBM to identify important factors that may influence employee attrition. The dataset contains 1470 rows and 35 columns. Our project focuses on finding the most important metrics that influence attrition. First we compute some general statistics to gain insight into the dataset; second we use machine learning to predict attrition. This may reveal findings that people do not usually think about regarding employee attrition.
Understanding what makes employees leave is important, because replacing a person who leaves can be costly. Being aware of the causes makes it easier to take action to reduce employee attrition.
Some of the questions we want to cover during this project
Given that we have data on former employees, this is a standard supervised classification problem where the label is a binary variable, 0 (active employee), 1 (former employee). In this study, our target variable Y is the probability of an employee leaving the company.
Some important columns in the dataset with information about personal and employment details, explained in more:
# Load the IBM HR attrition CSV (expected in the working directory) into a DataFrame.
df_employee = pd.read_csv(
    filepath_or_buffer='WA_Fn-UseC_-HR-Employee-Attrition.csv',
)
After loading the dataset into a dataframe, the commands below give a good understanding of how the dataset is put together. Some of the functions used are listed below.
df_employee.head()
df_employee.columns
df_employee.describe()
df_employee.shape
df_employee.info()
From those commands we can see that the dataset contains no missing values. There are several numerical and categorical variables. From an HR perspective, this type of data about employees is unlikely to feature a huge amount of missing data.
# Taking a look at the data set.
# NOTE(review): apart from info(), which prints by itself, these are bare
# expressions; a notebook only renders one when it is the last statement of
# its cell -- confirm the cell boundaries or wrap them in print()/display().
df_employee.head()  # first rows
df_employee.columns  # column labels
df_employee.shape  # (rows, columns) -- an attribute, not a method
df_employee.describe()  # summary statistics
df_employee.info()  # dtypes and non-null counts
df_employee.isnull().sum()  # missing values per column
Now it is time to check whether all variables give useful insights or whether some of them can be deleted. To check this, we loop through the columns, find those with only one unique value, and then drop those columns.
# Columns that hold a single distinct value carry no information, so they are
# collected first and dropped in one call. Collecting before dropping avoids
# mutating the frame while its column index is being iterated.
# nunique(dropna=False) counts NaN as a value, matching len(col.unique()).
notneeded = [
    col
    for col in df_employee.columns
    if df_employee[col].nunique(dropna=False) == 1
]
df_employee.drop(columns=notneeded, inplace=True)
print(notneeded)

# EmployeeNumber is dropped as well.
df_employee.drop(columns=['EmployeeNumber'], inplace=True)
print(df_employee.shape)
df_employee.head()
After running this loop, the columns dropped were EmployeeCount, Over18 and StandardHours. EmployeeNumber is just an increasing number, so that column is dropped as well. The next step is to look at how the different variables are correlated.
# Correlation heatmap of the numeric features.
f, ax = plt.subplots(figsize=(20, 20))
# Only numeric columns can be correlated; selecting them explicitly keeps this
# working on pandas versions where corr() no longer skips string columns.
corr = df_employee.select_dtypes(include='number').corr()
sns.heatmap(
    corr,
    # All-False mask, i.e. no cell is hidden. The builtin `bool` replaces the
    # `np.bool` alias, which no longer exists in current NumPy releases.
    mask=np.zeros_like(corr, dtype=bool),
    cmap=sns.diverging_palette(220, 10, as_cmap=True),
    square=True,
    ax=ax,
    annot=True,
)
Starting the EDA off with some histograms of the numerical features.
# Independent copies of the frame as it stands at this point; later cells
# relabel columns on df_employee without touching these copies unless they
# do so explicitly.
df_hrs = df_employee.copy()
df_hr_cat_name = df_employee.copy()
df_Anumber = df_employee.copy()
# One histogram per column that DataFrame.hist can plot.
df_employee.hist(figsize=(20,20))
plt.show()
# Attrition share (pie) next to the absolute counts (bars).
plt.figure(figsize=(20,5))
plt.subplot(1,2,1)
df_employee['Attrition'].value_counts().plot.pie(autopct='%1.1f%%')
plt.subplot(1,2,2)
# The column is named via x=: current seaborn accepts only `data` as a
# positional argument, so a bare Series would not be read as the x variable.
sns.countplot(data=df_employee, x='Attrition')
df_employee['Attrition'].value_counts().to_frame()
From the attrition rate graphs we can see that the majority of employees are still with the company. Important to mention: the pie chart and the bar chart assign the colors differently.
# Business-travel share (pie) next to the absolute counts (bars).
plt.figure(figsize=(20,5))
plt.subplot(1,2,1)
df_employee['BusinessTravel'].value_counts().plot.pie(autopct='%1.1f%%')
plt.subplot(1,2,2)
# The column is named via x=: current seaborn accepts only `data` as a
# positional argument, so a bare Series would not be read as the x variable.
sns.countplot(data=df_employee, x='BusinessTravel')
# Attrition counts within each travel category.
print(df_employee.groupby('BusinessTravel')['Attrition'].value_counts())
Most employees travel rarely; the groups that travel frequently or do not travel at all are small.
# Overtime share (pie) next to the absolute counts (bars).
plt.figure(figsize=(20,5))
plt.subplot(1,2,1)
df_employee['OverTime'].value_counts().plot.pie(autopct='%1.1f%%')
plt.subplot(1,2,2)
# The column is named via x=: current seaborn accepts only `data` as a
# positional argument, so a bare Series would not be read as the x variable.
sns.countplot(data=df_employee, x='OverTime')
# Attrition counts within each overtime group.
df_employee.groupby('OverTime')['Attrition'].value_counts()
1/3 of the people tend to have overtime
# Department share (pie) next to the absolute counts (bars).
plt.figure(figsize=(20,5))
plt.subplot(1,2,1)
df_employee['Department'].value_counts().plot.pie(autopct='%1.1f%%')
plt.subplot(1,2,2)
# The column is named via x=: current seaborn accepts only `data` as a
# positional argument, so a bare Series would not be read as the x variable.
sns.countplot(data=df_employee, x='Department')
# Attrition counts within each department.
df_employee.groupby('Department')['Attrition'].value_counts()
The majority of the people are part of the research & Development department with 65%, also sales department are big with 30%
# Replace the numeric education codes with readable labels so the plots are
# easier to interpret.
education_labels = {1: 'High School', 2: 'College', 3: 'Bachelor', 4: 'Master', 5: 'Doctorate'}
# The result is assigned back to the column: replace(inplace=True) on
# `df.Education` acts on an intermediate Series and is not guaranteed to
# update the parent frame (it does not under pandas copy-on-write).
df_employee['Education'] = df_employee['Education'].replace(education_labels)
df_hrs['Education'] = df_hrs['Education'].replace(education_labels)

# Education share (pie) next to the absolute counts (bars).
plt.figure(figsize=(20,5))
plt.subplot(1,2,1)
df_employee['Education'].value_counts().plot.pie(autopct='%1.1f%%')
plt.subplot(1,2,2)
# The column is named via x=: current seaborn accepts only `data` as a
# positional argument, so a bare Series would not be read as the x variable.
sns.countplot(data=df_employee, x='Education')
plt.xticks(rotation=45)
# Attrition counts within each education level.
df_employee.groupby('Education')['Attrition'].value_counts()
In the education level there is a clear majority of persons having a bachelor's or master's degree.
# Education-field share (pie) next to the absolute counts (bars).
plt.figure(figsize=(20,5))
plt.subplot(1,2,1)
df_employee['EducationField'].value_counts().plot.pie(autopct='%1.1f%%')
plt.subplot(1,2,2)
# The column is named via x=: current seaborn accepts only `data` as a
# positional argument, so a bare Series would not be read as the x variable.
sns.countplot(data=df_employee, x='EducationField')
plt.xticks(rotation=45)
# Attrition counts within each education field.
print(df_employee.groupby('EducationField')['Attrition'].value_counts().to_frame())
There are two fields that are dominant: medical and life sciences.
# Employees per job role.
# catplot is a figure-level function that builds its own figure, so no
# plt.figure() call precedes it (that would only leave an empty extra figure).
sns.catplot(y='JobRole', kind='count', aspect=4, data=df_employee)
print(df_employee['JobRole'].value_counts())
# Attrition counts within each job role.
print(df_employee.groupby('JobRole')['Attrition'].value_counts())
We can see the division of the different job roles: most employees are sales executives and the fewest work in human resources.
# Changing numeric values to corresponding categorical values.
satisfaction_labels = {1: 'Low', 2: 'Medium', 3: 'High', 4: 'Very High'}
# Column -> code/label mapping; insertion order is also the plotting order.
rating_labels = {
    'EnvironmentSatisfaction': satisfaction_labels,
    'JobInvolvement': satisfaction_labels,
    'JobSatisfaction': satisfaction_labels,
    'RelationshipSatisfaction': satisfaction_labels,
    'PerformanceRating': {1: 'Low', 2: 'Good', 3: 'Excellent', 4: 'Outstanding'},
    'WorkLifeBalance': {1: 'Bad', 2: 'Good', 3: 'Better', 4: 'Best'},
}
for column, labels in rating_labels.items():
    df_employee[column] = df_employee[column].map(labels)

# One count plot per survey metric on a 2x3 grid.
plt.figure(figsize=(18,8))
for position, column in enumerate(rating_labels, start=1):
    plt.subplot(2, 3, position)
    # The column is named via x=: current seaborn accepts only `data` as a
    # positional argument, so a bare Series would not be read as x.
    sns.countplot(data=df_employee, x=column)
Overall the metrics, which probably come mostly from surveys, show that people are happy and score high.
It is unclear why the performance rating only takes the values 3 and 4. Overall, more people are satisfied with their situation than not.
We can see the divition of different job roles, there are most sales excutive and least working in human resources.
1. Hypothesis: Single people tend to leave more often than married people?
2. Hypothesis: Male are more active leavers?
# Age distribution per gender, split by Attrition (left) and OverTime (right).
# df_hrs is used here rather than df_employee.
plt.figure(figsize=(20,5))
plt.subplot(1,2,1)
sns.violinplot(x ="Gender", y ="Age", hue ="Attrition",
data = df_hrs, split = True)
plt.subplot(1,2,2)
sns.violinplot(x ="Gender", y ="Age", hue ="OverTime",
data = df_hrs, split = True)
plt.xticks(rotation=45)
# Stacked age histogram (5 bins, with KDE) coloured by Attrition.
plt.subplots(figsize=(20,5))
sns.histplot(data=df_employee, x="Age", hue="Attrition", multiple="stack",bins=5,kde=True)
# Attrition counts per gender.
df_employee.groupby('Gender')['Attrition'].value_counts()
# Stacked age histogram (5 bins) coloured by MaritalStatus.
plt.subplots(figsize=(20,5))
sns.histplot(data=df_employee, x="Age", hue="MaritalStatus", multiple="stack",bins=5)
print(df_employee['MaritalStatus'].value_counts(normalize=True))
print(df_employee.groupby(['MaritalStatus','Gender'])['Attrition'].value_counts())
# NOTE(review): no new figure is created before this call, so it draws on the
# current axes; if it shares a cell with the Age histogram above, the two
# plots end up on the same axes -- confirm the cell boundaries.
sns.histplot(data=df_employee, x="MaritalStatus", hue="Gender", multiple="stack",bins=5)
From the part 3.2 we can see:
# Encode Attrition as 1 (Yes) / 0 (No) on the df_Anumber copy only.
# NOTE(review): map() turns every value that is not 'Yes'/'No' into NaN, so
# running this line a second time would blank the column.
df_Anumber['Attrition'] = df_Anumber['Attrition'].map({'Yes': 1, 'No': 0})
#df_hr_cat_name['Attrition'] = df_hr_cat_name['Attrition'].map({'Yes': 1, 'No': 0})
In this chapter we take a look at the different JobRoles and JobLevels and connect them to Education and EducationField, to see who is more likely to leave. 3. Hypothesis: Workers in lower JobLevels are more likely to leave
4. Hypothesis: Lower Education get more training
# Job roles split by job level.
plt.subplots(figsize=(18,5))
# Columns are named via x=/hue=: current seaborn accepts only `data` as a
# positional argument, so a bare Series would not be read as the x variable.
sns.countplot(data=df_employee, x='JobRole', hue='JobLevel')
plt.xticks(rotation=10)
# Job levels stacked by job role.
plt.subplots(figsize=(20,5))
sns.histplot(data=df_employee, x="JobLevel", hue="JobRole", multiple="stack")
# Share of each role within a job level, then attrition counts per level/role.
print(df_employee.groupby('JobLevel')['JobRole'].value_counts(normalize=True))
print(df_employee.groupby(['JobLevel', 'JobRole'])['Attrition'].value_counts().sort_index())
From JobLevel and JobRole we can see that:
# Job roles split by education field.
plt.subplots(figsize=(20,5))
# Columns are named via x=/hue=: current seaborn accepts only `data` as a
# positional argument, so a bare Series would not be read as the x variable.
sns.countplot(data=df_employee, x='JobRole', hue='EducationField')
plt.xticks(rotation=10)
The dominant education fields are life sciences and medical. For sales executives, marketing is important.
# Job roles split by education level.
plt.subplots(figsize=(20,5))
# Columns are named via x=/hue=: current seaborn accepts only `data` as a
# positional argument, so a bare Series would not be read as the x variable.
sns.countplot(data=df_employee, x='JobRole', hue='Education')
plt.xticks(rotation=10)
more people have bachelors ad masters degree
# Education fields split by education level.
plt.subplots(figsize=(20,5))
# Columns are named via x=/hue=: current seaborn accepts only `data` as a
# positional argument, so a bare Series would not be read as the x variable.
sns.countplot(data=df_employee, x='EducationField', hue='Education')
plt.xticks(rotation=10)
Moving over to Traing for the employees
# Training sessions attended last year.
# The first parameter of Series.value_counts is `normalize`, so the earlier
# value_counts(['JobRole']) only acted as a truthy flag and never grouped by
# role. A cross-tabulation gives the per-role breakdown that was intended.
print(pd.crosstab(df_employee['JobRole'], df_employee['TrainingTimesLastYear']))
# Attrition share within each training count.
print(df_employee.groupby(['TrainingTimesLastYear'])['Attrition'].value_counts(normalize=True).sort_index())
# Overall share of each training count (what value_counts(['Attrition'])
# effectively printed, now spelled out as normalize=True).
print(df_employee['TrainingTimesLastYear'].value_counts(normalize=True))

# Columns are named via x=/hue=: current seaborn accepts only `data` as a
# positional argument, so a bare Series would not be read as the x variable.
plt.subplots(figsize=(20,5))
sns.countplot(data=df_employee, x='JobRole', hue='TrainingTimesLastYear')
plt.subplots(figsize=(20,5))
sns.countplot(data=df_employee, x='JobLevel', hue='TrainingTimesLastYear')

# Number of employees without any training, computed two ways as a cross-check.
# .get(0, 0) looks the value 0 up by label and falls back to 0 if it is absent.
print(df_employee['TrainingTimesLastYear'].value_counts().get(0, 0))
print(df_employee.JobRole[df_employee.TrainingTimesLastYear == 0].count())
Employees are trained on average 2 to 3 times per year, independent of JobLevel and JobRole.
def highlight_max(s, color='lightgreen'):
    """Return one CSS string per element of *s*, marking the maximum.

    Intended for ``DataFrame.style.apply``, which calls the function once per
    column and expects a list of CSS declarations of the same length.

    Args:
        s: A pandas Series (one column of the styled frame).
        color: Background colour for the maximum; defaults to the light green
            this notebook has always used.

    Returns:
        A list with ``'background-color: <color>'`` at every position that
        equals ``s.max()`` (ties are all marked) and ``''`` elsewhere.
    """
    is_max = s == s.max()
    return [f'background-color: {color}' if v else '' for v in is_max]
# Usage: df.style.apply(highlight_max)
def highlight_min(s, color='lightblue'):
    """Return one CSS string per element of *s*, marking the minimum.

    Intended for ``DataFrame.style.apply``, which calls the function once per
    column and expects a list of CSS declarations of the same length.

    Args:
        s: A pandas Series (one column of the styled frame).
        color: Background colour for the minimum; defaults to the light blue
            this notebook has always used.

    Returns:
        A list with ``'background-color: <color>'`` at every position that
        equals ``s.min()`` (ties are all marked) and ``''`` elsewhere.
    """
    is_min = s == s.min()
    return [f'background-color: {color}' if v else '' for v in is_min]
# Usage: df.style.apply(highlight_min)
# Mean involvement, satisfaction, training count and years in role for every
# (JobRole, Attrition) pair, ordered by training count and then satisfaction.
# df_hrs is used for the averages rather than df_employee.
TrainAtWork = df_hrs.groupby(['JobRole','Attrition'],as_index=False)[['JobInvolvement','JobSatisfaction','TrainingTimesLastYear','YearsInCurrentRole']].mean().sort_values(by=['TrainingTimesLastYear','JobSatisfaction'])
# Styled view: the per-column maximum and minimum get a background colour.
TrainAtWork.style.apply(highlight_max).apply(highlight_min)
# Same training averages reshaped: one row per Attrition value, one column per role.
pd.pivot_table(TrainAtWork, values = 'TrainingTimesLastYear', index='Attrition', columns = 'JobRole').reset_index()
# Job roles stacked by Attrition.
plt.subplots(figsize=(20,5))
sns.histplot(data=df_employee, x="JobRole", hue="Attrition", multiple="stack")
# Share of each job role, job level and education field within the two
# Attrition groups, plus raw Education counts per (Attrition, JobLevel).
print(df_employee.groupby('Attrition')['JobRole'].value_counts(normalize=True).sort_index())
print(df_employee.groupby('Attrition')['JobLevel'].value_counts(normalize=True).sort_index())
# The trailing sort_values() re-orders by count, overriding the index sort.
print(df_employee.groupby(['Attrition','JobLevel'])['Education'].value_counts().sort_index().sort_values())
print(df_employee.groupby('Attrition')['EducationField'].value_counts(normalize=True).sort_index())
# Attrition counts split by job role, education and job involvement.
# Columns are named via x=/hue=: current seaborn accepts only `data` as a
# positional argument, so a bare Series would not be read as the x variable.
plt.subplots(figsize=(20,5))
sns.countplot(data=df_employee, x='Attrition', hue='JobRole')
# Share of each job role within the two Attrition groups.
df_employee.groupby('Attrition')['JobRole'].value_counts(normalize=True).sort_index()
plt.subplots(figsize=(20,5))
sns.countplot(data=df_employee, x='Attrition', hue='Education')
plt.subplots(figsize=(20,5))
sns.countplot(data=df_employee, x='Attrition', hue='JobInvolvement')
# Job roles stacked by job involvement.
# NOTE(review): JobRole is a nominal column, so the KDE curve and bins=5 have
# no natural meaning here -- confirm whether they are wanted.
plt.subplots(figsize=(20,5))
sns.histplot(data=df_employee, x="JobRole", hue="JobInvolvement", multiple="stack",bins=5, kde=True)
# Age distribution within each job level, bucketed into 6 bins.
df_employee.groupby('JobLevel')['Age'].value_counts(bins=6).sort_index()
# Mean survey scores per group and Attrition value, computed from df_hrs.
# Each table is followed by a styled view that colours the per-column
# maximum (highlight_max) and minimum (highlight_min).

# Grouped by job role.
workForce =df_hrs.groupby(['JobRole','Attrition'], as_index=False)[['PerformanceRating','JobSatisfaction','EnvironmentSatisfaction','WorkLifeBalance']].mean().sort_values(by=['JobRole'])
workForce.style.apply(highlight_max).apply(highlight_min)
# Grouped by department.
workForceD =df_hrs.groupby(['Department','Attrition'], as_index=False)[['PerformanceRating','JobSatisfaction','EnvironmentSatisfaction','WorkLifeBalance']].mean().sort_values(by=['Department'])
workForceD.style.apply(highlight_max).apply(highlight_min)
# Grouped by gender.
workForceG =df_hrs.groupby(['Gender','Attrition'], as_index=False)[['PerformanceRating','JobSatisfaction','EnvironmentSatisfaction','WorkLifeBalance']].mean().sort_values(by=['Gender'])
workForceG.style.apply(highlight_max).apply(highlight_min)
# Grouped by education.
workForceE =df_hrs.groupby(['Education','Attrition'], as_index=False)[['PerformanceRating','JobSatisfaction','EnvironmentSatisfaction','WorkLifeBalance']].mean().sort_values(by=['Education'])
workForceE.style.apply(highlight_max).apply(highlight_min)
#workForcelevel =df_hrs.groupby(['JobLevel','Education','Attrition'], as_index=False)[['PerformanceRating','JobSatisfaction','EnvironmentSatisfaction','WorkLifeBalance']].mean().sort_values(by=['JobLevel'])
#workForcelevel

# Mean rating, involvement, income, promotion gap and tenure per group and
# Attrition value; only the job-role table also includes StockOptionLevel.
enpowerments =df_hrs.groupby(['JobRole','Attrition'], as_index=False)[['PerformanceRating','StockOptionLevel','JobInvolvement','MonthlyIncome','YearsSinceLastPromotion','YearsAtCompany']].mean().sort_values(by=['JobRole'])
enpowerments.style.apply(highlight_max).apply(highlight_min)
enpowermentsD =df_hrs.groupby(['Department','Attrition'], as_index=False)[['PerformanceRating','JobInvolvement','MonthlyIncome','YearsSinceLastPromotion','YearsAtCompany']].mean().sort_values(by=['Department'])
enpowermentsD.style.apply(highlight_max).apply(highlight_min)
enpowermentsG =df_hrs.groupby(['Gender','Attrition'], as_index=False)[['PerformanceRating','JobInvolvement','MonthlyIncome','YearsSinceLastPromotion','YearsAtCompany']].mean().sort_values(by=['Gender'])
enpowermentsG.style.apply(highlight_max).apply(highlight_min)
# Job roles split by stock option level.
# Columns are named via x=/hue=: current seaborn accepts only `data` as a
# positional argument, so a bare Series would not be read as the x variable.
plt.subplots(figsize=(20,5))
sns.countplot(data=df_employee, x='JobRole', hue='StockOptionLevel')
# Share of each job role. The earlier value_counts(['StockOptionLevel']) passed
# the list as `normalize`, i.e. it printed exactly these fractions.
print(df_employee['JobRole'].value_counts(normalize=True))
# Role counts within each stock option level.
print(df_employee.groupby('StockOptionLevel')['JobRole'].value_counts().sort_index(ascending=True))
# Mean stock option level per role, one row per Attrition value.
pd.pivot_table(df_employee, values = 'StockOptionLevel', index='Attrition', columns = 'JobRole').reset_index()
plt.subplots(figsize=(20,5))
sns.countplot(data=df_employee, x='Attrition', hue='StockOptionLevel')
# Share of each stock option level within the two Attrition groups.
df_employee.groupby('Attrition')['StockOptionLevel'].value_counts(normalize=True).sort_index()
Once you have reached StockOptionLevel 2, the chance of attrition is much lower.
# Mean job satisfaction per education level among JobLevel 3 employees.
df_hrs[df_hrs.JobLevel ==3].groupby('Education', as_index=False)[['JobSatisfaction']].mean().sort_values(by=['JobSatisfaction'])

# lineplot averages y for every x, so Attrition has to be numeric, but at this
# point df_employee still stores it as 'Yes'/'No'. A 0/1 flag is derived on a
# temporary frame; isin() accepts both the text and an already-encoded 1, so
# the line also works when the cell is re-run after the column was converted.
attrition_flag = df_employee['Attrition'].isin(['Yes', 1]).astype(int)
sns.lineplot(
    data=df_employee.assign(Attrition=attrition_flag),
    x='JobLevel',
    y='Attrition',
    hue='Education',
)

# Age distribution within each stock option level, bucketed into 6 bins.
df_employee.groupby('StockOptionLevel')['Age'].value_counts(bins=6).sort_index()
Looking at which ages tend to have which job levels, it is really clear that most young people start in level 1, and most of them stay in this level for 10 years. If you show some engagement you will reach another level faster. Most of the people in level 3 are in their late 30s. From 40 years old, levels 4 and 5 are more dominant.
From the two graphs above we can see that the youngest employees are single and are more likely to leave the company. More people around 30 years old also leave; they tend to be married, but are seeking a new direction or a new job after working a few years at one place.
# Need to have numeric values for Attrition.
# Mean satisfaction and involvement per job role, ordered by involvement.
happy_job = df_Anumber.groupby('JobRole', as_index=False)[['JobSatisfaction','EnvironmentSatisfaction','JobInvolvement']].mean().sort_values(by=['JobInvolvement'])
happy_job.style.apply(highlight_max).apply(highlight_min)
df_Anumber['OverTime'] = df_Anumber['OverTime'].map({'Yes': 1, 'No': 0})
# Mean age per job role.
df_employee.groupby('JobRole', as_index=False)[['Age']].mean().sort_values(by=['Age'])
# Mean job satisfaction per education level among JobLevel 3 employees.
df_Anumber[df_Anumber.JobLevel ==3].groupby('Education', as_index=False)[['JobSatisfaction']].mean().sort_values(by=['JobSatisfaction'])
# From here on df_employee carries Attrition as 1 (Yes) / 0 (No).
df_employee['Attrition'] = df_employee['Attrition'].map({'Yes': 1, 'No': 0})
# Map education labels back to their numeric codes. 'College' matches the label
# used when the codes were relabelled (the former key 'Collage' could never
# match), and the result is assigned back because replace(inplace=True) on
# `df.Education` is not guaranteed to update the parent frame.
df_Anumber['Education'] = df_Anumber['Education'].replace(
    {'High School': 1, 'College': 2, 'Bachelor': 3, 'Master': 4, 'Doctorate': 5})
# Mean income and attrition rate per job role, ordered by income.
role_income = df_employee.groupby('JobRole', as_index=False)[['MonthlyIncome', 'Attrition']].mean().sort_values(
    by=['MonthlyIncome'])
role_income.style.apply(highlight_max).apply(highlight_min)
People with a doctoral degree on level 3 have lower overall job satisfaction than other education levels.
# Mean promotion gap, training count and job satisfaction per education level
# among JobLevel 4 employees, ordered by promotion gap.
df_Anumber[df_Anumber.JobLevel ==4].groupby('Education', as_index=False)[['YearsSinceLastPromotion','TrainingTimesLastYear','JobSatisfaction']].mean().sort_values(by=['YearsSinceLastPromotion'])
# Tenure per job role, split by Attrition.
plt.figure(figsize=(20,10))
sns.boxplot(data=df_employee, x='JobRole', y='YearsAtCompany',hue='Attrition')
# Monthly income per job role, split by Attrition.
plt.figure(figsize=(20,10))
sns.boxplot(data=df_employee, x='JobRole', y='MonthlyIncome',hue='Attrition')
It takes time for people with doctor degree to reach level 4 among people with higher education
# Monthly income per job level, split by Attrition.
plt.figure(figsize=(20,10))
sns.boxplot(data=df_employee, x='JobLevel', y='MonthlyIncome',hue='Attrition')
People in level 4 who leave have a much lower income than those who stay.
These plots need Attrition as a numeric feature.
# Bar plot of Attrition (on the y axis) for each NumCompaniesWorked value.
sns.catplot(x = 'NumCompaniesWorked', y = 'Attrition', data=df_employee, aspect= 3, kind = 'bar')
If you have worked for 2-4 companies, you are less likely to leave.
# Attrition per number of previous employers, split by gender.
# catplot is the current name of the former factorplot.
sns.catplot(x='NumCompaniesWorked', y='Attrition', hue='Gender', data=df_employee, aspect=3, kind='bar')
Splitting on gender, we can clearly see that the attrition rate stays up for men who have worked for many companies, but is lower for women.
# Attrition per job level, one line per education level.
# catplot is the current name of the former factorplot; kind='point' is given
# explicitly because catplot defaults to a strip plot, whereas factorplot drew
# point plots by default.
sns.catplot(x='JobLevel', y='Attrition', hue='Education', data=df_employee, aspect=4, ci=None, kind='point')
A high education at job level 3 is increasing the attrition rate.
# Attrition (line, left axis) against monthly income (bars, right axis) per
# job role, both taken from role_income.
fig, ax1 = plt.subplots(figsize=(20,6))
# sort=False keeps the rows in the order they have in role_income.
sns.lineplot(data = role_income, x='JobRole',y='Attrition', sort = False, ax=ax1)
# Second y axis sharing the same x axis.
ax2 = ax1.twinx()
sns.barplot(data = role_income, x='JobRole', y='MonthlyIncome', alpha=0.7, ax=ax2)
Sales have the highest attrition and lower income; attrition increases for HR, due to higher income and good job satisfaction; the least attrition is among those with the highest income.
From the EDA we obtained some interesting findings.
From the EDA we can see some interesting things
# Point plots relating promotion gap, training and satisfaction.
# Each entry is (x, y, hue, frame); the plots are drawn in this order.
point_plots = [
    ('Education', 'YearsSinceLastPromotion', 'Attrition', df_employee),
    ('JobRole', 'YearsSinceLastPromotion', 'Department', df_employee),
    ('JobRole', 'TrainingTimesLastYear', 'Attrition', df_employee),
    ('JobRole', 'YearsSinceLastPromotion', 'Attrition', df_employee),
    ('YearsSinceLastPromotion', 'TrainingTimesLastYear', 'Attrition', df_employee),
    ('YearsSinceLastPromotion', 'TrainingTimesLastYear', 'JobInvolvement', df_employee),
    ('TrainingTimesLastYear', 'YearsSinceLastPromotion', 'JobRole', df_employee),
    ('YearsSinceLastPromotion', 'TrainingTimesLastYear', 'Attrition', df_Anumber),
    ('TrainingTimesLastYear', 'JobSatisfaction', 'JobRole', df_Anumber),
]
for x_col, y_col, hue_col, frame in point_plots:
    # catplot is the current name of the former factorplot; kind='point' is
    # given explicitly because catplot defaults to a strip plot, whereas
    # factorplot drew point plots by default.
    sns.catplot(x=x_col, y=y_col, hue=hue_col, data=frame, aspect=4, ci=None, kind='point')