-
Notifications
You must be signed in to change notification settings - Fork 1
/
Pacejka_database_analysis.py
133 lines (98 loc) · 4.9 KB
/
Pacejka_database_analysis.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
# -*- coding: utf-8 -*-
# E. Cabrol - May 26, 2020
# Analysis of tire database with Pandas
import pandas as pd
from pandas.plotting import scatter_matrix
import matplotlib.pyplot as plt
import re
import numpy as np
# function used to scrape filepath
def scrape(filepath):
    """Split a tire-file path into its metadata components.

    Expected layout (5 slashes, 6 segments after the OEM prefix):
        "<oem>_<model>/<dim>/<dot>/<skipped>/<measure>/<tirfile>"

    Group 5 of the pattern is deliberately dropped from the return value
    (same as the original behaviour, which skipped that path component).

    Parameters
    ----------
    filepath : str
        Path to a tir file, relative or absolute up to the OEM prefix.

    Returns
    -------
    tuple of str
        (oem, model, dim, dot, measure, tirfile)

    Raises
    ------
    ValueError
        If the path does not match the expected layout.  The original
        code crashed with an opaque ``AttributeError`` on ``None`` here;
        an explicit error names the offending path instead.
    """
    gotIt = re.search(r"^(\w+?)_(.+?)/(.+?)/(.+?)/(.+?)/(.+?)/(.+)", filepath)
    if gotIt is None:
        raise ValueError("filepath does not match expected layout: %r" % (filepath,))
    # group(i, j, ...) returns the selected groups as a tuple
    return gotIt.group(1, 2, 3, 4, 6, 7)
# Definitions
# Longitudinal Pacejka micro-coefficients analysed in the sections below
params_longi = ("PCX1", "PDX1", "PDX2", "PEX1", "PEX2", "PEX3", "PEX4",
                "PKX1", "PKX2", "PKX3", "PHX1", "PHX2", "PVX1", "PVX2")
# Import database as dataframe
# na_values argument to avoid some useless lines: '#DIV/0!' cells (Excel
# export artifacts) are read as NaN instead of strings
df = pd.read_csv('BD_tire_ALL.txt', sep='\t', encoding="ISO-8859-1", na_values='#DIV/0!')
# df = pd.read_csv('BD_tire_ALL_cleaned.txt',sep='\t',encoding = "ISO-8859-1",na_values='#DIV/0!')
# Remove useless columns (file metadata, lateral/combined scaling factors, ...)
# NB: the original list contained 'LVY' twice; the duplicate entry is removed.
df = df.drop(columns=['FILE_TYPE', 'FILE_FORMAT', 'LENGTH', 'FORCE', 'ANGLE', 'MASS', 'TIME',
                      'PROPERTY_FILE_FORMAT', 'MFSAFE1', 'MFSAFE2', 'MFSAFE3',
                      'VXLOW', 'LONGVL', 'TYPE', 'ASPECT_RATIO', 'VERTICAL_DAMPING',
                      'KPUMIN', 'KPUMAX', 'ALPMIN', 'ALPMAX', 'CAMMIN', 'CAMMAX',
                      'LFZO', 'LCX', 'LMUX', 'LEX', 'LKX', 'LHX', 'LVX', 'LCY', 'LVY',
                      'LMUY', 'LEY', 'LKY', 'LHY', 'LGAY', 'LTR', 'LRES', 'LGAZ',
                      'LXAL', 'LYKA', 'LVYKA', 'LS', 'LSGKP', 'LSGAL', 'LGYR',
                      'LMX', 'LMY', 'LGAX', 'LVMX', 'QTZ1', 'MBELT',
                      'FileIdent', 'FileCreator'])
#%% Add new columns
# for usual Pacejka macro-coefficients (pure longitudinal):
#   Cx = PCX1                      -> already a column, nothing to add
#   Dx = PDX1 * Fnomin             -> peak longitudinal force
#   Kx = PKX1 * Fnomin             -> longitudinal slip stiffness
#   Bx = Kx / (Cx * Dx)            -> stiffness factor, Fnomin cancels out
#   Ex = PEX1                      -> already a column, nothing to add
df['Dx'] = df['PDX1'] * df['FNOMIN']
df['Kx'] = df['PKX1'] * df['FNOMIN']
df['Bx'] = df['PKX1'] / (df['PCX1'] * df['PDX1'])
#Ex = PEX1
# and for information that can be extracted from the pathname to the tirfile,
# using "scrape" function.
# The inflation pressure is encoded in the tirfile name, e.g. "P25bar" -> 2.5 bar.
pressure_re = re.compile(r"P(\d+)bar")  # hoisted: compiled once, not once per row
for index, row in df.iterrows():
    filepath = df.loc[index, 'File']
    (tire_oem, tire_model, tire_dim, tire_dot,
     tire_measure, tire_tirfile) = scrape(filepath)
    match_object = pressure_re.search(tire_tirfile)
    if match_object:
        pressure = float(match_object.group(1)) / 10
    else:
        # np.nan instead of the original "NULL" string: the string sentinel
        # forced the 'Pressure' column to object dtype and broke numeric
        # operations and NaN handling downstream
        pressure = np.nan
    df.loc[index, 'OEM'] = tire_oem
    df.loc[index, 'Model'] = tire_model
    df.loc[index, 'Dim'] = tire_dim
    df.loc[index, 'DOT'] = tire_dot
    df.loc[index, 'Pressure'] = pressure
    df.loc[index, 'Measure'] = tire_measure
    df.loc[index, 'tirfile'] = tire_tirfile
#%% Issue with sign conventions for PDX1 and PDX2
# In the initial database about 10% of the tires have a negative PDX1 value
negative_mask = df.PDX1 < 0
df[negative_mask].count()
# Nothing specific can be identified among those tires : all oems are concerned,
# different test procedures, etc ...
df_PDX1neg = df[negative_mask]
# Absolute value of PDX1 is used from now on
# NB : this triggers a SettingWithCopyWarning when run from the console
df.PDX1 = df.PDX1.abs()
#%% LONGITUDINAL - Only "Use mode = 4"
# Keep only the pure-longitudinal measurements (USE_MODE == 4)
df_mode4 = df[df['USE_MODE'] == 4]
# Summary statistics over the longitudinal coefficient columns
stats_mode4 = df_mode4.loc[:, 'PCX1':'PKX3'].describe()
# A few plots: histograms of the two main shape/peak factors, plus a
# PDX1-vs-PDX2 scatter on the full dataframe
plt.figure(1)
df_mode4['PCX1'].plot(kind='hist', bins=40, title='PCX1 - USE_MODE=4')
plt.figure(2)
df_mode4['PDX1'].plot(kind='hist', bins=40, title='PDX1 - USE_MODE=4')
df.plot.scatter(x='PDX1', y='PDX2')
plt.show()
#%%
# Check for outliers
# The following section classifies as outliers all tires for which the
# difference with the mean is greater than coef_nok times the standard
# deviation, for any longitudinal parameter.
coef_nok = 5.0
df_nok = pd.DataFrame()  # accumulator for the flagged (outlier) rows
headers = ["name", "min", "1%", "25%", "mean", "75%", "99%", "max", "std"]
headers_format = "{:^10s}" * len(headers)
print(headers_format.format(*headers))
for param in params_longi:
    # hoist the column and its loop-invariant statistics: the original
    # recomputed mean()/std() up to six times per parameter
    col = df_mode4[param]
    col_mean = col.mean()
    col_std = col.std()
    col_min = col.min()
    col_max = col.max()
    # print table of statistical data for each parameter
    print("{:^10s}".format(param), end='')
    values = [col_min, col.quantile(0.01), col.quantile(0.25), col_mean,
              col.quantile(0.75), col.quantile(0.99), col_max, col_std]
    values_format = "{:^10.2f}" * len(values)
    print(values_format.format(*values), end='')
    # check if (max-mean) or (mean-min) are greater than accepted threshold
    upperHalfRange = col_max - col_mean
    lowerHalfRange = col_mean - col_min
    if upperHalfRange >= coef_nok * col_std or lowerHalfRange >= coef_nok * col_std:
        print("\tKO")
        # append the NOK rows (for this param) to the df_nok dataframe.
        # pd.concat replaces DataFrame.append, which was deprecated in
        # pandas 1.4 and removed in pandas 2.0.
        df_nok = pd.concat([df_nok,
                            df_mode4[np.abs(col - col_mean) >= (coef_nok * col_std)]])
    else:
        print("\t-")
# Remove duplicates from df_nok since some rows may have been flagged
# by several parameters
df_nok.drop_duplicates(inplace=True)
#%% Create new dataframes
# Drop every flagged outlier row to obtain a "clean" dataframe
df_clean = df_mode4.drop(df_nok.index)
# Visual sanity check: pairwise scatter plots of the longitudinal coefficients
scatter_matrix(df_clean.loc[:, 'PCX1':'PKX3'], figsize=(12, 12))