111 lines
3.9 KiB
Python
111 lines
3.9 KiB
Python
import glob
|
|
|
|
import matplotlib.pyplot as plt
|
|
import numpy as np
|
|
import pandas as pd
|
|
import scipy.stats as stats
|
|
import seaborn as sns
|
|
|
|
# use this: https://www.estopwatch.net/
|
|
|
|
def read_file(file_path):
    """Load a CSV file and parse its 'Elapsed time' column as datetimes.

    Args:
        file_path: Path of the CSV file to read; it must contain an
            'Elapsed time' column.

    Returns:
        The loaded DataFrame with 'Elapsed time' converted via
        ``pd.to_datetime``. Values that cannot be parsed become NaT
        (``errors='coerce'``) instead of raising.
    """
    frame = pd.read_csv(file_path)
    parsed_times = pd.to_datetime(frame['Elapsed time'], errors='coerce')
    frame['Elapsed time'] = parsed_times
    return frame
|
|
|
|
|
|
def analyze_new_error(run_df, groundtruth_df):
|
|
cumulative_errors = run_df['Elapsed time'] - groundtruth_df['Elapsed time']
|
|
cumulative_errors_in_seconds = cumulative_errors.dt.total_seconds()
|
|
|
|
new_errors_in_seconds = cumulative_errors_in_seconds.diff().fillna(cumulative_errors_in_seconds[0])
|
|
new_error_points = new_errors_in_seconds[new_errors_in_seconds != 0].index.tolist()
|
|
|
|
return new_errors_in_seconds[new_error_points]
|
|
|
|
def calculate_statistics(errors):
    """Compute summary statistics for a collection of error values.

    Args:
        errors: Sized collection of numeric error values (for example a
            list or a pandas Series).

    Returns:
        A dict with the keys 'mean_error', 'median_error', 'stddev_error'
        (as computed by ``np.std``), 'rmse_error' and
        'confidence_interval' (a ``(low, high)`` tuple for the 95 %
        Student-t interval around the mean). For empty input every value
        is zero and an extra 'error_frequency' key set to 0 is included.
    """
    if len(errors) == 0:
        return {
            'mean_error': 0,
            'median_error': 0,
            'stddev_error': 0,
            'rmse_error': 0,
            'confidence_interval': (0, 0),
            'error_frequency': 0
        }

    sample_count = len(errors)
    mean_error = np.mean(errors)
    median_error = np.median(errors)
    stddev_error = np.std(errors)
    rmse_error = np.sqrt(np.mean(np.square(errors)))

    # The t-interval needs at least two samples (df > 0) and a positive
    # scale; otherwise SciPy returns (nan, nan). With a single sample or
    # no spread, collapse the interval onto the mean instead.
    standard_error = stats.sem(errors) if sample_count > 1 else 0.0
    if sample_count > 1 and standard_error > 0:
        # The confidence level is passed positionally because its keyword
        # name differs between SciPy versions.
        ci_low, ci_high = stats.t.interval(
            0.95,
            df=sample_count - 1,
            loc=mean_error,
            scale=standard_error,
        )
    else:
        ci_low, ci_high = mean_error, mean_error

    return {
        'mean_error': mean_error,
        'median_error': median_error,
        'stddev_error': stddev_error,
        'rmse_error': rmse_error,
        'confidence_interval': (ci_low, ci_high),
    }
|
|
|
|
|
|
def _print_statistics(title, results, error_frequency):
    """Print one statistics report (per-run or aggregate) in the shared format."""
    ci_low, ci_high = results['confidence_interval']
    print(title)
    print(f"Mean New Error: {results['mean_error']:.5f} seconds")
    print(f"Median New Error: {results['median_error']:.5f} seconds")
    print(f"Standard Deviation of New Error: {results['stddev_error']:.5f} seconds")
    print(f"RMSE of New Error: {results['rmse_error']:.5f} seconds")
    print(f"95% Confidence Interval of New Error: ({ci_low:.5f}, {ci_high:.5f}) seconds")
    print(f"New Error Frequency: {error_frequency*100:.5f} %")


def main():
    """Compare every run in 'runs/*.csv' against 'groundtruth.csv'.

    For each run, prints statistics of the newly introduced errors and how
    often they occur; then prints the same statistics pooled over all runs,
    a "mean ± half-width" summary, and finally saves and shows a histogram
    of all new errors (written to 'error_dist').
    """
    groundtruth_file = 'groundtruth.csv'
    # glob returns matches in arbitrary order; sort (lexicographically) so
    # that the "runN" labels are the same on every invocation.
    run_files = sorted(glob.glob('runs/*.csv'))

    # Without runs there is nothing to compare, and the pooled frequency
    # below would divide by zero.
    if not run_files:
        print("No run files found matching 'runs/*.csv'; nothing to analyze.")
        return

    groundtruth_df = read_file(groundtruth_file)
    run_dfs = {f'run{i+1}': read_file(file) for i, file in enumerate(run_files)}

    all_errors = []
    total_points = 0

    for run, df in run_dfs.items():
        errors = analyze_new_error(df, groundtruth_df)
        all_errors.extend(errors)
        total_points += len(df)

        results = calculate_statistics(errors)
        # A run file with no rows has no error frequency to speak of.
        error_frequency = len(errors) / len(df) if len(df) else 0.0

        _print_statistics(f"Results for {run}:", results, error_frequency)
        print('-----------------------------------------')

    total_results = calculate_statistics(all_errors)
    total_error_frequency = len(all_errors) / total_points if total_points else 0.0

    _print_statistics("Total Statistics:", total_results, total_error_frequency)

    # Report the pooled mean as "mean ± half-width" of the confidence interval.
    print(f"New Error: {total_results['mean_error']:.5f} ± {total_results['confidence_interval'][1] - total_results['mean_error']:.5f} seconds")

    plt.figure(figsize=(10, 5))
    sns.histplot(all_errors, bins=12, kde=False)
    plt.title('Distribution of Newly Introduced Errors (macOS)')
    plt.xlabel('Error Duration (seconds)')
    plt.ylabel('Frequency')
    plt.savefig('error_dist', dpi=300)
    plt.show()
|
|
|
|
# Run the analysis only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
|