Skip to content
Snippets Groups Projects

Sc 107/default extension file selection

Merged Ghost User requested to merge SC-107/default-extention-file-selection into master
1 file
+ 30
23
Compare changes
  • Side-by-side
  • Inline
+ 30
23
@@ -20,16 +20,18 @@ class ModelClass:
# Data(structures)
self.file_path_source = {} # Dictionary of source files
self.file_path_shadow = {} # Participant objects, containing their shadow files
self.data_source = None # internal representation of file_path_source (pandas df)
self.data_shadow = None # internal representation of file_path_shadow (pandas df)
self.analysis_results = None # tuple of the results following analysis
# Participant objects, containing their shadow files
self.file_path_shadow = {}
# internal representation of file_path_source (pandas df)
self.data_source = None
# internal representation of file_path_shadow (pandas df)
self.data_shadow = None
self.analysis_results = None # tuple of the results following analysis
self.save_pref = "standard" # Possible codes: 'standard' and 'perpart'
def read_file(self):
"""Reads the .txt or .csv and starts their analysis."""
# When selecting CSVs, run comparison stuff so far:
for p_num in self.file_path_shadow:
shadow_files = self.file_path_shadow[p_num].get_shadow_files()
@@ -37,29 +39,30 @@ class ModelClass:
print(sh_file_key)
shadow_file_path = shadow_files[sh_file_key]
print(self.file_path_source.keys())
source_file_path = self.file_path_source[sh_file_key[2:]]#Note the 2: is a bit crude. But needed for now
# Note the 2: is a bit crude. But needed for now
source_file_path = self.file_path_source[sh_file_key[2:]]
if ".txt" in shadow_file_path:
self.read_txt(source_file_path,shadow_file_path,p_num)
self.read_txt(source_file_path, shadow_file_path, p_num)
elif ".csv" in shadow_file_path:
reader = CSVReader(source_file_path, shadow_file_path)
source_words, shadow_words = reader.read()
#print(source_words,shadow_words)
# print(source_words,shadow_words)
self.stats.words_source = source_words
self.stats.words_shadow = shadow_words
comparer = DataComparer(source_words, shadow_words)
comparer.start()
#self.stats.analyze()
# self.stats.analyze()
# User preferences/settings
self.save_pref = "standard" # Possible codes: 'standard' & 'perpart'
def read_txt(self,source_file_path,shadow_file_path):
def read_txt(self, source_file_path, shadow_file_path):
"""
Interprets the stored input files and stores their contents in pandas
dataframes.
The dataframes have columns 'tekst', 'Start', 'End' and 'comment'.
Their rows are indexed with an integer.
Extension: When multiple source & shadow files used, assume equal amount.
Extension: When multiple source/shadow files used, assume equal amount.
So one on one matching needs to be in place.
"""
# To test when file_path_source and file_path_shadow are not available,
@@ -70,10 +73,10 @@ class ModelClass:
analyses_results = {}
with open(source_file_path, 'r') as source,\
open(shadow_file_path, 'r') as shadow:
self.stats.data_source = self.format_input(source)
self.stats.data_shadow = self.format_input(shadow)
analyses_results[p_num] = self.stats.analyze()
open(shadow_file_path, 'r') as shadow:
self.stats.data_source = self.format_input(source)
self.stats.data_shadow = self.format_input(shadow)
analyses_results[p_num] = self.stats.analyze()
self.analysis_results = analyses_results
@@ -110,7 +113,7 @@ class ModelClass:
output.append(ms + 1000 * (seconds + 60 * (minutes + 60 * hours)))
return output
def save_txt(self, save_path): # Needs to be extended to save multiple files
def save_txt(self, save_path): # Has to be extended to save multiple files
""" Function to save a file given the file_name
Furthermore, it can convert times to hour format if deemed necessary.
@@ -128,18 +131,20 @@ class ModelClass:
total_latency, avg_latency, latency_p_word = latency_results
mistakes_result = part_results[1]
# Make a new frame and copy the elements so that data is not changed:
# Make a new frame & copy the elements so that data is not changed:
output_latencies = pd.DataFrame(np.zeros(latency_p_word.shape),
columns=["tekst", "latency"])
output_latencies["latency"] = [self.stats.to_hour_format(latency)
for latency in latency_p_word["latency"]]
for latency in latency_p_word[
"latency"]]
output_latencies["tekst"] = latency_p_word["tekst"]
# Add comparison number when multiple files are used and put in map
txt_path = save_path
csv_path = save_path[:-4] + "_csv.txt"
if len(self.analysis_results) > 1:
# Implements a map structure per participant. If this option is selected.
# Implements a map structure per participant.
# If this option is selected.
if self.save_pref == "perpart":
folder_path_part = save_path[:-4] + "/" + part_num
os.mkdir(folder_path_part)
@@ -153,8 +158,8 @@ class ModelClass:
output_latencies.to_csv(csv_path, index=False)
np.savetxt(txt_path, output_latencies, fmt='%s',
header="tekst, latency",
footer="total latency: {}ms, average latency: {}ms, pairwise "
"mistakes: {}"
footer="total latency: {}ms, average latency: {}ms,"
"pairwise mistakes: {}"
.format(total_latency, avg_latency, mistakes_result),
comments="", delimiter=' ')
@@ -184,6 +189,8 @@ class ModelClass:
else:
participant_name = name.split('_')[0]
if participant_name in self.file_path_shadow.keys():
self.file_path_shadow[participant_name].add_shadow_file(file_path)
self.file_path_shadow[participant_name].add_shadow_file(
file_path)
else:
self.file_path_shadow[participant_name] = Participant(participant_name, file_path)
self.file_path_shadow[participant_name] = Participant(
participant_name, file_path)
Loading