--- /dev/null
+from itertools import product
+from parse_logs import parse
+
+# Hyperparameter grid to look up. Values are kept as strings (or None when
+# the option is absent) so they compare equal to the parsed log values.
+# NOTE(review): presumably the parser returns the regex captures as text --
+# confirm against parse_logs.
+hyperparams = {
+    "optim": ["sgd", "adadelta", "adam"],
+    "learning_rate": ['1', '0.1', '0.01', '0.001'],
+    "start_decay_steps": [None, '19750', '9875'],
+}
+
+log_files = [
+    "/srv/ftp/share/archive/training/Clean-cs-en/logs",
+    "/srv/ftp/share/archive/training/Clean-de-en/logs",
+    "/srv/ftp/share/archive/training/Tagged-cs-en/logs",
+    "/srv/ftp/share/archive/training/Tagged-de-en/logs",
+]
+
+# Every (optim, learning_rate, start_decay_steps) combination of the grid.
+hyper_configs = list(product(
+    hyperparams["optim"],
+    hyperparams["learning_rate"],
+    hyperparams["start_decay_steps"],
+))
+
+# corpus -> {config tuple -> path of the log of the matching training run}
+result_table = {}
+
+# get list of logs
+logs = parse(log_files)
+
+for corpus in logs:
+    runs = logs[corpus]["train"]
+    print(f"Corpus: {corpus} with {len(runs)}")
+    result_table[corpus] = {}
+
+    # Compute each run's config tuple once instead of once per grid entry.
+    # The learning-rate default is the string "1": the grid holds strings,
+    # so an int default could never compare equal to any grid entry.
+    run_configs = [
+        (
+            (
+                run["params"].get("optim", "sgd"),
+                run["params"].get("learning_rate", "1"),
+                run["params"].get("start_decay_steps", None),
+            ),
+            run["params"],
+        )
+        for run in runs
+    ]
+
+    for config in hyper_configs:
+        for current_config, params in run_configs:
+            if config == current_config:
+                # When several runs share a config, the last one wins.
+                result_table[corpus][config] = params["path"]
+        if config not in result_table[corpus]:
+            print(f"No model found for {config}")
+
+#print(result_table)
+
+# put values into table
+
+
"start_time" : re.compile(r"^\[(?P<start_time>.+) INFO\] Starting training .*"),
"end_time" : re.compile(r"^\[(?P<end_time>.+) INFO\] Saving checkpoint .*"),
"model_path_train" : re.compile("^save_model: ?\"(?P<model_path>.+)\""),
- "optim" : re.compile("optim: \"(?P<optim>.+)\""),
- "learning_rate" : re.compile("learning_rate: (?P<learning_rate>.+)"),
- "start_decay_steps" : re.compile("start_decay_steps: (?P<start_decay_steps>.+)"),
+ "optim" : re.compile("optim: ?\"(?P<optim>.+)\""),
+ "learning_rate" : re.compile("learning_rate: ?(?P<learning_rate>.+)"),
+ "start_decay_steps" : re.compile("start_decay_steps: ?(?P<start_decay_steps>.+)"),
"corpus_train" : re.compile("Using config from .+/(?P<corpus>[^/]+)/.+?config$"),
"type" : re.compile("Using config from .+/(?P<type>[^/]+).+?config$")
}, "preprocess": {
def extract_config(rules, path):
with open(path) as log_file:
- config = {}
+ config = {"path": path}
n_lines = 0
for line in log_file:
n_lines += 1
model = []
with open(path) as log_file:
current_step = {}
- for line in tqdm(log_file, total=config["length"]):
+ for line in tqdm(log_file, disable=(__name__ != '__main__'), total=config["length"]):
# apply regex to each line
for rule, regex in rules[config["type"]].items():
if regex.match(line):
models = {}
- for log in tqdm(log_files):
+ for log in tqdm(log_files, disable=(__name__ != '__main__')):
if isfile(log):
debug(f"Found log '{log}'")
# pass over file and try to get the config section
--- /dev/null
+from os import environ, listdir
+from logging import info, warning, debug, basicConfig
+
+from fire import Fire
+from tqdm import tqdm
+
+from parse_logs import parse
+
+def calculate_perfect_validation(log, corpus, run):
+    """Find the steps with the best validation and training accuracy.
+
+    Args:
+        log: Parsed logs, indexed as ``log[corpus]["train"][run]["steps"]``.
+        corpus: Key of the corpus inside ``log``.
+        run: Index/key of the training run inside ``log[corpus]["train"]``.
+
+    Returns:
+        ``{"valid": (step, accuracy), "train": (step, accuracy)}`` holding
+        the first step that reached the highest accuracy of each kind, or
+        ``(0, 0)`` when no entry has an accuracy above zero.
+    """
+    max_valid = (0, 0)
+    max_train = (0, 0)
+    for entry in log[corpus]["train"][run]["steps"].values():
+        if entry["valid_accuracy"] > max_valid[1]:
+            max_valid = (entry["step"], entry["valid_accuracy"])
+        # The training maximum is tracked independently of the validation
+        # maximum; previously it was a copy of the validation line and always
+        # ended up equal to max_valid.
+        # NOTE(review): assumes step entries expose "train_accuracy" --
+        # confirm the key name against the parse_logs rules. Entries without
+        # it are skipped.
+        train_accuracy = entry.get("train_accuracy")
+        if train_accuracy is not None and train_accuracy > max_train[1]:
+            max_train = (entry["step"], train_accuracy)
+    return {"valid": max_valid, "train": max_train}
+
+def get_perfect_steps(log):
+    """Unfinished stub: collects the corpus names found in ``log``.
+
+    Args:
+        log: Mapping keyed by corpus name.
+
+    Returns:
+        None. Nothing is computed yet.
+    """
+    # The signature had a stray colon (a syntax error that made the whole
+    # module unimportable) and the body read the undefined name ``corpus``
+    # instead of the ``log`` parameter.
+    # TODO: compute the best steps per corpus/run and return them.
+    corpora = log.keys()
+
+if __name__ == '__main__':
+    # Logging is opt-in: it is configured only when the LOGGING environment
+    # variable is set, and its value is passed through as the level.
+    log_level = environ.get("LOGGING")
+    if log_level is not None:
+        basicConfig(level=log_level)
+
+    # Expose the log parser as the "parse" command-line sub-command.
+    commands = {"parse": parse}
+    Fire(commands)