import multiprocessing
from glob import glob
from os import path
from pathlib import PurePath
from musif import internal_data
from musif.common._logs import create_logger
from musif.common._utils import (
read_dicts_from_csv,
read_object_from_json_file,
read_object_from_yaml_file,
)
from musif.extract.constants import HARMONY_FEATURES, SCALE_RELATIVE_FEATURES, REQUIRE_MSCORE
# Names of the keys looked up in configuration dictionaries / YAML files.
# Each constant holds the literal key string; the attribute that receives the
# value is assigned in Configuration.__init__ or
# PostProcess_Configuration.__init__.

# Name handed to create_logger() by both configuration classes.
LOGGER_NAME = "musiF"

# Top-level key of the nested logging section, followed by the keys read
# inside that section.
LOG = "log"
LOG_FILE_PATH = "file_path"
FILE_LOG_LEVEL = "file_level"
CONSOLE_LOG_LEVEL = "console_level"

# Keys read by Configuration.__init__ (defaults live in _CONFIG_FALLBACK).
METADATA_DIR = "metadata_dir"
XML_DIR = "data_dir"
MUSESCORE_DIR = "musescore_dir"
CACHE_DIR = "cache_dir"
PARALLEL = "parallel"
MAX_PROCESSES = "max_processes"
FEATURES = "features"
BASIC_MODULES = "basic_modules"
BASIC_MODULES_ADDRESSES = "basic_modules_addresses"
FEATURE_MODULES_ADDRESSES = "feature_modules_addresses"
SPLIT_KEYWORDS = "split_keywords"
PARTS_FILTER = "parts_filter"
EXPAND_REPEATS = "expand_repeats"
WINDOW_SIZE = "window_size"
OVERLAP = "overlap"
PRECACHE_HOOKS = "precache_hooks"
MSCORE_EXEC = "mscore_exec"

# Keys read by PostProcess_Configuration.__init__ (defaults live in
# _CONFIG_POST_FALLBACK).
INTERNAL_DATA = "internal_data_dir"
# CHECK_FILE is read by both configuration classes.
CHECK_FILE = "checking_file"
DELETE_FILES = "delete_failed_files"
DELETE_HARMONY = "delete_files_without_harmony"
GROUPED = "grouped_analysis"
# NOTE(review): the key string is "split_passionA" and the constant name has
# three S's; both look like typos, but renaming either changes what is looked
# up in user configs / imported by other modules -- confirm before touching.
SPLIT_PASSSIONS = "split_passionA"
# NOTE(review): "intrumentation" (sic) is the key that is actually looked up.
UNBUNDLE_INSTRUMENTATION = "separate_intrumentation_column"
INSTRUMENTS_TO_KEEP = "instruments_to_keep"
INSTRUMENTS_TO_DELETE = "instruments_to_delete"
SUBSTRING_TO_DELETE = "substring_to_delete"
ENDSWITH = "columns_endswith"
STARTSWITH = "columns_startswith"
CONTAIN = "columns_contain"
REPLACE_NANS = "replace_nans"
MERGE_VOICES = "merge_voices"
PRESENCE = "delete_presence"

# Read by Configuration.__init__.
DFS_DIR = "dfs_dir"
# Default values used by Configuration for every key the user did not supply.
_CONFIG_FALLBACK = {
    # Logging section (nested dictionary).
    LOG: {
        LOG_FILE_PATH: "./musiF.log",
        FILE_LOG_LEVEL: "DEBUG",
        CONSOLE_LOG_LEVEL: "INFO",
    },
    # Directories.
    METADATA_DIR: "metadata",
    XML_DIR: ".",
    MUSESCORE_DIR: None,
    CACHE_DIR: None,
    # Parallel execution.
    PARALLEL: False,
    MAX_PROCESSES: 1,
    # Modules and features.
    PRECACHE_HOOKS: [],
    BASIC_MODULES: ["core"],
    BASIC_MODULES_ADDRESSES: ["musif.extract.basic_modules"],
    FEATURE_MODULES_ADDRESSES: ["musif.extract.features"],
    FEATURES: None,
    # Score handling.
    SPLIT_KEYWORDS: [],
    PARTS_FILTER: [],
    EXPAND_REPEATS: False,
    WINDOW_SIZE: 8,
    OVERLAP: 2,
    # Miscellaneous.
    CHECK_FILE: ".",
    MSCORE_EXEC: None,
    DFS_DIR: None,
}
# Default values used by PostProcess_Configuration for every key the user did
# not supply. Each key appears exactly once (DELETE_FILES used to be listed
# twice, which made the second entry silently override the first).
_CONFIG_POST_FALLBACK = {
    INTERNAL_DATA: "musif/internal_data",
    DELETE_FILES: False,
    GROUPED: False,
    DELETE_HARMONY: False,
    SPLIT_PASSSIONS: False,
    UNBUNDLE_INSTRUMENTATION: False,
    MERGE_VOICES: True,
    CHECK_FILE: ".",
    INSTRUMENTS_TO_KEEP: [],
    INSTRUMENTS_TO_DELETE: [],
    SUBSTRING_TO_DELETE: [],
    PRESENCE: [],
    ENDSWITH: [],
    STARTSWITH: [],
    CONTAIN: [],
    REPLACE_NANS: [],
}
class Configuration:
    """Hold the settings that drive feature extraction.

    Settings come from a YAML file, a dictionary or another
    ``Configuration``; keyword arguments override them, and every key that is
    still missing takes its value from ``_CONFIG_FALLBACK``. Building an
    instance also calls ``create_logger`` for ``LOGGER_NAME`` and loads the
    score metadata plus the lookup tables stored next to
    ``musif.internal_data``.
    """

    def __init__(self, arg=None, **kwargs):
        """Build the configuration and load the associated data files.

        Args:
            arg: Path (``str`` or ``PurePath``) of a YAML file, a ``dict`` of
                settings, another ``Configuration``, or ``None`` to start
                from the defaults only.
            **kwargs: Individual settings; they override the ones in ``arg``.

        Raises:
            TypeError: If ``arg`` is of none of the supported types.
        """
        config_data = {}
        if arg is not None:
            if isinstance(arg, (str, PurePath)):
                # NOTE(review): an empty YAML file presumably yields None;
                # fall back to an empty mapping so update() below works.
                config_data = read_object_from_yaml_file(arg) or {}
            elif isinstance(arg, dict):
                # Shallow copy: the kwargs override below must not leak into
                # the dictionary owned by the caller.
                config_data = dict(arg)
            elif isinstance(arg, Configuration):
                config_data = arg.to_dict()
            else:
                raise TypeError(
                    f"The argument type is {type(arg)}, and it was expected a dictionary, a Configuration, a string, or a Path object"
                )
        config_data.update(kwargs)  # Override values

        # The per-key log defaults live inside _CONFIG_FALLBACK[LOG], not at
        # the top level of _CONFIG_FALLBACK.
        log_defaults = _CONFIG_FALLBACK[LOG]
        log_config = config_data.get(LOG) or log_defaults
        self.log_file = log_config.get(LOG_FILE_PATH, log_defaults[LOG_FILE_PATH])
        self.file_log_level = log_config.get(
            FILE_LOG_LEVEL, log_defaults[FILE_LOG_LEVEL]
        )
        self.console_log_level = log_config.get(
            CONSOLE_LOG_LEVEL, log_defaults[CONSOLE_LOG_LEVEL]
        )
        create_logger(
            LOGGER_NAME, self.log_file, self.file_log_level, self.console_log_level
        )

        lookup = self._value_or_fallback
        self.metadata_dir = lookup(config_data, METADATA_DIR)
        self.xml_dir = lookup(config_data, XML_DIR)
        self.musescore_dir = lookup(config_data, MUSESCORE_DIR)
        self.dfs_dir = lookup(config_data, DFS_DIR)
        self.cache_dir = lookup(config_data, CACHE_DIR)
        self.parallel = lookup(config_data, PARALLEL)
        self.max_processes = lookup(config_data, MAX_PROCESSES)
        self.basic_modules = lookup(config_data, BASIC_MODULES)
        self.basic_modules_addresses = lookup(config_data, BASIC_MODULES_ADDRESSES)
        self.features = lookup(config_data, FEATURES)
        self.feature_modules_addresses = lookup(
            config_data, FEATURE_MODULES_ADDRESSES
        )
        self.split_keywords = lookup(config_data, SPLIT_KEYWORDS)
        self.parts_filter = lookup(config_data, PARTS_FILTER)
        self.expand_repeats = lookup(config_data, EXPAND_REPEATS)
        self.window_size = lookup(config_data, WINDOW_SIZE)
        self.overlap = lookup(config_data, OVERLAP)
        self.precache_hooks = lookup(config_data, PRECACHE_HOOKS)
        self.mscore_exec = lookup(config_data, MSCORE_EXEC)
        # Always the directory of the installed musif.internal_data package;
        # it is not configurable here.
        self.internal_data_dir = path.dirname(internal_data.__file__)
        self.check = lookup(config_data, CHECK_FILE)
        self._load_metadata()

    @staticmethod
    def _value_or_fallback(config_data, key):
        """Return ``config_data[key]`` or a private copy of its default.

        List defaults are copied so that mutating an instance attribute never
        alters ``_CONFIG_FALLBACK`` (and with it every later instance).
        """
        if key in config_data:
            return config_data[key]
        default = _CONFIG_FALLBACK[key]
        return list(default) if isinstance(default, list) else default

    @staticmethod
    def _default_cpu_workers() -> int:
        """Return the worker count derived from the number of CPUs.

        Two CPUs are left free when more than three are available, otherwise
        half of them are used; the result is never lower than 1 (the plain
        formula gives 0 on a single-CPU host).
        """
        cpus = multiprocessing.cpu_count()
        workers = cpus - 2 if cpus > 3 else cpus // 2
        return max(1, workers)

    def is_requested_musescore_file(self) -> bool:
        """Return True if any feature listed in ``REQUIRE_MSCORE`` is requested."""
        return any(
            self.is_requested_feature_category(feature) for feature in REQUIRE_MSCORE
        )

    def is_requested_feature_category(self, feature) -> bool:
        """Return True if ``feature`` is requested.

        When ``self.features`` is None every feature counts as requested;
        otherwise ``feature`` must be contained in ``self.features``.
        """
        if self.features is None:
            return True
        return feature in self.features

    def is_requested_module(self, module) -> bool:
        """Return True if ``module`` corresponds to a requested feature.

        The last dotted component of ``module.__name__`` is compared,
        case-insensitively, against the end of every entry of
        ``self.features``. When ``self.features`` is None every module counts
        as requested.
        """
        if self.features is None:
            return True
        # rpartition gives the whole name back when it contains no dot.
        module_name = module.__name__.rpartition(".")[2].lower()
        return any(feature.lower().endswith(module_name) for feature in self.features)

    def to_dict(self) -> dict:
        """Return the settings as a dictionary accepted by the constructor.

        Every setting read in ``__init__`` is included, so that
        ``Configuration(cfg.to_dict())`` does not silently reset any of them
        to its default.
        """
        return {
            LOG: {
                LOG_FILE_PATH: self.log_file,
                FILE_LOG_LEVEL: self.file_log_level,
                CONSOLE_LOG_LEVEL: self.console_log_level,
            },
            METADATA_DIR: self.metadata_dir,
            XML_DIR: self.xml_dir,
            MUSESCORE_DIR: self.musescore_dir,
            DFS_DIR: self.dfs_dir,
            CACHE_DIR: self.cache_dir,
            PARALLEL: self.parallel,
            MAX_PROCESSES: self.max_processes,
            BASIC_MODULES: self.basic_modules,
            BASIC_MODULES_ADDRESSES: self.basic_modules_addresses,
            FEATURES: self.features,
            FEATURE_MODULES_ADDRESSES: self.feature_modules_addresses,
            SPLIT_KEYWORDS: list(self.split_keywords),
            PARTS_FILTER: list(self.parts_filter),
            EXPAND_REPEATS: self.expand_repeats,
            WINDOW_SIZE: self.window_size,
            OVERLAP: self.overlap,
            PRECACHE_HOOKS: self.precache_hooks,
            MSCORE_EXEC: self.mscore_exec,
            CHECK_FILE: self.check,
        }

    def _load_metadata(self) -> None:
        """Load score metadata and the internal lookup tables.

        Score metadata is read from every ``*.csv`` file found in
        ``<metadata_dir>/score`` and keyed by file name; a warning is printed
        when none is found. The remaining tables are CSV/JSON files read from
        ``self.internal_data_dir``. ``self.cpu_workers`` is set as well.
        """
        score_files = glob(path.join(self.metadata_dir, "score", "*.csv"))
        self.scores_metadata = {
            path.basename(file): read_dicts_from_csv(file) for file in score_files
        }
        if not self.scores_metadata:
            print(
                "\nMetadata could not be loaded properly!! Check metadata path in config file.\n"
            )

        def internal(file_name):
            return path.join(self.internal_data_dir, file_name)

        self.characters_gender = read_dicts_from_csv(internal("characters_gender.csv"))
        self.sound_to_abbreviation = read_object_from_json_file(
            internal("sound_abbreviation.json")
        )
        # Reverse mapping of the table loaded just above.
        self.abbreviation_to_sound = {
            abbreviation: sound
            for sound, abbreviation in self.sound_to_abbreviation.items()
        }
        self.sound_to_family = read_object_from_json_file(internal("sound_family.json"))
        self.family_to_abbreviation = read_object_from_json_file(
            internal("family_abbreviation.json")
        )
        self.translations_cache = read_object_from_json_file(
            internal("translations.json")
        )
        self.scoring_order = read_object_from_json_file(internal("scoring_order.json"))
        self.scoring_family_order = read_object_from_json_file(
            internal("scoring_family_order.json")
        )
        self.sorting_lists = read_object_from_json_file(internal("sorting_lists.json"))
        self.all_translations = read_object_from_json_file(
            internal("all_translations.json")
        )
        self.cpu_workers = self._default_cpu_workers()
class PostProcess_Configuration:
    """Hold the settings used by the post-processing step.

    Settings come from a YAML file, a dictionary, another
    ``PostProcess_Configuration`` or a ``Configuration``; keyword arguments
    override them, and every key that is still missing takes its value from
    ``_CONFIG_POST_FALLBACK`` (log settings: ``_CONFIG_FALLBACK[LOG]``).
    Building an instance also calls ``create_logger`` for ``LOGGER_NAME``.
    """

    # TODO: rename class without underscore
    def __init__(self, *args, **kwargs):
        """Build the post-processing configuration.

        Args:
            *args: At most one positional argument: the path (``str`` or
                ``PurePath``) of a YAML file, a ``dict`` of settings, a
                ``PostProcess_Configuration`` or a ``Configuration``.
            **kwargs: Individual settings; they override the ones taken from
                the positional argument.

        Raises:
            ValueError: If more than one positional argument is given.
            TypeError: If the positional argument has an unsupported type.
        """
        if len(args) > 1:
            raise ValueError(
                f"Unexpected number of args passed to constructor: {len(args)}"
            )
        config_data = {}
        if args:
            source = args[0]
            if isinstance(source, (str, PurePath)):
                # NOTE(review): an empty YAML file presumably yields None;
                # fall back to an empty mapping so update() below works.
                config_data = read_object_from_yaml_file(source) or {}
            elif isinstance(source, dict):
                # Shallow copy: the kwargs override below must not leak into
                # the dictionary owned by the caller.
                config_data = dict(source)
            elif isinstance(source, PostProcess_Configuration):
                config_data = source.to_dict_post()
            elif isinstance(source, Configuration):
                # Configuration only offers to_dict(); calling to_dict_post()
                # on it raised AttributeError.
                config_data = source.to_dict()
            else:
                raise TypeError(
                    f"The argument type is {type(args[0])}, and it was expected a dictionary, a Configuration, a string, or a Path object"
                )
        config_data.update(kwargs)  # Override values

        # The per-key log defaults live inside _CONFIG_FALLBACK[LOG], not at
        # the top level of _CONFIG_FALLBACK.
        log_defaults = _CONFIG_FALLBACK[LOG]
        log_config = config_data.get(LOG) or log_defaults
        self.log_file = log_config.get(LOG_FILE_PATH, log_defaults[LOG_FILE_PATH])
        self.file_log_level = log_config.get(
            FILE_LOG_LEVEL, log_defaults[FILE_LOG_LEVEL]
        )
        self.console_log_level = log_config.get(
            CONSOLE_LOG_LEVEL, log_defaults[CONSOLE_LOG_LEVEL]
        )
        create_logger(
            LOGGER_NAME, self.log_file, self.file_log_level, self.console_log_level
        )

        lookup = self._value_or_fallback
        self.internal_data = lookup(config_data, INTERNAL_DATA)
        self.check_file = lookup(config_data, CHECK_FILE)
        self.delete_files = lookup(config_data, DELETE_FILES)
        self.grouped_analysis = lookup(config_data, GROUPED)
        self.split_passionA = lookup(config_data, SPLIT_PASSSIONS)
        self.unbundle_instrumentation = lookup(config_data, UNBUNDLE_INSTRUMENTATION)
        self.merge_voices = lookup(config_data, MERGE_VOICES)
        self.instruments_to_keep = lookup(config_data, INSTRUMENTS_TO_KEEP)
        self.instruments_to_kill = lookup(config_data, INSTRUMENTS_TO_DELETE)
        self.substring_to_kill = lookup(config_data, SUBSTRING_TO_DELETE)
        self.delete_presence = lookup(config_data, PRESENCE)
        self.columns_endswith = lookup(config_data, ENDSWITH)
        self.columns_startswith = lookup(config_data, STARTSWITH)
        self.columns_contain = lookup(config_data, CONTAIN)
        self.replace_nans = lookup(config_data, REPLACE_NANS)
        self.delete_files_without_harmony = lookup(config_data, DELETE_HARMONY)

    @staticmethod
    def _value_or_fallback(config_data, key):
        """Return ``config_data[key]`` or a private copy of its default.

        List defaults are copied so that mutating an instance attribute never
        alters ``_CONFIG_POST_FALLBACK`` (and with it every later instance).
        """
        if key in config_data:
            return config_data[key]
        default = _CONFIG_POST_FALLBACK[key]
        return list(default) if isinstance(default, list) else default

    def to_dict_post(self) -> dict:
        """Return the settings as a dictionary accepted by the constructor.

        Every setting read in ``__init__`` is included, so that
        ``PostProcess_Configuration(cfg.to_dict_post())`` does not silently
        reset any of them to its default.
        """
        return {
            LOG: {
                LOG_FILE_PATH: self.log_file,
                FILE_LOG_LEVEL: self.file_log_level,
                CONSOLE_LOG_LEVEL: self.console_log_level,
            },
            INTERNAL_DATA: self.internal_data,
            CHECK_FILE: self.check_file,
            DELETE_FILES: self.delete_files,
            DELETE_HARMONY: self.delete_files_without_harmony,
            GROUPED: self.grouped_analysis,
            SPLIT_PASSSIONS: self.split_passionA,
            UNBUNDLE_INSTRUMENTATION: self.unbundle_instrumentation,
            MERGE_VOICES: self.merge_voices,
            INSTRUMENTS_TO_KEEP: self.instruments_to_keep,
            INSTRUMENTS_TO_DELETE: self.instruments_to_kill,
            SUBSTRING_TO_DELETE: self.substring_to_kill,
            ENDSWITH: self.columns_endswith,
            STARTSWITH: self.columns_startswith,
            CONTAIN: self.columns_contain,
            PRESENCE: self.delete_presence,
            REPLACE_NANS: self.replace_nans,
        }