diff --git a/extra_tests/regression_fits/trainable_prepro.yml b/extra_tests/regression_fits/trainable_prepro.yml index 132620e0b73a6a087b5663350347debd92d6e5f5..051bf17658cc65104fa0cd166b5c75b3a1582b48 100644 --- a/extra_tests/regression_fits/trainable_prepro.yml +++ b/extra_tests/regression_fits/trainable_prepro.yml @@ -35,8 +35,7 @@ mcseed: 1 load: "weights.weights.h5" -sampling: - separate_multiplicative: True +separate_multiplicative: True parameters: # This defines the parameter dictionary that is passed to the Model Trainer nodes_per_layer: [15, 10, 8] diff --git a/n3fit/runcards/examples/Basic_feature_scaling.yml b/n3fit/runcards/examples/Basic_feature_scaling.yml index a6d3e8e2241abe6295c8bc3ec12ad31e8fdded73..c240b925c2d26db380beb2d3bfbe75ffe8bf29ff 100644 --- a/n3fit/runcards/examples/Basic_feature_scaling.yml +++ b/n3fit/runcards/examples/Basic_feature_scaling.yml @@ -22,8 +22,7 @@ datacuts: ############################################################ theory: theoryid: 708 # database id -sampling: - separate_multiplicative: true +separate_multiplicative: True ############################################################ trvlseed: 1 diff --git a/n3fit/runcards/examples/Basic_hyperopt.yml b/n3fit/runcards/examples/Basic_hyperopt.yml index ded3f8b392a2d4280009db4e61a48d253d067b8d..bdf8a4e08e6b911c2dbb0ce191f6a01a10a71de7 100644 --- a/n3fit/runcards/examples/Basic_hyperopt.yml +++ b/n3fit/runcards/examples/Basic_hyperopt.yml @@ -40,8 +40,7 @@ datacuts: theory: theoryid: 708 # database id -sampling: - separate_multiplicative: true +separate_multiplicative: True ############################################################ hyperscan_config: diff --git a/n3fit/runcards/examples/Basic_runcard.yml b/n3fit/runcards/examples/Basic_runcard.yml index f315d80f450dfe8d1fb413ab5db191df286caf7b..f2231d6e2b2a72cb0051f81c4cb05d04267d6f98 100644 --- a/n3fit/runcards/examples/Basic_runcard.yml +++ b/n3fit/runcards/examples/Basic_runcard.yml @@ -24,9 +24,8 @@ theory: theoryid: 708 # database id resample_negative_pseudodata: True +separate_multiplicative: True -sampling: - separate_multiplicative: true parameters: # This defines the parameter dictionary that is passed to the Model Trainer nodes_per_layer: [15, 10, 8] activation_per_layer: ['sigmoid', 'sigmoid', 'linear'] diff --git a/n3fit/runcards/examples/Basic_runcard_closure_test.yml b/n3fit/runcards/examples/Basic_runcard_closure_test.yml index 564c6bf7611339053951dd4b40ea201a51659f29..3033ee7a708fdb17b3fc2d558ddd8323dce3d20b 100644 --- a/n3fit/runcards/examples/Basic_runcard_closure_test.yml +++ b/n3fit/runcards/examples/Basic_runcard_closure_test.yml @@ -23,8 +23,7 @@ datacuts: theory: theoryid: 708 # database id -sampling: - separate_multiplicative: true +separate_multiplicative: True parameters: # This defines the parameter dictionary that is passed to the Model Trainer nodes_per_layer: [15, 10, 8] activation_per_layer: ['sigmoid', 'sigmoid', 'linear'] diff --git a/n3fit/runcards/examples/Basic_runcard_parallel.yml b/n3fit/runcards/examples/Basic_runcard_parallel.yml index a515fe04a6c6100c88ca2271cb016c45b98df824..478c76b4b63132f6def4a0db15f17c87f03c5b48 100644 --- a/n3fit/runcards/examples/Basic_runcard_parallel.yml +++ b/n3fit/runcards/examples/Basic_runcard_parallel.yml @@ -24,8 +24,7 @@ datacuts: theory: theoryid: 708 # database id -sampling: - separate_multiplicative: true +separate_multiplicative: True ############################################################ trvlseed: 1 nnseed: 2 diff --git a/n3fit/runcards/examples/DIS_diagonal_l2reg_example.yml b/n3fit/runcards/examples/DIS_diagonal_l2reg_example.yml index 7de4b5ce7fce693ff73628dfa04f7d72546054ba..d699bf8ab1c51060d09644fdaebc974409b47775 100644 --- a/n3fit/runcards/examples/DIS_diagonal_l2reg_example.yml +++ b/n3fit/runcards/examples/DIS_diagonal_l2reg_example.yml @@ -39,8 +39,7 @@ datacuts: theory: theoryid: 708 # database id -sampling: - separate_multiplicative: true +separate_multiplicative: True ############################################################ trvlseed: 1 nnseed: 2 diff --git a/n3fit/runcards/examples/developing.yml b/n3fit/runcards/examples/developing.yml index a2e3789ec86f0522f75ec4e53e40391f1032f1aa..a5abe4b93201f8d0e0f103a4fe7217f010079baa 100644 --- a/n3fit/runcards/examples/developing.yml +++ b/n3fit/runcards/examples/developing.yml @@ -52,8 +52,7 @@ datacuts: theory: theoryid: 40000000 -sampling: - separate_multiplicative: true +separate_multiplicative: True ############################################################ trvlseed: 1 nnseed: 2 diff --git a/n3fit/src/n3fit/scripts/n3fit_exec.py b/n3fit/src/n3fit/scripts/n3fit_exec.py index 9c68bbb5e1f07a861c2a5f3e804ba637a0b2652e..80a21e4b74e3fcd0e0dbd86d646c029e6a3a8f17 100755 --- a/n3fit/src/n3fit/scripts/n3fit_exec.py +++ b/n3fit/src/n3fit/scripts/n3fit_exec.py @@ -76,8 +76,8 @@ class N3FitEnvironment(Environment): # create output folder for the fit self.replica_path = self.output_path / "nnfit" for replica in self.replicas: - path = self.replica_path / "replica_{0}".format(replica) - log.info("Creating replica output folder in {0}".format(path)) + path = self.replica_path / f"replica_{replica}" + log.info(f"Creating replica output folder in {path}") try: path.mkdir(exist_ok=True) except OSError as e: @@ -177,11 +177,6 @@ class N3FitConfig(Config): N3FIT_FIXED_CONFIG['use_scalevar_uncertainties'] = thconfig.get( 'use_scalevar_uncertainties', True ) - # Sampling flags - if (sam_t0 := file_content.get('sampling')) is not None: - N3FIT_FIXED_CONFIG['separate_multiplicative'] = sam_t0.get( - 'separate_multiplicative', False - ) # Fitting flag file_content.update(N3FIT_FIXED_CONFIG) return cls(file_content, *args, **kwargs) @@ -261,7 +256,7 @@ class N3FitApp(App): config_class = N3FitConfig def __init__(self): - super(N3FitApp, self).__init__(name="n3fit", providers=N3FIT_PROVIDERS) + super().__init__(name="n3fit", providers=N3FIT_PROVIDERS) @property def argparser(self): diff --git a/n3fit/src/n3fit/tests/regressions/quickcard-sequential.yml b/n3fit/src/n3fit/tests/regressions/quickcard-sequential.yml index 4608267ee227961ca81f6d2e2bd95f15f610bbe8..1c659c0b2863a5d6462ada444c8b450bb140cab1 100644 --- a/n3fit/src/n3fit/tests/regressions/quickcard-sequential.yml +++ b/n3fit/src/n3fit/tests/regressions/quickcard-sequential.yml @@ -31,8 +31,7 @@ nnseed: 2 mcseed: 1 save: weights.weights.h5 -sampling: - separate_multiplicative: true +separate_multiplicative: True parameters: # This defines the parameter dictionary that is passed to the Model Trainer nodes_per_layer: [15, 10, 8] diff --git a/n3fit/src/n3fit/tests/regressions/quickcard.yml b/n3fit/src/n3fit/tests/regressions/quickcard.yml index 19b32652dc78a701153b6eca9d6d332ecfc254bd..69fb60984d805c53aac4d472bd3872a5ea440dd7 100644 --- a/n3fit/src/n3fit/tests/regressions/quickcard.yml +++ b/n3fit/src/n3fit/tests/regressions/quickcard.yml @@ -34,8 +34,7 @@ nnseed: 2 mcseed: 1 load: "weights.weights.h5" -sampling: - separate_multiplicative: true +separate_multiplicative: True parameters: # This defines the parameter dictionary that is passed to the Model Trainer nodes_per_layer: [15, 10, 8] diff --git a/n3fit/src/n3fit/tests/regressions/quickcard_pol.yml b/n3fit/src/n3fit/tests/regressions/quickcard_pol.yml index 8702cb5229c614f44874c949dacad580719367cf..84668efe131eec6cdb127a9a467d4e176b76de14 100644 --- a/n3fit/src/n3fit/tests/regressions/quickcard_pol.yml +++ b/n3fit/src/n3fit/tests/regressions/quickcard_pol.yml @@ -32,8 +32,7 @@ nnseed: 2 mcseed: 1 load: "weights_pol.weights.h5" -sampling: - separate_multiplicative: true +separate_multiplicative: True parameters: nodes_per_layer: [25, 20, 4] diff --git a/n3fit/src/n3fit/tests/regressions/quickcard_qed.yml b/n3fit/src/n3fit/tests/regressions/quickcard_qed.yml index 282f9828764bcfcf2b3777ef6feb2231e731f0d0..c840cf22fd64353ddb27e83aeb0b6a2c8426abf3 100644 --- a/n3fit/src/n3fit/tests/regressions/quickcard_qed.yml +++ b/n3fit/src/n3fit/tests/regressions/quickcard_qed.yml @@ -34,8 +34,7 @@ nnseed: 2 mcseed: 1 load: "weights.weights.h5" -sampling: - separate_multiplicative: true +separate_multiplicative: True parameters: # This defines the parameter dictionary that is passed to the Model Trainer nodes_per_layer: [15, 10, 8] diff --git a/validphys2/src/validphys/config.py b/validphys2/src/validphys/config.py index 1880fda0540fffe491aa29ebcd73c1a0cd4959e8..bdd94e213c2c07d9c0033e3bb5dfa5b12c40fba3 100644 --- a/validphys2/src/validphys/config.py +++ b/validphys2/src/validphys/config.py @@ -701,12 +701,14 @@ class CoreConfig(configparser.Config): return covmats.dataset_inputs_t0_total_covmat return covmats.dataset_inputs_t0_exp_covmat + def produce_sep_mult(self, separate_multiplicative=False): + if separate_multiplicative is False: + return False + return True + @configparser.explicit_node def produce_dataset_inputs_sampling_covmat( - self, - separate_multiplicative=False, - theory_covmat_flag=False, - use_thcovmat_in_sampling=False, + self, sep_mult=False, theory_covmat_flag=False, use_thcovmat_in_sampling=False ): """ Produces the correct covmat to be used in make_replica according @@ -716,12 +718,12 @@ class CoreConfig(configparser.Config): from validphys import covmats if theory_covmat_flag and use_thcovmat_in_sampling: - if separate_multiplicative: + if sep_mult: return covmats.dataset_inputs_total_covmat_separate else: return covmats.dataset_inputs_total_covmat else: - if separate_multiplicative: + if sep_mult: return covmats.dataset_inputs_exp_covmat_separate else: return covmats.dataset_inputs_exp_covmat diff --git a/validphys2/src/validphys/filters.py b/validphys2/src/validphys/filters.py index 5a9a73e470d60f116dddc3a47be63c666ef8a6d7..780f00eb091aa5f30494ab78e977d92fe1d8f2a4 100644 --- a/validphys2/src/validphys/filters.py +++ b/validphys2/src/validphys/filters.py @@ -191,7 +191,7 @@ def export_mask(path, mask): np.savetxt(path, mask, fmt='%d') -def filter_closure_data(filter_path, data, fakepdf, fakenoise, filterseed, separate_multiplicative): +def filter_closure_data(filter_path, data, fakepdf, fakenoise, filterseed, sep_mult): """Filter closure data. In addition to cutting data points, the data is generated from an underlying ``fakepdf``, applying a shift to the data if ``fakenoise`` is ``True``, which emulates the experimental central values @@ -199,19 +199,11 @@ def filter_closure_data(filter_path, data, fakepdf, fakenoise, filterseed, separ """ log.info('Filtering closure-test data.') - return _filter_closure_data( - filter_path, data, fakepdf, fakenoise, filterseed, separate_multiplicative - ) + return _filter_closure_data(filter_path, data, fakepdf, fakenoise, filterseed, sep_mult) def filter_closure_data_by_experiment( - filter_path, - experiments_data, - fakepdf, - fakenoise, - filterseed, - data_index, - separate_multiplicative, + filter_path, experiments_data, fakepdf, fakenoise, filterseed, data_index, sep_mult ): """ Like :py:func:`filter_closure_data` except filters data by experiment. @@ -228,13 +220,7 @@ def filter_closure_data_by_experiment( experiment_index = data_index[data_index.isin([exp.name], level=0)] res.append( _filter_closure_data( - filter_path, - exp, - fakepdf, - fakenoise, - filterseed, - experiment_index, - separate_multiplicative, + filter_path, exp, fakepdf, fakenoise, filterseed, experiment_index, sep_mult ) ) @@ -285,9 +271,7 @@ def _filter_real_data(filter_path, data): return total_data_points, total_cut_data_points -def _filter_closure_data( - filter_path, data, fakepdf, fakenoise, filterseed, data_index, separate_multiplicative -): +def _filter_closure_data(filter_path, data, fakepdf, fakenoise, filterseed, data_index, sep_mult): """ This function is accessed within a closure test only, that is, the fakedata namespace has to be True (If fakedata = False, the _filter_real_data function @@ -352,9 +336,7 @@ def _filter_closure_data( if fakenoise: # ======= Level 1 closure test =======# - closure_data = make_level1_data( - data, closure_data, filterseed, data_index, separate_multiplicative - ) + closure_data = make_level1_data(data, closure_data, filterseed, data_index, sep_mult) # ====== write commondata and systype files ======# if fakenoise: @@ -411,7 +393,9 @@ def check_positivity(posdatasets): log.info('Verifying positivity tables:') for pos in posdatasets: pos.load_commondata() - log.info(f'{pos.name} checked, {len(pos.cuts.load())}/{pos.commondata.ndata} datapoints passed kinematic cuts.') + log.info( + f'{pos.name} checked, {len(pos.cuts.load())}/{pos.commondata.ndata} datapoints passed kinematic cuts.' + ) def check_integrability(integdatasets): diff --git a/validphys2/src/validphys/pseudodata.py b/validphys2/src/validphys/pseudodata.py index a0083bb80369c1061161ea51993e62fdcdb40678..d29255a9c7db0b7be53ce64d60402853777da62b 100644 --- a/validphys2/src/validphys/pseudodata.py +++ b/validphys2/src/validphys/pseudodata.py @@ -126,7 +126,7 @@ def make_replica( groups_dataset_inputs_loaded_cd_with_cuts, replica_mcseed, dataset_inputs_sampling_covmat, - separate_multiplicative=False, + sep_mult=False, genrep=True, max_tries=int(1e6), resample_negative_pseudodata=True, @@ -152,7 +152,7 @@ def make_replica( dataset_inputs_sampling_covmat: np.array Full covmat to be used. It can be either only experimental or also theoretical. - separate_multiplicative: bool + sep_mult: bool Specifies whether computing the shifts with the full covmat or whether multiplicative errors should be separated @@ -219,9 +219,9 @@ def make_replica( pseudodata = cd.central_values.to_numpy() pseudodatas.append(pseudodata) - # Separation of multiplicative errors. If separate_multiplicative is True also the exp_covmat is produced + # Separation of multiplicative errors. If sep_mult is True also the exp_covmat is produced # without multiplicative errors - if separate_multiplicative: + if sep_mult: mult_errors = cd.multiplicative_errors mult_uncorr_errors = mult_errors.loc[:, mult_errors.columns == "UNCORR"].to_numpy() mult_corr_errors = mult_errors.loc[:, mult_errors.columns == "CORR"].to_numpy() @@ -234,7 +234,7 @@ def make_replica( else: check_positive_masks.append(np.ones_like(pseudodata, dtype=bool)) # concatenating special multiplicative errors, pseudodatas and positive mask - if separate_multiplicative: + if sep_mult: special_mult_errors = pd.concat(special_mult, axis=0, sort=True).fillna(0).to_numpy() all_pseudodata = np.concatenate(pseudodatas, axis=0) full_mask = np.concatenate(check_positive_masks, axis=0) @@ -255,10 +255,10 @@ def make_replica( mult_shifts.append(mult_shift) - # If separate_multiplicative is true then the multiplicative shifts were not included in the covmat + # If sep_mult is true then the multiplicative shifts were not included in the covmat shifts = covmat_sqrt @ rng.normal(size=covmat.shape[1]) mult_part = 1.0 - if separate_multiplicative: + if sep_mult: special_mult = ( 1 + special_mult_errors * rng.normal(size=(1, special_mult_errors.shape[1])) / 100 ).prod(axis=1) @@ -329,7 +329,7 @@ def level0_commondata_wc(data, fakepdf): return level0_commondata_instances_wc -def make_level1_data(data, level0_commondata_wc, filterseed, data_index, separate_multiplicative): +def make_level1_data(data, level0_commondata_wc, filterseed, data_index, sep_mult): """ Given a list of Level 0 commondata instances, return the same list with central values replaced by Level 1 data. @@ -395,16 +395,12 @@ def make_level1_data(data, level0_commondata_wc, filterseed, data_index, separat use_weights_in_covmat=False, norm_threshold=None, _list_of_central_values=None, - _only_additive=separate_multiplicative, + _only_additive=sep_mult, ) # ================== generation of Level1 data ======================# level1_data = make_replica( - level0_commondata_wc, - filterseed, - covmat, - separate_multiplicative=separate_multiplicative, - genrep=True, + level0_commondata_wc, filterseed, covmat, sep_mult=sep_mult, genrep=True ) indexed_level1_data = indexed_make_replica(data_index, level1_data) diff --git a/validphys2/src/validphys/tests/test_overfit_metric.py b/validphys2/src/validphys/tests/test_overfit_metric.py index 342f21f79c61a5b2e61afec13b590b7d74dae420..f22fd283a5d8f59bba53dee4fc0410de1c5945a8 100644 --- a/validphys2/src/validphys/tests/test_overfit_metric.py +++ b/validphys2/src/validphys/tests/test_overfit_metric.py @@ -17,7 +17,7 @@ config = { "t0pdfset": {"from_": "datacuts"}, "pdf": {"from_": "fit"}, "dataset_inputs": {"from_": "fit"}, - "separate_multiplicative": True + "separate_multiplicative": True, } diff --git a/validphys2/src/validphys/tests/test_pseudodata.py b/validphys2/src/validphys/tests/test_pseudodata.py index be6e02ebe5af0175161958dbc9d845da630d6465..a85aac4309a1f88a5c6edaf8bd339895f3e3116a 100644 --- a/validphys2/src/validphys/tests/test_pseudodata.py +++ b/validphys2/src/validphys/tests/test_pseudodata.py @@ -7,6 +7,7 @@ which has the pseudodata saved as training and validation splits. This is used to benchmark the correctness of the pseudodata recreation. """ + from numpy.testing import assert_allclose import pandas as pd import pytest