diff --git a/README.md b/README.md index 62b0175..db8218d 100644 --- a/README.md +++ b/README.md @@ -170,6 +170,54 @@ By default, the JSON-LD files generated by RsMetaCheck will only contain informa poetry run rsmetacheck --input https://github.com/tidyverse/tidyverse --verbose ``` +#### Configure Analysis with a Root Config File + +You can configure RsMetaCheck with a TOML file at the repository root named `.rsmetacheck.toml` (auto-detected), or pass a custom path with `--config`. + +Supported options: + +- `ignore`: warnings/pitfalls to ignore (e.g. `P001`, `W002`) +- `exclude_files`: metadata sources to ignore (glob, filename, or substring match) +- `parameters`: per-check parameters for configurable checks +- `profiles`: alternate configurations such as `unstable` or `prerelease` + +Example: + +```toml +ignore = ["W002"] +exclude_files = ["**/generated/**", "tmp_metadata.json"] + +[parameters.P001] +ahead_significant_diff = 2 + +[parameters.W002] +stale_after_days = 3 + +[profiles.unstable] +ignore = ["W002", "P017"] + +[profiles.unstable.parameters.P001] +ahead_significant_diff = 10 + +[profiles.prerelease] +ignore = [] + +[profiles.prerelease.parameters.P001] +ahead_significant_diff = 1 +``` + +Use a specific profile: + +```bash +poetry run rsmetacheck --input https://github.com/example/repo --config-profile unstable +``` + +Use a custom config path: + +```bash +poetry run rsmetacheck --input https://github.com/example/repo --config ./ci/rsmetacheck.toml +``` + ### Output The tool will: diff --git a/docs/usage.md b/docs/usage.md index 66c76c2..fcbfa29 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -35,6 +35,36 @@ Run the analysis: poetry run rsmetacheck --input repositories.json ``` +### Configure Analysis Rules + +RsMetaCheck can load a root-level `.rsmetacheck.toml` file to customize analysis behavior. + +```toml +ignore = ["W002"] +exclude_files = ["tmp_metadata.json"] + +[parameters.P001] +ahead_significant_diff = 10 + +[profiles.prerelease] +ignore = [] + +[profiles.unstable] +ignore = ["W002", "P017"] +``` + +Use a profile: + +```bash +poetry run rsmetacheck --input https://github.com/example/repo --config-profile unstable +``` + +Use an explicit config path: + +```bash +poetry run rsmetacheck --input https://github.com/example/repo --config ./ci/rsmetacheck.toml +``` + ## GitHub Action You can integrate RSMetaCheck into your GitHub workflows: diff --git a/src/rsmetacheck/cli.py b/src/rsmetacheck/cli.py index 0291045..1edac7d 100644 --- a/src/rsmetacheck/cli.py +++ b/src/rsmetacheck/cli.py @@ -2,6 +2,7 @@ import os from pathlib import Path +from rsmetacheck.config import load_analysis_config from rsmetacheck.run_analyzer import run_analysis from rsmetacheck.run_somef import ( ensure_somef_configured, @@ -69,9 +70,28 @@ def cli(): action="store_true", help="Include both detected AND undetected pitfalls in the output JSON-LD.", ) + parser.add_argument( + "--config", + default=None, + help="Path to RsMetaCheck TOML config file (default: auto-detect .rsmetacheck.toml at repository root).", + ) + parser.add_argument( + "--config-profile", + default=None, + help="Name of config profile to apply (e.g., unstable, prerelease).", + ) args = parser.parse_args() + try: + analysis_config = load_analysis_config( + config_path=args.config, + profile=args.config_profile, + ) + except (FileNotFoundError, ValueError, OSError, Exception) as exc: + print(f"Error loading config: {exc}") + return + if args.skip_somef: print( f"Skipping SoMEF execution. Analyzing {len(args.input)} existing SoMEF output files..." @@ -95,6 +115,7 @@ def cli(): args.analysis_output, verbose=args.verbose, notes_output=args.notes_output, + analysis_config=analysis_config, ) else: @@ -110,30 +131,43 @@ def cli(): "Codemeta generation is ENABLED. Codemeta files will be created for each repository." ) + any_somef_success = False + for input_item in args.input: if input_item.startswith("http://") or input_item.startswith("https://"): print(f"Processing repository URL: {input_item}") - run_somef_single( + success = run_somef_single( input_item, somef_output_dir, threshold, branch=args.branch, generate_codemeta=generate_codemeta, ) + any_somef_success = any_somef_success or bool(success) elif os.path.exists(input_item): print(f"Processing repositories from file: {input_item}") - run_somef_batch( + success = run_somef_batch( input_item, somef_output_dir, threshold, branch=args.branch, generate_codemeta=generate_codemeta, ) + any_somef_success = any_somef_success or bool(success) else: print( f"Warning: Skipping invalid input (not a URL or existing file): {input_item}" ) + if not any_somef_success: + print( + "Error: SoMEF did not produce any outputs. Analysis is aborted." + ) + print( + "Fix SoMEF/authentication issues and rerun, or run with --skip-somef on existing SoMEF JSON files." + ) + return + print(f"\nRunning analysis on outputs in {somef_output_dir}...") run_analysis( somef_output_dir, @@ -141,6 +175,7 @@ def cli(): args.analysis_output, verbose=args.verbose, notes_output=args.notes_output, + analysis_config=analysis_config, ) diff --git a/src/rsmetacheck/detect_pitfalls_main.py b/src/rsmetacheck/detect_pitfalls_main.py index 1457b82..9b86c35 100644 --- a/src/rsmetacheck/detect_pitfalls_main.py +++ b/src/rsmetacheck/detect_pitfalls_main.py @@ -1,7 +1,11 @@ import json +import copy +import fnmatch +import inspect from pathlib import Path from typing import Iterable, Union from rsmetacheck.run_somef import CODEMETA_DEFAULT_NAME +from rsmetacheck.config import AnalysisConfig from rsmetacheck.utils.pitfall_utils import extract_programming_languages from rsmetacheck.utils.json_ld_utils import create_pitfall_jsonld, save_individual_pitfall_jsonld from rsmetacheck.utils.somef_compat import normalize_somef_data @@ -40,7 +44,89 @@ from rsmetacheck.scripts.warnings.w010 import detect_git_remote_shorthand_pitfall -def detect_all_pitfalls(json_files: Iterable[Path], pitfalls_output_dir: Union[str, Path], output_file: Union[str, Path], verbose: bool = False, notes_output: Union[str, Path] = None): +def _source_matches_exclude_patterns(source_value: str, exclude_patterns: list[str]) -> bool: + source = str(source_value) + basename = Path(source).name + + for pattern in exclude_patterns: + if fnmatch.fnmatch(source, pattern): + return True + if fnmatch.fnmatch(basename, pattern): + return True + if pattern in source: + return True + + return False + + +def _filter_somef_data_by_excluded_files(data, exclude_patterns: list[str]): + if isinstance(data, dict): + filtered_dict = {} + + for key, value in data.items(): + if key == "source": + if isinstance(value, list): + kept_sources = [ + src for src in value if not _source_matches_exclude_patterns(src, exclude_patterns) + ] + if not kept_sources: + return None + filtered_dict[key] = kept_sources + else: + if _source_matches_exclude_patterns(value, exclude_patterns): + return None + filtered_dict[key] = value + continue + + filtered_value = _filter_somef_data_by_excluded_files(value, exclude_patterns) + if filtered_value is not None: + filtered_dict[key] = filtered_value + + return filtered_dict + + if isinstance(data, list): + filtered_list = [] + for item in data: + filtered_item = _filter_somef_data_by_excluded_files(item, exclude_patterns) + if filtered_item is not None: + filtered_list.append(filtered_item) + return filtered_list + + return data + + +def _run_detector_with_parameters(detector_func, somef_data, file_name: str, parameters: dict): + if not parameters: + return detector_func(somef_data, file_name) + + signature = inspect.signature(detector_func) + accepts_kwargs = any( + param.kind == inspect.Parameter.VAR_KEYWORD + for param in signature.parameters.values() + ) + + if accepts_kwargs: + return detector_func(somef_data, file_name, **parameters) + + accepted_parameter_names = set(signature.parameters.keys()) - {"somef_data", "file_name"} + filtered_parameters = { + key: value for key, value in parameters.items() if key in accepted_parameter_names + } + + if filtered_parameters: + return detector_func(somef_data, file_name, **filtered_parameters) + + return detector_func(somef_data, file_name) + + +def detect_all_pitfalls( + json_files: Iterable[Path], + pitfalls_output_dir: Union[str, Path], + output_file: Union[str, Path], + verbose: bool = False, + notes_output: Union[str, Path] = None, + analysis_config: AnalysisConfig = None, +): """ Detect all software repository pitfalls in SoMEF output files using modular detectors. Now also generates individual JSON-LD files for each repository. @@ -49,12 +135,21 @@ def detect_all_pitfalls(json_files: Iterable[Path], pitfalls_output_dir: Union[s pitfalls_output_dir = Path(pitfalls_output_dir) pitfalls_output_dir.mkdir(exist_ok=True, parents=True) json_files = list(json_files) + config = analysis_config or AnalysisConfig.empty() if not json_files: print("No JSON files found for analysis.") return print(f"Analyzing {len(json_files)} SoMEF JSON files...") + if config.source_path: + print(f"Using config file: {config.source_path}") + if config.profile: + print(f"Using config profile: {config.profile}") + if config.ignored_checks: + print(f"Ignoring checks: {', '.join(sorted(config.ignored_checks))}") + if config.exclude_files: + print(f"Excluded source patterns: {config.exclude_files}") results = { "summary": { @@ -321,6 +416,13 @@ def detect_all_pitfalls(json_files: Iterable[Path], pitfalls_output_dir: Union[s somef_data = json.load(f) somef_data = normalize_somef_data(somef_data) + if config.exclude_files: + somef_data = _filter_somef_data_by_excluded_files( + copy.deepcopy(somef_data), + config.exclude_files, + ) + if somef_data is None: + somef_data = {} languages = extract_programming_languages(somef_data) @@ -330,8 +432,16 @@ def detect_all_pitfalls(json_files: Iterable[Path], pitfalls_output_dir: Union[s repo_pitfall_results = [] for idx, (detector_func, pitfall_code) in enumerate(pitfall_detectors): + if config.is_ignored(pitfall_code): + continue + try: - detector_results = detector_func(somef_data, json_file.name) + detector_results = _run_detector_with_parameters( + detector_func, + somef_data, + json_file.name, + config.get_parameters(pitfall_code), + ) if not isinstance(detector_results, list): detector_results = [detector_results] @@ -500,7 +610,15 @@ def detect_all_pitfalls(json_files: Iterable[Path], pitfalls_output_dir: Union[s print(f"Error writing output file: {e}") -def main(input_dir=None, somef_json_paths=None, pitfalls_dir=None, analysis_output=None, verbose=False, notes_output=None): +def main( + input_dir=None, + somef_json_paths=None, + pitfalls_dir=None, + analysis_output=None, + verbose=False, + notes_output=None, + analysis_config: AnalysisConfig = None, +): """ Main function to run all pitfall detections. @@ -541,7 +659,14 @@ def main(input_dir=None, somef_json_paths=None, pitfalls_dir=None, analysis_outp print("No JSON files found for analysis.") return - detect_all_pitfalls(json_files, pitfalls_directory, output_file, verbose, notes_output) + detect_all_pitfalls( + json_files, + pitfalls_directory, + output_file, + verbose, + notes_output, + analysis_config=analysis_config, + ) if __name__ == "__main__": main() diff --git a/src/rsmetacheck/run_analyzer.py b/src/rsmetacheck/run_analyzer.py index 56b91c9..20264a5 100644 --- a/src/rsmetacheck/run_analyzer.py +++ b/src/rsmetacheck/run_analyzer.py @@ -1,9 +1,17 @@ from pathlib import Path from typing import Union, Iterable from rsmetacheck.detect_pitfalls_main import main +from rsmetacheck.config import AnalysisConfig -def run_analysis(somef_input: Union[str, Path, Iterable[Path]], pitfalls_dir: Union[str, Path], analysis_file: Union[str, Path], verbose: bool = False, notes_output: Union[str, Path] = None): +def run_analysis( + somef_input: Union[str, Path, Iterable[Path]], + pitfalls_dir: Union[str, Path], + analysis_file: Union[str, Path], + verbose: bool = False, + notes_output: Union[str, Path] = None, + analysis_config: AnalysisConfig = None, +): """ Run metadata analysis using existing code. @@ -21,10 +29,24 @@ def run_analysis(somef_input: Union[str, Path, Iterable[Path]], pitfalls_dir: Un somef_path = Path(somef_input) if somef_path.is_dir(): print(f"Using directory: {somef_input}") - main(input_dir=somef_input, pitfalls_dir=pitfalls_dir, analysis_output=analysis_file, verbose=verbose, notes_output=notes_output) + main( + input_dir=somef_input, + pitfalls_dir=pitfalls_dir, + analysis_output=analysis_file, + verbose=verbose, + notes_output=notes_output, + analysis_config=analysis_config, + ) else: print(f"Error: {somef_input} is not a valid directory") else: json_files = list(somef_input) print(f"Using {len(json_files)} specified JSON files") - main(somef_json_paths=json_files, pitfalls_dir=pitfalls_dir, analysis_output=analysis_file, verbose=verbose, notes_output=notes_output) \ No newline at end of file + main( + somef_json_paths=json_files, + pitfalls_dir=pitfalls_dir, + analysis_output=analysis_file, + verbose=verbose, + notes_output=notes_output, + analysis_config=analysis_config, + ) \ No newline at end of file diff --git a/src/rsmetacheck/run_somef.py b/src/rsmetacheck/run_somef.py index a2172a3..6cab7b9 100644 --- a/src/rsmetacheck/run_somef.py +++ b/src/rsmetacheck/run_somef.py @@ -29,10 +29,22 @@ def run_somef(repo_url, output_file, threshold, branch=None, codemeta_file=None) if codemeta_file: cmd.extend(["-c", codemeta_file]) try: - subprocess.run(cmd, check=True) + subprocess.run(cmd, check=True, capture_output=True, text=True) print(f"SoMEF finished for: {repo_url}") return True except subprocess.CalledProcessError as e: + stderr = (e.stderr or "").strip() + stdout = (e.stdout or "").strip() + combined_output = "\n".join(part for part in [stderr, stdout] if part) + + if "GitHub token lacks required permissions or scopes" in combined_output: + print( + "SoMEF failed due to an invalid/insufficient GitHub token configured in SoMEF." + ) + print( + "Run `somef configure` and set a token with appropriate scopes, or remove the token from ~/.somef/config.json." + ) + print(f"Error running SoMEF for {repo_url}: {e}") return False @@ -58,7 +70,7 @@ def run_somef_single( branch, codemeta_file=codemeta_file if generate_codemeta else None, ) - return output_dir if success else None + return bool(success) def run_somef_batch( @@ -82,28 +94,22 @@ def run_somef_batch( base_name = os.path.splitext(os.path.basename(json_file))[0] print(f"Running SoMEF for {len(repos)} repositories in {base_name}...") + success_count = 0 + for idx, repo_url in enumerate(repos, start=1): output_file = os.path.join(output_dir, f"{base_name}_output_{idx}.json") codemeta_file = os.path.join( output_dir, f"{base_name}_{CODEMETA_DEFAULT_NAME}_{idx}.json" ) print(f"[{idx}/{len(repos)}] {repo_url}") - run_somef( + if run_somef( repo_url, output_file, threshold, branch, codemeta_file=codemeta_file if generate_codemeta else None, - ) + ): + success_count += 1 print(f"Completed SoMEF for {base_name}. Results in {output_dir}") - return True - - success = run_somef( - repo_url, - output_file, - threshold, - branch, - codemeta_file=codemeta_file if generate_codemeta else None, - ) - return output_dir if success else None + return success_count > 0 diff --git a/src/rsmetacheck/scripts/pitfalls/p001.py b/src/rsmetacheck/scripts/pitfalls/p001.py index e30164f..7f62b48 100644 --- a/src/rsmetacheck/scripts/pitfalls/p001.py +++ b/src/rsmetacheck/scripts/pitfalls/p001.py @@ -13,11 +13,11 @@ def _parse_version_components(version_str: str) -> tuple: return tuple(components) -def _version_diff_significant(v1: str, v2: str) -> bool: +def _version_diff_significant(v1: str, v2: str, threshold: int = 2) -> bool: c1 = _parse_version_components(v1) c2 = _parse_version_components(v2) for a, b in zip(c1, c2): - if abs(a - b) >= 2: + if abs(a - b) >= threshold: return True return False @@ -95,8 +95,11 @@ def extract_latest_release_version(somef_data: Dict) -> Optional[str]: return None - -def detect_version_mismatch(somef_data: Dict, file_name: str) -> list: +def detect_version_mismatch( + somef_data: Dict, + file_name: str, + ahead_significant_diff: int = 2, +) -> list: """ Detect version mismatches between metadata files and the latest release. Returns a single result with all mismatched sources merged into one evidence message. @@ -132,8 +135,22 @@ def detect_version_mismatch(somef_data: Dict, file_name: str) -> list: }) if _metadata_ahead_of_release(metadata_version, normalized_release_version): - if _version_diff_significant(metadata_version, normalized_release_version): - pitfall_sources.append(metadata_source_file) + if _version_diff_significant( + metadata_version, + normalized_release_version, + threshold=ahead_significant_diff, + ): + results.append({ + "has_pitfall": True, + "has_note": False, + "file_name": file_name, + "metadata_version": metadata_version, + "release_version": normalized_release_version, + "metadata_source": md_info["source"], + "metadata_source_file": metadata_source_file, + "note_text": None, + "notes": [] + }) else: note_sources.append(metadata_source_file) else: diff --git a/src/rsmetacheck/scripts/warnings/w002.py b/src/rsmetacheck/scripts/warnings/w002.py index 1a3afa4..fd9aa2c 100644 --- a/src/rsmetacheck/scripts/warnings/w002.py +++ b/src/rsmetacheck/scripts/warnings/w002.py @@ -101,7 +101,11 @@ def calculate_date_difference_days(date1: datetime, date2: datetime) -> int: return diff -def detect_outdated_datemodified(somef_data: Dict, file_name: str) -> Dict: +def detect_outdated_datemodified( + somef_data: Dict, + file_name: str, + stale_after_days: int = 1, +) -> Dict: """ Detect outdated dateModified in codemeta.json warning for a single repository. Returns detection result with warning info. @@ -140,7 +144,7 @@ def detect_outdated_datemodified(somef_data: Dict, file_name: str) -> Dict: difference_days = calculate_date_difference_days(github_date_parsed, codemeta_date_parsed) result["difference_days"] = difference_days - if github_date_parsed > codemeta_date_parsed and difference_days > 1: + if github_date_parsed > codemeta_date_parsed and difference_days > stale_after_days: result["has_warning"] = True return result \ No newline at end of file diff --git a/tests/test_cli.py b/tests/test_cli.py index 37cda57..3079a8d 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -3,6 +3,8 @@ import importlib from unittest.mock import MagicMock +from rsmetacheck.config import AnalysisConfig + cli_module = importlib.import_module("rsmetacheck.cli") REPO_URL = "https://github.com/SoftwareUnderstanding/sw-metadata-bot" @@ -438,6 +440,30 @@ def test_cli_multiple_inputs_triggers_multiple_calls(monkeypatch, tmp_path): run_somef_batch_mock.assert_called_once() +def test_cli_aborts_analysis_when_somef_produces_no_outputs(monkeypatch, capsys): + """When all SoMEF runs fail, CLI should stop before run_analysis.""" + run_analysis_mock = MagicMock() + run_somef_single_mock = MagicMock(return_value=False) + + monkeypatch.setattr( + "sys.argv", + [ + "rsmetacheck", + "--input", + REPO_URL, + ], + ) + monkeypatch.setattr(cli_module, "ensure_somef_configured", lambda: True) + monkeypatch.setattr(cli_module, "run_analysis", run_analysis_mock) + monkeypatch.setattr(cli_module, "run_somef_single", run_somef_single_mock) + + cli_module.cli() + + captured = capsys.readouterr() + assert "SoMEF did not produce any outputs" in captured.out + run_analysis_mock.assert_not_called() + + def test_cli_input_required(monkeypatch): """CLI should fail if --input is not provided.""" monkeypatch.setattr( @@ -450,3 +476,66 @@ def test_cli_input_required(monkeypatch): assert False, "Expected SystemExit" except SystemExit: pass + + +def test_cli_config_profile_forwarded_to_run_analysis(monkeypatch, tmp_path): + """--config and --config-profile should load and forward analysis config.""" + somef_file = tmp_path / "somef_output.json" + somef_file.write_text("{}") + + run_analysis_mock = MagicMock() + expected_config = AnalysisConfig(profile="unstable") + load_config_mock = MagicMock(return_value=expected_config) + + monkeypatch.setattr( + "sys.argv", + [ + "rsmetacheck", + "--input", + str(somef_file), + "--skip-somef", + "--config", + "custom.toml", + "--config-profile", + "unstable", + ], + ) + monkeypatch.setattr(cli_module, "load_analysis_config", load_config_mock) + monkeypatch.setattr(cli_module, "run_analysis", run_analysis_mock) + + cli_module.cli() + + load_config_mock.assert_called_once_with(config_path="custom.toml", profile="unstable") + assert run_analysis_mock.call_args.kwargs["analysis_config"] is expected_config + + +def test_cli_config_load_error_stops_execution(monkeypatch, tmp_path, capsys): + """Config loading errors should stop execution and print a message.""" + somef_file = tmp_path / "somef_output.json" + somef_file.write_text("{}") + + run_analysis_mock = MagicMock() + + monkeypatch.setattr( + "sys.argv", + [ + "rsmetacheck", + "--input", + str(somef_file), + "--skip-somef", + "--config", + "missing.toml", + ], + ) + monkeypatch.setattr( + cli_module, + "load_analysis_config", + MagicMock(side_effect=FileNotFoundError("missing")), + ) + monkeypatch.setattr(cli_module, "run_analysis", run_analysis_mock) + + cli_module.cli() + + captured = capsys.readouterr() + assert "Error loading config" in captured.out + run_analysis_mock.assert_not_called() diff --git a/tests/test_e2e.py b/tests/test_e2e.py index bbc91e9..f70949c 100644 --- a/tests/test_e2e.py +++ b/tests/test_e2e.py @@ -3,6 +3,7 @@ import json from pathlib import Path +from rsmetacheck.config import AnalysisConfig from rsmetacheck.detect_pitfalls_main import detect_all_pitfalls from rsmetacheck.detect_pitfalls_main import main as detect_pitfalls_main @@ -34,6 +35,14 @@ def _write_somef_file(dir_path, filename, somef_data): return filepath +def _find_issue_count(summary_data, code): + for item in summary_data["pitfalls & warnings"]: + current_code = item.get("pitfall_code") or item.get("warning_code") + if current_code == code: + return item["count"] + raise AssertionError(f"Code not found in summary: {code}") + + class TestSingleRepoPipeline: """Tests with a single SoMEF input file.""" @@ -361,3 +370,89 @@ def test_pipeline_continues_after_one_bad_file(self, tmp_path): summary = json.loads(summary_file.read_text()) assert summary["summary"]["total_repositories_analyzed"] == 2 + + +class TestAnalysisConfiguration: + """Tests for root config behavior in analysis.""" + + def test_ignore_check_code_skips_detector(self, tmp_path): + somef_dir = tmp_path / "somef_inputs" + somef_dir.mkdir() + pitfalls_dir = tmp_path / "pitfalls_outputs" + summary_file = tmp_path / "summary.json" + + _write_somef_file( + somef_dir, + "repo_1.json", + _make_somef_data(version="2.0.0", release_tag="1.0.0"), + ) + + detect_all_pitfalls( + list(somef_dir.glob("*.json")), + pitfalls_dir, + summary_file, + analysis_config=AnalysisConfig(ignored_checks={"P001"}), + ) + + summary = json.loads(summary_file.read_text()) + assert _find_issue_count(summary, "P001") == 0 + + def test_p001_threshold_override_changes_note_to_pitfall(self, tmp_path): + somef_dir = tmp_path / "somef_inputs" + somef_dir.mkdir() + pitfalls_dir = tmp_path / "pitfalls_outputs" + summary_file = tmp_path / "summary.json" + + _write_somef_file( + somef_dir, + "repo_1.json", + _make_somef_data(version="0.4.3", release_tag="0.4.2"), + ) + + detect_all_pitfalls( + list(somef_dir.glob("*.json")), + pitfalls_dir, + summary_file, + analysis_config=AnalysisConfig( + check_parameters={"P001": {"ahead_significant_diff": 1}} + ), + ) + + summary = json.loads(summary_file.read_text()) + assert _find_issue_count(summary, "P001") == 1 + + def test_exclude_files_removes_source_from_checks(self, tmp_path): + somef_dir = tmp_path / "somef_inputs" + somef_dir.mkdir() + pitfalls_dir = tmp_path / "pitfalls_outputs" + summary_file = tmp_path / "summary.json" + + somef_data = { + "full_name": [{"result": {"value": "owner/repo"}}], + "code_repository": [{"result": {"value": "https://github.com/owner/repo"}}], + "version": [ + { + "source": "repository/codemeta.json", + "result": {"value": "2.0.0"}, + } + ], + "releases": [{"tag": "1.0.0"}], + "description": [], + "name": [], + "owner": [], + "date_created": [], + "date_updated": [], + "license": [], + "programming_languages": [], + } + _write_somef_file(somef_dir, "repo_1.json", somef_data) + + detect_all_pitfalls( + list(somef_dir.glob("*.json")), + pitfalls_dir, + summary_file, + analysis_config=AnalysisConfig(exclude_files=["codemeta.json"]), + ) + + summary = json.loads(summary_file.read_text()) + assert _find_issue_count(summary, "P001") == 0