Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 48 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,54 @@ By default, the JSON-LD files generated by RsMetaCheck will only contain informa
poetry run rsmetacheck --input https://github.com/tidyverse/tidyverse --verbose
```

#### Configure Analysis with a Root Config File

You can configure RsMetaCheck with a TOML file at the repository root named `.rsmetacheck.toml` (auto-detected), or pass a custom path with `--config`.

Supported options:

- `ignore`: warnings/pitfalls to ignore (e.g. `P001`, `W002`)
- `exclude_files`: metadata sources to ignore (glob, filename, or substring match)
- `parameters`: per-check parameters for configurable checks
- `profiles`: alternate configurations such as `unstable` or `prerelease`

Example:

```toml
ignore = ["W002"]
exclude_files = ["**/generated/**", "tmp_metadata.json"]

[parameters.P001]
ahead_significant_diff = 2

[parameters.W002]
stale_after_days = 3

[profiles.unstable]
ignore = ["W002", "P017"]

[profiles.unstable.parameters.P001]
ahead_significant_diff = 10

[profiles.prerelease]
ignore = []

[profiles.prerelease.parameters.P001]
ahead_significant_diff = 1
```

Use a specific profile:

```bash
poetry run rsmetacheck --input https://github.com/example/repo --config-profile unstable
```

Use a custom config path:

```bash
poetry run rsmetacheck --input https://github.com/example/repo --config ./ci/rsmetacheck.toml
```

### Output

The tool will:
Expand Down
30 changes: 30 additions & 0 deletions docs/usage.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,36 @@ Run the analysis:
poetry run rsmetacheck --input repositories.json
```

### Configure Analysis Rules

RsMetaCheck can load a root-level `.rsmetacheck.toml` file to customize analysis behavior.

```toml
ignore = ["W002"]
exclude_files = ["tmp_metadata.json"]

[parameters.P001]
ahead_significant_diff = 10

[profiles.prerelease]
ignore = []

[profiles.unstable]
ignore = ["W002", "P017"]
```

Use a profile:

```bash
poetry run rsmetacheck --input https://github.com/example/repo --config-profile unstable
```

Use an explicit config path:

```bash
poetry run rsmetacheck --input https://github.com/example/repo --config ./ci/rsmetacheck.toml
```

## GitHub Action

You can integrate RsMetaCheck into your GitHub workflows:
Expand Down
39 changes: 37 additions & 2 deletions src/rsmetacheck/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import os
from pathlib import Path

from rsmetacheck.config import load_analysis_config
from rsmetacheck.run_analyzer import run_analysis
from rsmetacheck.run_somef import (
ensure_somef_configured,
Expand Down Expand Up @@ -69,9 +70,28 @@ def cli():
action="store_true",
help="Include both detected AND undetected pitfalls in the output JSON-LD.",
)
parser.add_argument(
"--config",
default=None,
help="Path to RsMetaCheck TOML config file (default: auto-detect .rsmetacheck.toml at repository root).",
)
parser.add_argument(
"--config-profile",
default=None,
help="Name of config profile to apply (e.g., unstable, prerelease).",
)

args = parser.parse_args()

try:
analysis_config = load_analysis_config(
config_path=args.config,
profile=args.config_profile,
)
except (FileNotFoundError, ValueError, OSError, Exception) as exc:
print(f"Error loading config: {exc}")
return

if args.skip_somef:
print(
f"Skipping SoMEF execution. Analyzing {len(args.input)} existing SoMEF output files..."
Expand All @@ -95,6 +115,7 @@ def cli():
args.analysis_output,
verbose=args.verbose,
notes_output=args.notes_output,
analysis_config=analysis_config,
)

else:
Expand All @@ -110,37 +131,51 @@ def cli():
"Codemeta generation is ENABLED. Codemeta files will be created for each repository."
)

any_somef_success = False

for input_item in args.input:
if input_item.startswith("http://") or input_item.startswith("https://"):
print(f"Processing repository URL: {input_item}")
run_somef_single(
success = run_somef_single(
input_item,
somef_output_dir,
threshold,
branch=args.branch,
generate_codemeta=generate_codemeta,
)
any_somef_success = any_somef_success or bool(success)
elif os.path.exists(input_item):
print(f"Processing repositories from file: {input_item}")
run_somef_batch(
success = run_somef_batch(
input_item,
somef_output_dir,
threshold,
branch=args.branch,
generate_codemeta=generate_codemeta,
)
any_somef_success = any_somef_success or bool(success)
else:
print(
f"Warning: Skipping invalid input (not a URL or existing file): {input_item}"
)

if not any_somef_success:
print(
"Error: SoMEF did not produce any outputs. Analysis is aborted."
)
print(
"Fix SoMEF/authentication issues and rerun, or run with --skip-somef on existing SoMEF JSON files."
)
return

print(f"\nRunning analysis on outputs in {somef_output_dir}...")
run_analysis(
somef_output_dir,
args.pitfalls_output,
args.analysis_output,
verbose=args.verbose,
notes_output=args.notes_output,
analysis_config=analysis_config,
)


Expand Down
133 changes: 129 additions & 4 deletions src/rsmetacheck/detect_pitfalls_main.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
import json
import copy
import fnmatch
import inspect
from pathlib import Path
from typing import Iterable, Union
from rsmetacheck.run_somef import CODEMETA_DEFAULT_NAME
from rsmetacheck.config import AnalysisConfig
from rsmetacheck.utils.pitfall_utils import extract_programming_languages
from rsmetacheck.utils.json_ld_utils import create_pitfall_jsonld, save_individual_pitfall_jsonld
from rsmetacheck.utils.somef_compat import normalize_somef_data
Expand Down Expand Up @@ -40,7 +44,89 @@
from rsmetacheck.scripts.warnings.w010 import detect_git_remote_shorthand_pitfall


def detect_all_pitfalls(json_files: Iterable[Path], pitfalls_output_dir: Union[str, Path], output_file: Union[str, Path], verbose: bool = False, notes_output: Union[str, Path] = None):
def _source_matches_exclude_patterns(source_value: str, exclude_patterns: list[str]) -> bool:
source = str(source_value)
basename = Path(source).name

for pattern in exclude_patterns:
if fnmatch.fnmatch(source, pattern):
return True
if fnmatch.fnmatch(basename, pattern):
return True
if pattern in source:
return True

return False


def _filter_somef_data_by_excluded_files(data, exclude_patterns: list[str]):
if isinstance(data, dict):
filtered_dict = {}

for key, value in data.items():
if key == "source":
if isinstance(value, list):
kept_sources = [
src for src in value if not _source_matches_exclude_patterns(src, exclude_patterns)
]
if not kept_sources:
return None
filtered_dict[key] = kept_sources
else:
if _source_matches_exclude_patterns(value, exclude_patterns):
return None
filtered_dict[key] = value
continue

filtered_value = _filter_somef_data_by_excluded_files(value, exclude_patterns)
if filtered_value is not None:
filtered_dict[key] = filtered_value

return filtered_dict

if isinstance(data, list):
filtered_list = []
for item in data:
filtered_item = _filter_somef_data_by_excluded_files(item, exclude_patterns)
if filtered_item is not None:
filtered_list.append(filtered_item)
return filtered_list

return data


def _run_detector_with_parameters(detector_func, somef_data, file_name: str, parameters: dict):
if not parameters:
return detector_func(somef_data, file_name)

signature = inspect.signature(detector_func)
accepts_kwargs = any(
param.kind == inspect.Parameter.VAR_KEYWORD
for param in signature.parameters.values()
)

if accepts_kwargs:
return detector_func(somef_data, file_name, **parameters)

accepted_parameter_names = set(signature.parameters.keys()) - {"somef_data", "file_name"}
filtered_parameters = {
key: value for key, value in parameters.items() if key in accepted_parameter_names
}

if filtered_parameters:
return detector_func(somef_data, file_name, **filtered_parameters)

return detector_func(somef_data, file_name)


def detect_all_pitfalls(
json_files: Iterable[Path],
pitfalls_output_dir: Union[str, Path],
output_file: Union[str, Path],
verbose: bool = False,
notes_output: Union[str, Path] = None,
analysis_config: AnalysisConfig = None,
):
"""
Detect all software repository pitfalls in SoMEF output files using modular detectors.
Now also generates individual JSON-LD files for each repository.
Expand All @@ -49,12 +135,21 @@ def detect_all_pitfalls(json_files: Iterable[Path], pitfalls_output_dir: Union[s
pitfalls_output_dir = Path(pitfalls_output_dir)
pitfalls_output_dir.mkdir(exist_ok=True, parents=True)
json_files = list(json_files)
config = analysis_config or AnalysisConfig.empty()

if not json_files:
print("No JSON files found for analysis.")
return

print(f"Analyzing {len(json_files)} SoMEF JSON files...")
if config.source_path:
print(f"Using config file: {config.source_path}")
if config.profile:
print(f"Using config profile: {config.profile}")
if config.ignored_checks:
print(f"Ignoring checks: {', '.join(sorted(config.ignored_checks))}")
if config.exclude_files:
print(f"Excluded source patterns: {config.exclude_files}")

results = {
"summary": {
Expand Down Expand Up @@ -321,6 +416,13 @@ def detect_all_pitfalls(json_files: Iterable[Path], pitfalls_output_dir: Union[s
somef_data = json.load(f)

somef_data = normalize_somef_data(somef_data)
if config.exclude_files:
somef_data = _filter_somef_data_by_excluded_files(
copy.deepcopy(somef_data),
config.exclude_files,
)
if somef_data is None:
somef_data = {}

languages = extract_programming_languages(somef_data)

Expand All @@ -330,8 +432,16 @@ def detect_all_pitfalls(json_files: Iterable[Path], pitfalls_output_dir: Union[s
repo_pitfall_results = []

for idx, (detector_func, pitfall_code) in enumerate(pitfall_detectors):
if config.is_ignored(pitfall_code):
continue

try:
detector_results = detector_func(somef_data, json_file.name)
detector_results = _run_detector_with_parameters(
detector_func,
somef_data,
json_file.name,
config.get_parameters(pitfall_code),
)
if not isinstance(detector_results, list):
detector_results = [detector_results]

Expand Down Expand Up @@ -500,7 +610,15 @@ def detect_all_pitfalls(json_files: Iterable[Path], pitfalls_output_dir: Union[s
print(f"Error writing output file: {e}")


def main(input_dir=None, somef_json_paths=None, pitfalls_dir=None, analysis_output=None, verbose=False, notes_output=None):
def main(
input_dir=None,
somef_json_paths=None,
pitfalls_dir=None,
analysis_output=None,
verbose=False,
notes_output=None,
analysis_config: AnalysisConfig = None,
):
"""
Main function to run all pitfall detections.

Expand Down Expand Up @@ -541,7 +659,14 @@ def main(input_dir=None, somef_json_paths=None, pitfalls_dir=None, analysis_outp
print("No JSON files found for analysis.")
return

detect_all_pitfalls(json_files, pitfalls_directory, output_file, verbose, notes_output)
detect_all_pitfalls(
json_files,
pitfalls_directory,
output_file,
verbose,
notes_output,
analysis_config=analysis_config,
)

if __name__ == "__main__":
main()
Loading
Loading