riseproject-dev · luhenry · May 15, 2026 · May 15, 2026 · May 15, 2026 · May 15, 2026
diff --git a/.ci/docker/common/install_android.sh b/.ci/docker/common/install_android.sh
@@ -43,10 +43,10 @@ install_ndk() {
   ARCH=$(uname -m)
   if [ "${ARCH}" = "aarch64" ]; then
     # aarch64 NDK is not cached on S3, download from Google directly
-    curl -Os --retry 3 "https://dl.google.com/android/repository/android-ndk-${ANDROID_NDK_VERSION}-linux.zip"
+    curl -Os --retry 3 --retry-all-errors "https://dl.google.com/android/repository/android-ndk-${ANDROID_NDK_VERSION}-linux.zip"
   else
     # The NDK installation is cached on ossci-android S3 bucket
-    curl -Os --retry 3 "https://ossci-android.s3.amazonaws.com/android-ndk-${ANDROID_NDK_VERSION}-linux.zip"
+    curl -Os --retry 3 --retry-all-errors "https://ossci-android.s3.amazonaws.com/android-ndk-${ANDROID_NDK_VERSION}-linux.zip"
   fi
   unzip -qo "android-ndk-${ANDROID_NDK_VERSION}-linux.zip"
 
@@ -62,7 +62,7 @@ install_cmdtools() {
 
   pushd /tmp
   # The file is cached on ossci-android S3 bucket
-  curl -Os --retry 3 "https://ossci-android.s3.us-west-1.amazonaws.com/${CMDTOOLS_FILENAME}"
+  curl -Os --retry 3 --retry-all-errors "https://ossci-android.s3.us-west-1.amazonaws.com/${CMDTOOLS_FILENAME}"
   unzip -qo "${CMDTOOLS_FILENAME}" -d /opt
 
   ls -lah /opt/cmdline-tools/bin

diff --git a/.ci/docker/common/install_cache.sh b/.ci/docker/common/install_cache.sh
@@ -34,7 +34,7 @@ install_ubuntu() {
 
 install_binary() {
   echo "Downloading sccache binary from S3 repo"
-  curl --retry 3 https://s3.amazonaws.com/ossci-linux/sccache -o /opt/cache/bin/sccache
+  curl --retry 3 --retry-all-errors https://s3.amazonaws.com/ossci-linux/sccache -o /opt/cache/bin/sccache
   chmod +x /opt/cache/bin/sccache
 }
 

diff --git a/.ci/docker/common/install_docs_reqs.sh b/.ci/docker/common/install_docs_reqs.sh
@@ -12,10 +12,18 @@ if [ -n "$BUILD_DOCS" ]; then
   # Ignore error if gpg-agent doesn't exist (for Ubuntu 16.04)
   apt-get install -y gpg-agent || :
 
-  curl --retry 3 -sL https://deb.nodesource.com/setup_16.x | sudo -E bash -
+  # Download to files instead of piping into the consumer: curl resets a
+  # partially written -o file before a retry, but it cannot take back bytes
+  # already sent down a pipe, so a retried transfer could duplicate data.
+  DOCS_DOWNLOAD_DIR=$(mktemp -d)
+
+  curl --retry 3 --retry-all-errors -sL -o "${DOCS_DOWNLOAD_DIR}/nodesource_setup.sh" https://deb.nodesource.com/setup_16.x
+  sudo -E bash "${DOCS_DOWNLOAD_DIR}/nodesource_setup.sh"
   sudo apt-get install -y nodejs
 
-  curl --retry 3 -sS https://dl.yarnpkg.com/debian/pubkey.gpg | sudo apt-key add -
+  curl --retry 3 --retry-all-errors -sS -o "${DOCS_DOWNLOAD_DIR}/yarn_pubkey.gpg" https://dl.yarnpkg.com/debian/pubkey.gpg
+  sudo apt-key add "${DOCS_DOWNLOAD_DIR}/yarn_pubkey.gpg"
+  rm -rf "${DOCS_DOWNLOAD_DIR}"
   echo "deb https://dl.yarnpkg.com/debian/ stable main" | sudo tee /etc/apt/sources.list.d/yarn.list
 
   apt-get update

diff --git a/.ci/docker/common/install_linter.sh b/.ci/docker/common/install_linter.sh
@@ -15,5 +15,7 @@ source "$(dirname "${BASH_SOURCE[0]}")/utils.sh"
 pip_install -r requirements-lintrunner.txt
 
 # Install google-java-format
-curl -L --retry 3 https://github.com/google/google-java-format/releases/download/v1.23.0/google-java-format_linux-x86-64 > /opt/google-java-format
+# Use -o rather than a shell redirect so curl can discard a partial download
+# before retrying; redirected output is not reset and may contain duplicates.
+curl -L --retry 3 --retry-all-errors -o /opt/google-java-format https://github.com/google/google-java-format/releases/download/v1.23.0/google-java-format_linux-x86-64
 chmod +x /opt/google-java-format
diff --git a/.ci/scripts/export_model_artifact.sh b/.ci/scripts/export_model_artifact.sh
@@ -195,9 +195,17 @@ case "$HF_MODEL" in
     PREPROCESSOR_FEATURE_SIZE=""
     PREPROCESSOR_OUTPUT=""
     ;;
+  SocialLocalMobile/gemma-4-31B-it-HQQ-INT4)
+    MODEL_NAME="gemma4_31b"
+    TASK=""
+    MAX_SEQ_LEN=""
+    EXTRA_PIP=""
+    PREPROCESSOR_FEATURE_SIZE=""
+    PREPROCESSOR_OUTPUT=""
+    ;;
   *)
     echo "Error: Unsupported model '$HF_MODEL'"
-    echo "Supported models: mistralai/Voxtral-Mini-3B-2507, mistralai/Voxtral-Mini-4B-Realtime-2602, openai/whisper-{small, medium, large, large-v2, large-v3, large-v3-turbo}, google/gemma-3-4b-it, Qwen/Qwen3-0.6B, nvidia/diar_streaming_sortformer_4spk-v2, nvidia/parakeet-tdt, facebook/dinov2-small-imagenet1k-1-layer, SocialLocalMobile/Qwen3.5-35B-A3B-HQQ-INT4"
+    echo "Supported models: mistralai/Voxtral-Mini-3B-2507, mistralai/Voxtral-Mini-4B-Realtime-2602, openai/whisper-{small, medium, large, large-v2, large-v3, large-v3-turbo}, google/gemma-3-4b-it, Qwen/Qwen3-0.6B, nvidia/diar_streaming_sortformer_4spk-v2, nvidia/parakeet-tdt, facebook/dinov2-small-imagenet1k-1-layer, SocialLocalMobile/Qwen3.5-35B-A3B-HQQ-INT4, SocialLocalMobile/gemma-4-31B-it-HQQ-INT4"
     exit 1
     ;;
 esac
@@ -459,6 +467,71 @@ if [ "$MODEL_NAME" = "qwen3_5_moe" ]; then
   exit 0
 fi
 
+# Gemma 4 31B uses a prequantized checkpoint and custom export script.
+# Steps: download the checkpoint, run a short inference sanity check, copy
+# the tokenizer, export to .pte/.ptd, and verify the expected outputs exist.
+if [ "$MODEL_NAME" = "gemma4_31b" ]; then
+  pip install safetensors huggingface_hub gguf
+
+  # Download prequantized model outside OUTPUT_DIR to avoid uploading on failure
+  LOCAL_MODEL_DIR=$(mktemp -d)
+  INDUCTOR_CACHE=$(mktemp -d)
+  trap 'rm -rf "$LOCAL_MODEL_DIR" "$INDUCTOR_CACHE"' EXIT
+
+  # Hand the repo id and target dir to Python as argv rather than splicing
+  # them into the source text, so quoting in either value cannot alter the code.
+  python -c "import sys; from huggingface_hub import snapshot_download; snapshot_download(sys.argv[1], local_dir=sys.argv[2])" \
+      "$HF_MODEL" "$LOCAL_MODEL_DIR"
+
+  # Sanity check: run inference on the prequantized model
+  echo "::group::Inference sanity check"
+  # Record the exit status rather than letting errexit (if enabled) abort here,
+  # so the captured log is always printed and the log group is always closed.
+  INFERENCE_STATUS=0
+  INFERENCE_OUTPUT=$(python -m executorch.examples.models.gemma4_31b.inference \
+      --prequantized "$LOCAL_MODEL_DIR" \
+      --prompt "What is the capital of France?" \
+      --max-new-tokens 32 \
+      --temperature 0 \
+      --no-compile 2>&1) || INFERENCE_STATUS=$?
+  echo "$INFERENCE_OUTPUT"
+  echo "::endgroup::"
+  if [ "$INFERENCE_STATUS" -ne 0 ]; then
+    echo "ERROR: Inference sanity check failed — inference exited with status $INFERENCE_STATUS"
+    exit 1
+  fi
+  # Substring match done by the shell itself; no pipeline, so nothing to SIGPIPE.
+  case "$INFERENCE_OUTPUT" in
+    *Paris*) ;;
+    *)
+      echo "ERROR: Inference sanity check failed — expected 'Paris' in output"
+      exit 1
+      ;;
+  esac
+
+  # Copy tokenizer for the runner
+  cp "$LOCAL_MODEL_DIR/tokenizer.json" "${OUTPUT_DIR}/tokenizer.json"
+
+  # Export to .pte/.ptd (short cache dir avoids objcopy symbol length issues)
+  echo "::group::Export"
+  TORCHINDUCTOR_CACHE_DIR="$INDUCTOR_CACHE" \
+  python -m executorch.examples.models.gemma4_31b.export \
+      --prequantized "$LOCAL_MODEL_DIR" \
+      --output-dir "${OUTPUT_DIR}"
+  echo "::endgroup::"
+
+  # List the output dir first so it is visible even when an artifact is missing.
+  ls -al "${OUTPUT_DIR}"
+  for artifact in model.pte aoti_cuda_blob.ptd; do
+    if [ ! -f "${OUTPUT_DIR}/${artifact}" ]; then
+      echo "ERROR: Export did not produce ${OUTPUT_DIR}/${artifact}"
+      exit 1
+    fi
+  done
+
+  exit 0
+fi
+
 MAX_SEQ_LEN_ARG=""
 if [ -n "$MAX_SEQ_LEN" ]; then
   MAX_SEQ_LEN_ARG="--max_seq_len $MAX_SEQ_LEN"

diff --git a/.ci/scripts/setup-emscripten.sh b/.ci/scripts/setup-emscripten.sh
@@ -9,7 +9,14 @@ set -ex
 
 # need version >= 17
 install_node() {
-    curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.40.3/install.sh | bash
+    # Save the installer to a file before running it: curl resets a partially
+    # written -o file before a retry, but bytes already piped into bash cannot
+    # be taken back, so a retried transfer could execute duplicated script text.
+    local nvm_installer
+    nvm_installer=$(mktemp)
+    curl --retry 3 --retry-all-errors -o "${nvm_installer}" https://raw.githubusercontent.com/nvm-sh/nvm/v0.40.3/install.sh
+    bash "${nvm_installer}"
+    rm -f "${nvm_installer}"
     source "$HOME/.nvm/nvm.sh"
     nvm install 22
 }

diff --git a/.ci/scripts/setup-macos.sh b/.ci/scripts/setup-macos.sh
@@ -34,7 +34,7 @@ install_buck() {
   # team for help.
   BUCK2_VERSION=$(cat ci_commit_pins/buck2.txt)
   BUCK2=buck2-aarch64-apple-darwin-${BUCK2_VERSION}.zst
-  curl -s "https://ossci-macos.s3.amazonaws.com/${BUCK2}" -o "${BUCK2}"
+  curl -s --retry 3 --retry-all-errors "https://ossci-macos.s3.amazonaws.com/${BUCK2}" -o "${BUCK2}"
 
   zstd -d "${BUCK2}" -o buck2
 
@@ -68,7 +68,7 @@ install_sccache() {
   # NB: The function is adopted from PyTorch MacOS build workflow
   # https://github.com/pytorch/pytorch/blob/main/.github/workflows/_mac-build.yml
   if ! command -v sccache &> /dev/null; then
-    sudo curl --retry 3 "https://s3.amazonaws.com/ossci-macos/sccache/sccache-v0.4.1-${RUNNER_ARCH}" --output "${SCCACHE_PATH}/sccache"
+    sudo curl --retry 3 --retry-all-errors "https://s3.amazonaws.com/ossci-macos/sccache/sccache-v0.4.1-${RUNNER_ARCH}" --output "${SCCACHE_PATH}/sccache"
     sudo chmod +x "${SCCACHE_PATH}/sccache"
   fi
 

diff --git a/.ci/scripts/setup-mediatek-deps.sh b/.ci/scripts/setup-mediatek-deps.sh
@@ -14,7 +14,7 @@ install_neuropilot() {
   echo "Start installing neuropilot."
   mkdir -p "${MEDIATEK_INSTALLATION_DIR}"
 
-  curl -Lo /tmp/neuropilot-express.tar.gz "https://s3.ap-southeast-1.amazonaws.com/mediatek.neuropilot.com/06302508-4c94-4bf2-9789-b0ee44e83e27.gz"
+  curl -Lo /tmp/neuropilot-express.tar.gz --retry 3 --retry-all-errors "https://s3.ap-southeast-1.amazonaws.com/mediatek.neuropilot.com/06302508-4c94-4bf2-9789-b0ee44e83e27.gz"
   echo "Finishing downloading neuropilot sdk."
   tar zxvf /tmp/neuropilot-express.tar.gz --strip-components=1 --directory "${MEDIATEK_INSTALLATION_DIR}"
   echo "Finishing unzip neuropilot sdk."
@@ -33,7 +33,7 @@ setup_neuropilot() {
 }
 
 setup_calibration_data() {
-  curl -Lo /tmp/imagenette2-160.tgz https://s3.amazonaws.com/fast-ai-imageclas/imagenette2-160.tgz
+  curl -Lo /tmp/imagenette2-160.tgz --retry 3 --retry-all-errors https://s3.amazonaws.com/fast-ai-imageclas/imagenette2-160.tgz
   tar zxvf /tmp/imagenette2-160.tgz --strip-components=1 --directory "${MEDIATEK_INSTALLATION_DIR}"
 }
 

diff --git a/.ci/scripts/setup-openvino.sh b/.ci/scripts/setup-openvino.sh
@@ -37,7 +37,7 @@ else
   echo "Using OpenVINO stable release: ${OPENVINO_BUILD}"
 fi
 
-curl -Lo /tmp/openvino_toolkit.tgz --retry 3 --fail ${OPENVINO_URL}
+curl -Lo /tmp/openvino_toolkit.tgz --retry 3 --retry-all-errors --fail ${OPENVINO_URL}
 tar -xzf /tmp/openvino_toolkit.tgz
 mv "${OPENVINO_EXTRACTED_DIR}" openvino
 

diff --git a/.ci/scripts/setup-samsung-linux-deps.sh b/.ci/scripts/setup-samsung-linux-deps.sh
@@ -43,7 +43,7 @@ download_and_extract() {
   local out_file="$3"
 
   echo "Downloading from ${download_url}..."
-  curl -fsSL --retry 3 \
+  curl -fsSL --retry 3 --retry-all-errors \
     -H "apikey: ${API_KEY}" \
     -o "${out_file}" \
     "${download_url}"

diff --git a/.ci/scripts/setup-vulkan-linux-deps.sh b/.ci/scripts/setup-vulkan-linux-deps.sh
@@ -16,7 +16,7 @@ install_swiftshader() {
 
   _tmp_archive="/tmp/${_swiftshader_archive}"
 
-  curl --silent --show-error --location --fail --retry 3 \
+  curl --silent --show-error --location --fail --retry 3 --retry-all-errors \
     --output "${_tmp_archive}" "$_https_amazon_aws/${_swiftshader_archive}"
 
   tar -C "${_swiftshader_dir}" -xzf "${_tmp_archive}"
@@ -35,7 +35,7 @@ install_vulkan_sdk() {
 
   _tmp_archive="/tmp/vulkansdk.tar.gz"
 
-  curl --silent --show-error --location --fail --retry 3 \
+  curl --silent --show-error --location --fail --retry 3 --retry-all-errors \
     --output "${_tmp_archive}" "${_vulkan_sdk_url}"
 
   tar -C "${_vulkan_sdk_dir}" -xJf "${_tmp_archive}"

diff --git a/.ci/scripts/test_cortex_m_e2e.sh b/.ci/scripts/test_cortex_m_e2e.sh
@@ -19,6 +19,7 @@ et_root_dir=$(realpath "${script_dir}/../..")
 
 # Quantization is the default for the cortex-m55 target; run.sh's
 # arg parser only recognizes --no_quantize, so we omit any explicit flag.
+export ARM_FVP_INSTALL_I_AGREE_TO_THE_CONTAINED_EULA=True
 bash "${et_root_dir}/examples/arm/run.sh" \
     --model_name="${MODEL}" \
     --target=cortex-m55 \

diff --git a/.ci/scripts/test_ios_ci.sh b/.ci/scripts/test_ios_ci.sh
@@ -55,7 +55,7 @@ mv $MODEL_NAME*.pte "$APP_PATH/Resources/Models/MobileNet/"
 
 say "Downloading Labels"
 
-curl https://raw.githubusercontent.com/pytorch/hub/master/imagenet_classes.txt \
+curl --retry 3 --retry-all-errors https://raw.githubusercontent.com/pytorch/hub/master/imagenet_classes.txt \
   -o "$APP_PATH/Resources/Models/MobileNet/imagenet_classes.txt"
 
 say "Creating Simulator"

diff --git a/.ci/scripts/test_model_e2e.sh b/.ci/scripts/test_model_e2e.sh
@@ -228,9 +228,21 @@ case "$HF_MODEL" in
     AUDIO_FILE=""
     IMAGE_PATH=""
     ;;
+  SocialLocalMobile/gemma-4-31B-it-HQQ-INT4)
+    MODEL_NAME="gemma4_31b"
+    RUNNER_TARGET="gemma4_31b_runner"
+    RUNNER_PATH="gemma4_31b"
+    EXPECTED_OUTPUT="Paris"
+    PREPROCESSOR=""
+    TOKENIZER_URL=""
+    TOKENIZER_FILE="tokenizer.json"
+    AUDIO_URL=""
+    AUDIO_FILE=""
+    IMAGE_PATH=""
+    ;;
   *)
     echo "Error: Unsupported model '$HF_MODEL'"
-    echo "Supported models: mistralai/Voxtral-Mini-3B-2507, mistralai/Voxtral-Mini-4B-Realtime-2602, nvidia/diar_streaming_sortformer_4spk-v2, openai/whisper series (whisper-{small, medium, large, large-v2, large-v3, large-v3-turbo}), google/gemma-3-4b-it, Qwen/Qwen3-0.6B, nvidia/parakeet-tdt, facebook/dinov2-small-imagenet1k-1-layer, SocialLocalMobile/Qwen3.5-35B-A3B-HQQ-INT4"
+    echo "Supported models: mistralai/Voxtral-Mini-3B-2507, mistralai/Voxtral-Mini-4B-Realtime-2602, nvidia/diar_streaming_sortformer_4spk-v2, openai/whisper series (whisper-{small, medium, large, large-v2, large-v3, large-v3-turbo}), google/gemma-3-4b-it, Qwen/Qwen3-0.6B, nvidia/parakeet-tdt, facebook/dinov2-small-imagenet1k-1-layer, SocialLocalMobile/Qwen3.5-35B-A3B-HQQ-INT4, SocialLocalMobile/gemma-4-31B-it-HQQ-INT4"
     exit 1
     ;;
 esac
@@ -244,19 +256,19 @@ echo "::group::Prepare $MODEL_NAME Artifacts"
 
 
 # Download tokenizer files (skip for models that bundle tokenizer in export or do not use one)
-if [ "$MODEL_NAME" != "parakeet" ] && [ "$MODEL_NAME" != "voxtral_realtime" ] && [ "$MODEL_NAME" != "sortformer" ] && [ "$MODEL_NAME" != "dinov2" ] && [ "$MODEL_NAME" != "qwen3_5_moe" ]; then
+if [ "$MODEL_NAME" != "parakeet" ] && [ "$MODEL_NAME" != "voxtral_realtime" ] && [ "$MODEL_NAME" != "sortformer" ] && [ "$MODEL_NAME" != "dinov2" ] && [ "$MODEL_NAME" != "qwen3_5_moe" ] && [ "$MODEL_NAME" != "gemma4_31b" ]; then
   if [ "$TOKENIZER_FILE" != "" ]; then
-    curl -L $TOKENIZER_URL/$TOKENIZER_FILE -o $MODEL_DIR/$TOKENIZER_FILE
+    curl -L --retry 3 --retry-all-errors $TOKENIZER_URL/$TOKENIZER_FILE -o $MODEL_DIR/$TOKENIZER_FILE
   else
-    curl -L $TOKENIZER_URL/tokenizer.json -o $MODEL_DIR/tokenizer.json
-    curl -L $TOKENIZER_URL/tokenizer_config.json -o $MODEL_DIR/tokenizer_config.json
-    curl -L $TOKENIZER_URL/special_tokens_map.json -o $MODEL_DIR/special_tokens_map.json
+    curl -L --retry 3 --retry-all-errors $TOKENIZER_URL/tokenizer.json -o $MODEL_DIR/tokenizer.json
+    curl -L --retry 3 --retry-all-errors $TOKENIZER_URL/tokenizer_config.json -o $MODEL_DIR/tokenizer_config.json
+    curl -L --retry 3 --retry-all-errors $TOKENIZER_URL/special_tokens_map.json -o $MODEL_DIR/special_tokens_map.json
   fi
 fi
 
 # Download test files
 if [ "$AUDIO_URL" != "" ]; then
-  curl -L $AUDIO_URL -o ${MODEL_DIR}/$AUDIO_FILE
+  curl -L --retry 3 --retry-all-errors $AUDIO_URL -o ${MODEL_DIR}/$AUDIO_FILE
 elif [[ "$MODEL_NAME" == *whisper* ]] || [ "$MODEL_NAME" = "voxtral_realtime" ]; then
   if ! command -v ffmpeg >/dev/null; then
     if [ "$(uname -s)" = "Linux" ] && command -v apt-get >/dev/null; then
@@ -278,7 +290,7 @@ fi
 
 # Download test image for vision models
 if [ -n "${IMAGE_URL:-}" ]; then
-  curl -L "$IMAGE_URL" -o "${MODEL_DIR}/test_image.jpg"
+  curl -L --retry 3 --retry-all-errors "$IMAGE_URL" -o "${MODEL_DIR}/test_image.jpg"
 fi
 
 ls -al
@@ -368,6 +380,9 @@ EOF
   qwen3_5_moe)
     RUNNER_ARGS="$RUNNER_ARGS --tokenizer_path ${MODEL_DIR}/$TOKENIZER_FILE --prompt 'What is the capital of France?' --max_new_tokens 128 --temperature 0 --cuda_graph"
     ;;
+  gemma4_31b)
+    RUNNER_ARGS="$RUNNER_ARGS --tokenizer_path ${MODEL_DIR}/$TOKENIZER_FILE --prompt 'What is the capital of France?' --max_new_tokens 128 --temperature 0 --cuda_graph"
+    ;;
   voxtral_realtime)
     RUNNER_ARGS="--model_path ${MODEL_DIR}/model.pte --tokenizer_path ${MODEL_DIR}/$TOKENIZER_FILE --preprocessor_path ${MODEL_DIR}/$PREPROCESSOR --audio_path ${MODEL_DIR}/$AUDIO_FILE --temperature 0"
     # Add CUDA data path if present

diff --git a/.ci/scripts/test_riscv_qemu.sh b/.ci/scripts/test_riscv_qemu.sh
@@ -5,7 +5,7 @@
 # LICENSE file in the root directory of this source tree.
 
 # CI wrapper: install RISC-V cross-compile + qemu-user tooling, then run the
-# RISC-V Phase 1 smoke test (export, cross-compile, qemu-user execution) via
+# RISC-V smoke test (export, cross-compile, qemu-user execution) via
 # examples/riscv/run.sh. The bundled-IO comparison and Test_result: PASS
 # check are done by run.sh.
 
@@ -14,5 +14,56 @@ set -eu
 script_dir=$(realpath "$(dirname "${BASH_SOURCE[0]}")")
 et_root_dir=$(realpath "${script_dir}/../..")
 
+# Defaults; each can be overridden by the command-line options parsed below.
+model="add"
+xnnpack=false
+quantize=false
+verbose=false
+verbose_xnnpack=false
+
+# Print the supported command-line options to stdout.
+usage() {
+    cat <<EOF
+Usage: $(basename "$0") [options]
+Options:
+  --model=<NAME>     Which model to export and run (default: add)
+  --xnnpack          Enable the XNNPACK backend (AOT partitioner + runtime)
+  --quantize         Produce an 8-bit quantized model
+  --verbose          Enable XNNPACK partitioner DEBUG logging and dump the lowered graph
+  --verbose-xnnpack  Build XNNPACK with XNN_LOG_LEVEL=4 to log microkernel dispatch
+  -h, --help         Show this help
+EOF
+}
+
+for arg in "$@"; do
+    case $arg in
+        --model=*) model="${arg#*=}" ;;
+        --xnnpack) xnnpack=true ;;
+        --quantize) quantize=true ;;
+        --verbose) verbose=true ;;
+        --verbose-xnnpack) verbose_xnnpack=true ;;
+        -h|--help) usage; exit 0 ;;
+        # Usage accompanies the error here, so send it to stderr as well.
+        *) echo "Unknown option: $arg" >&2; usage >&2; exit 1 ;;
+    esac
+done
+
+# Forward only the flags that were requested on to run.sh.
+run_extra_args=()
+if ${xnnpack}; then
+    run_extra_args+=(--xnnpack)
+fi
+if ${quantize}; then
+    run_extra_args+=(--quantize)
+fi
+if ${verbose}; then
+    run_extra_args+=(--verbose)
+fi
+if ${verbose_xnnpack}; then
+    run_extra_args+=(--verbose-xnnpack)
+fi
+
 bash "${et_root_dir}/examples/riscv/setup.sh"
-bash "${et_root_dir}/examples/riscv/run.sh"
+# The ${arr[@]+...} guard expands to nothing for an empty array; a bare
+# "${arr[@]}" is an unbound-variable error under `set -u` before bash 4.4.
+bash "${et_root_dir}/examples/riscv/run.sh" --model="${model}" ${run_extra_args[@]+"${run_extra_args[@]}"}