#!/bin/sh
# Capybara's anticonf script by Pacha (2025)

# Detect number of cores
#
# Result: $num_cores holds the OpenMP thread count as a positive integer.
# Sources, in order of precedence:
#   1. CAPYBARA_NCORES, when it is a positive integer (used verbatim).
#   2. Half of parallel::detectCores(), as printed by Rscript.
#   3. Half of the CPU count from /proc/cpuinfo, or `sysctl hw.ncpu` on Darwin.
#   4. 1, when none of the above yields a usable number.

# Succeed only when $1 is a positive decimal integer without leading zeros.
# Rejecting everything else keeps values such as "NA", "0", "08" or
# multi-line output out of compiler flags and shell arithmetic.
is_positive_integer() {
  case "$1" in
    '' | 0* | *[!0-9]*) return 1 ;;
  esac
  return 0
}

# Print the thread count on stdout; diagnostics go to stderr.
# Meant to be called through command substitution, which also keeps the
# scratch variable `detected` out of the caller's environment.
detect_num_cores() {
  if [ -n "${CAPYBARA_NCORES:-}" ]; then
    if is_positive_integer "$CAPYBARA_NCORES"; then
      echo "$CAPYBARA_NCORES"
      return 0
    fi
    echo "WARNING: ignoring CAPYBARA_NCORES='$CAPYBARA_NCORES' (not a positive integer)" >&2
  fi

  # Rscript may be missing, may print "NA" (core count unknown) or "0"
  # (single core halved), so the output is validated instead of trusted.
  detected=$("${R_HOME}/bin/Rscript" -e "cat(as.integer(parallel::detectCores()/2))" 2>/dev/null)
  if is_positive_integer "$detected"; then
    echo "$detected"
    return 0
  fi

  # OS-level fallback. `grep -c` prints 0 *and* exits non-zero when nothing
  # matches, so an `|| echo 1` fallback would produce two lines; validating
  # the captured text afterwards avoids that.
  detected=""
  if [ -f /proc/cpuinfo ]; then
    detected=$(grep -c "^processor" /proc/cpuinfo 2>/dev/null)
  elif [ "$(uname)" = "Darwin" ]; then
    detected=$(sysctl -n hw.ncpu 2>/dev/null)
  fi
  is_positive_integer "$detected" || detected=1

  # Use half of the CPUs, but never fewer than one thread.
  detected=$((detected / 2))
  [ "$detected" -ge 1 ] || detected=1
  echo "$detected"
}

num_cores=$(detect_num_cores)

# Locate R
# R_HOME is expected to be exported by R when it runs this script during
# installation; fall back to `R RHOME` so the script also works when run by
# hand, and stop early when R cannot be found at all.
: "${R_HOME:=$(R RHOME 2>/dev/null)}"
if [ -z "$R_HOME" ]; then
  echo "ERROR: could not determine R_HOME" >&2
  exit 1
fi
# Quoted everywhere below so an R_HOME containing spaces is not word-split.
r_cmd="${R_HOME}/bin/R"

# Find compiler
CXX=$("$r_cmd" CMD config CXX)
CXXFLAGS=$("$r_cmd" CMD config CXXFLAGS)
# Left empty when the query fails.
SHLIB_OPENMP_CXXFLAGS=$("$r_cmd" CMD config SHLIB_OPENMP_CXXFLAGS 2>/dev/null || echo "")

# Every later compiler probe runs "$CXX ..."; without a compiler they would
# all fail silently and a misleading Makevars would be generated.
if [ -z "$CXX" ]; then
  echo "ERROR: 'R CMD config CXX' did not return a C++ compiler" >&2
  exit 1
fi

# Find BLAS/LAPACK
BLAS_LIBS=$("$r_cmd" CMD config BLAS_LIBS)
LAPACK_LIBS=$("$r_cmd" CMD config LAPACK_LIBS)

# OpenMP support
#
# detect_openmp_support
#   Sets OPENMP_SUPPORT to "yes" when $CXX $CXXFLAGS can compile and link a
#   minimal OpenMP program, and to "no" otherwise.
#   Reads:  CXX, CXXFLAGS, SHLIB_OPENMP_CXXFLAGS
#   Writes: OPENMP_SUPPORT; scratch files testomp and testomp.cpp in the
#           current directory (always removed before returning).
detect_openmp_support() {
  OPENMP_SUPPORT="no"
  # Quoted delimiter: the C++ source is written out literally.
  cat > testomp.cpp <<'EOF'
#include <omp.h>
int main() {
  #pragma omp parallel
  {
    (void) omp_get_thread_num();
  }
  return 0;
}
EOF
  # Probe with the OpenMP flags reported by R when there are any, so the
  # result reflects the flags R would use; plain -fopenmp is the fallback.
  # The expansions are intentionally unquoted: each may hold several words.
  if $CXX $CXXFLAGS ${SHLIB_OPENMP_CXXFLAGS:--fopenmp} testomp.cpp -o testomp >/dev/null 2>&1; then
    OPENMP_SUPPORT="yes"
  fi
  rm -f testomp testomp.cpp
}

detect_openmp_support

# Detect CPU features for SIMD optimization
#
# detect_simd_support [CPUINFO_FILE]
#   CPUINFO_FILE  file to scan for CPU flags (default: /proc/cpuinfo).
#   Sets SIMD_SUPPORT to "AVX2", "SSE4.2" or "" and AVX2_SUPPORT /
#   SSE42_SUPPORT to "yes"/"no". AVX2 takes precedence, so at most one of the
#   two is "yes". Prints one line on stdout when a feature is found.
detect_simd_support() {
  cpuinfo_file="${1:-/proc/cpuinfo}"
  SIMD_SUPPORT=""
  AVX2_SUPPORT="no"
  SSE42_SUPPORT="no"

  if [ -f "$cpuinfo_file" ]; then
    if grep -q avx2 "$cpuinfo_file" 2>/dev/null; then
      SIMD_SUPPORT="AVX2"
    elif grep -q sse4_2 "$cpuinfo_file" 2>/dev/null; then
      SIMD_SUPPORT="SSE4.2"
    fi
  elif [ "$(uname)" = "Darwin" ]; then
    # On Intel Macs AVX2 is listed under machdep.cpu.leaf7_features rather
    # than machdep.cpu.features, so both keys are queried. Keys that do not
    # exist only produce an error on stderr, which is discarded.
    darwin_features=$(sysctl -n machdep.cpu.features machdep.cpu.leaf7_features 2>/dev/null)
    # Glob patterns: the "." in SSE4.2 is matched literally here.
    case "$darwin_features" in
      *AVX2*) SIMD_SUPPORT="AVX2" ;;
      *SSE4.2*) SIMD_SUPPORT="SSE4.2" ;;
    esac
  fi

  case "$SIMD_SUPPORT" in
    AVX2)
      AVX2_SUPPORT="yes"
      echo "- [V] AVX2 support detected"
      ;;
    SSE4.2)
      SSE42_SUPPORT="yes"
      echo "- [V] SSE4.2 support detected"
      ;;
  esac
}

detect_simd_support

# Probe a compiler flag
# Usage: test_flag "-O3"
# Builds a one-line program with $CXX $CXXFLAGS plus the flag given in $1.
# Returns 0 when the build succeeds and 1 otherwise. The scratch files
# testrconf and testrconf.cpp are removed on both outcomes.
test_flag() {
  printf '%s\n' 'int main(){return 0;}' > testrconf.cpp
  $CXX $CXXFLAGS $1 testrconf.cpp -o testrconf >/dev/null 2>&1
  # Park the build status in the positional parameters so that no extra
  # variable is needed while cleaning up.
  set -- "$?"
  rm -f testrconf testrconf.cpp
  [ "$1" -eq 0 ]
}

# Remove any -std= from CXX and CXXFLAGS so that the standard chosen below
# is the only one on the command line
strip_std_expr='s/ *-std=[^ ]+//g'
CXX=$(echo "$CXX" | sed -E "$strip_std_expr")
CXXFLAGS=$(echo "$CXXFLAGS" | sed -E "$strip_std_expr")

# C++ standard detection (must come after test_flag is defined)
# Start from the C++11 fallback and upgrade to the newest standard the
# compiler accepts, trying 20, then 17, then 14.
CXX_STD="CXX11"
STD_FLAG="-std=c++11"
for std_year in 20 17 14; do
  if test_flag "-std=c++${std_year}"; then
    CXX_STD="CXX${std_year}"
    STD_FLAG="-std=c++${std_year}"
    break
  fi
done

# Add the detected standard to CXXFLAGS
CXXFLAGS="$CXXFLAGS $STD_FLAG"

# High-performance optimization flags
# Only attempted when CAPYBARA_OPTIMIZATIONS=yes. Every candidate is probed
# on its own with test_flag and appended to OPTFLAGS when the compiler
# accepts it; otherwise OPTFLAGS stays empty.
OPTFLAGS=""
if [ "$CAPYBARA_OPTIMIZATIONS" = "yes" ]; then
  # SIMD optimizations based on detected features
  simd_candidates=""
  if [ "$AVX2_SUPPORT" = "yes" ]; then
    simd_candidates="-mavx -mavx2 -mfma"
  elif [ "$SSE42_SUPPORT" = "yes" ]; then
    simd_candidates="-msse2 -msse3 -msse4.1 -msse4.2"
  fi

  # Candidates, in the order they end up in OPTFLAGS:
  #   core optimization, architecture-specific, SIMD, loop optimization,
  #   vectorization, link-time optimization.
  # $simd_candidates is left unquoted on purpose so it splits into flags.
  for flag in \
    -O3 -DNDEBUG -ffast-math \
    -march=native -mtune=native \
    $simd_candidates \
    -funroll-loops -fprefetch-loop-arrays \
    -ftree-vectorize -ftree-slp-vectorize \
    -flto
  do
    if test_flag "$flag"; then
      OPTFLAGS="$OPTFLAGS $flag"
    fi
  done
fi

# Compose ARMA_FLAGS
# One -D option per Armadillo macro, separated by single spaces; the OpenMP
# thread count comes from $num_cores.
# ARMA_64BIT_WORD presumably selects 64-bit integers for Armadillo sizes and
# indices -- confirm against the Armadillo configuration documentation.
# NOTE(review): nothing later in this script reads ARMA_FLAGS; confirm
# whether src/Makevars.in is supposed to receive it.
ARMA_FLAGS=""
for arma_macro in \
  ARMA_NO_DEBUG \
  ARMA_USE_BLAS \
  ARMA_USE_LAPACK \
  "ARMA_OPENMP_THREADS=$num_cores" \
  ARMA_64BIT_WORD
do
  ARMA_FLAGS="${ARMA_FLAGS:+$ARMA_FLAGS }-D$arma_macro"
done

# Compose Makevars
# Fills the @CXX_STD@, @OPTFLAGS@ and @ncores@ placeholders of
# src/Makevars.in and writes the result to src/Makevars. Any failure is
# reported on stderr and aborts the script with status 1.
if [ ! -f "src/Makevars.in" ]; then
  echo "ERROR: src/Makevars.in template not found" >&2
  exit 1
fi

# Escape the characters that are special in a sed replacement delimited by
# "|" (backslash, "&" and "|") so that values are inserted literally.
sed_escape() {
  printf '%s' "$1" | sed -e 's/[\\&|]/\\&/g'
}

if ! sed -e "s|@CXX_STD@|$(sed_escape "$CXX_STD")|g" \
    -e "s|@OPTFLAGS@|$(sed_escape "$OPTFLAGS")|g" \
    -e "s|@ncores@|$(sed_escape "$num_cores")|g" \
    src/Makevars.in > src/Makevars; then
  # Do not leave a truncated Makevars behind for the build to pick up.
  rm -f src/Makevars
  echo "ERROR: could not generate src/Makevars" >&2
  exit 1
fi

# Print summary
# Reports the detected configuration, then the environment variables that
# tune the build. The hint about CAPYBARA_OPTIMIZATIONS is shown only when
# high-performance mode is not already enabled.
summary_rule="=========================================================================="

printf '%s\n' "$summary_rule" "Capybara configuration summary" ""
printf '  C++ Compiler:           %s\n' "$CXX"
printf '  C++ Standard:           %s\n' "$CXX_STD"
printf '  OpenMP Support:         %s\n' "$OPENMP_SUPPORT"
printf '  OpenMP Threads:         %s\n' "$num_cores"
printf '  SIMD Support:           %s\n' "${SIMD_SUPPORT:-None}"
printf '  BLAS:                   %s\n' "$BLAS_LIBS"
printf '  LAPACK:                 %s\n' "$LAPACK_LIBS"
printf '  High-Performance Mode:  %s\n' "${CAPYBARA_OPTIMIZATIONS:-no}"
printf '  Optimization Flags:     %s\n' "$OPTFLAGS"
printf '\n'

# The " " arguments are the single-space separator lines of the report.
printf '%s\n' "For faster computation" " "
if [ "${CAPYBARA_OPTIMIZATIONS:-no}" != "yes" ]; then
  printf '%s\n' \
    " Enable high-performance optimizations run:" \
    " export CAPYBARA_OPTIMIZATIONS=yes" \
    " "
fi
printf '%s\n' \
  " Change thread count run:" \
  " export CAPYBARA_NCORES=<number>" \
  " " \
  " Then reinstall the package" \
  "$summary_rule"

exit 0
