#!/usr/bin/bash

# twa: a tiny website auditing script

# Make [[ ... ]] pattern/regex matches and `case` patterns case-insensitive
# for the remainder of the script.
shopt -s nocasematch

TWA_VERSION="1.11.0"

# Timeout handed to `nc -w` (probe) and `curl --max-time` (fetch);
# can be overridden from the environment.
TWA_TIMEOUT="${TWA_TIMEOUT:-5}"
# User-Agent handed to curl by fetch; can be overridden from the environment.
TWA_USER_AGENT="${TWA_USER_AGENT:-Mozilla/5.0}"
# Split the whitespace-separated TWA_CURLOPTS environment string into an array
# of extra curl arguments (an unset/empty value yields an empty array).
read -r -a TWA_CURLOPTS <<< "${TWA_CURLOPTS}"

# Table of finding codes -> message templates, grouped by the stage that
# reports them. Escaped placeholders such as \${url} are stored literally and
# are only expanded later, when MEH/FAIL/FATAL build the message with `eval`;
# the named variable must therefore be set by the stage before it reports.
declare -A TWA_CODES=(
  # Stage 0
  [TWA-0001]="Expected port 443 to be open, but it isn't"

  # Stage 1
  [TWA-0101]="HTTP redirects to HTTPS using a 302"
  [TWA-0102]="HTTP redirects to HTTP (not secure)"
  [TWA-0103]="HTTP doesn't redirect at all"

  # Stage 2
  [TWA-0201]="Skipping security checks due to no secure channel"
  [TWA-0202]="Strict-Transport-Security max-age is less than 6 months"
  [TWA-0203]="Strict-Transport-Security, but no includeSubDomains"
  [TWA-0204]="Strict-Transport-Security, but no preload"
  [TWA-0205]="Strict-Transport-Security missing"
  [TWA-0206]="X-Frame-Options is 'sameorigin', consider 'deny'"
  [TWA-0207]="X-Frame-Options is 'allow-from', consider 'deny' or 'none'"
  [TWA-0208]="X-Frame-Options missing"
  [TWA-0209]="X-Content-Type-Options missing"
  [TWA-0210]="X-XSS-Protection is '0'; XSS filtering disabled"
  [TWA-0211]="X-XSS-Protection sanitizes but doesn't block, consider mode=block?"
  [TWA-0212]="X-XSS-Protection missing"
  [TWA-0213]="Referrer-Policy specifies '\${rp}', consider 'no-referrer'?"
  [TWA-0214]="Referrer-Policy missing"
  [TWA-0215]="Content-Security-Policy 'default-src' is '\${csp_default_src}'"
  [TWA-0216]="Content-Security-Policy 'default-src' is missing"
  [TWA-0217]="Content-Security-Policy has one or more 'unsafe-inline' policies"
  [TWA-0218]="Content-Security-Policy has one or more 'unsafe-eval' policies"
  [TWA-0219]="Content-Security-Policy missing"
  [TWA-0220]="Feature-Policy missing"
  [TWA-0221]="Expect-CT missing 'enforce' directive"
  [TWA-0222]="Expect-CT missing 'report-uri' directive"
  [TWA-0223]="Expect-CT requires missing 'max-age' directive"
  [TWA-0224]="'Access-Control-Allow-Origin' field '*' allows resources to be accessable by any domain."
  [TWA-0225]="'Access-Control-Allow-Origin' field 'null' allows the 'Origin' header to be crafted to grant access to resources on this domain."
  [TWA-0226]="'Access-Control-Allow-Origin' header is not configured properly."
  [TWA-0227]="'Access-Control-Allow-Credentials' value is set to 'false'."
  [TWA-0228]="'Access-Control-Allow-Credentials' header is not configured properly."
  [TWA-0229]="'Cross-Origin-Embedder-Policy' allows cross-origin resources to be fetched without giving explicit permission."
  [TWA-0230]="'Cross-Origin-Opener-Policy' allows the document to be added to its opener's browsing context group."

  # Stage 3
  [TWA-0301]="Site sends 'Server' with what looks like a version tag: \${server}"
  [TWA-0302]="Site sends a long 'Server', probably disclosing version info: \${server}"
  [TWA-0303]="Site sends '\${badheader}', probably disclosing version info: '\${content}'"

  # Stage 4
  [TWA-0401]="SCM repository being served at: \${url}"
  [TWA-0402]="Possible SCM repository being served (maybe protected?) at: \${url}"
  [TWA-0403]="Environment file being served at: \${url}"
  [TWA-0404]="Possible environment file being served (maybe protected?) at: \${url}"
  [TWA-0405]="Config file being served at: \${url}"
  [TWA-0406]="Possible config file being served (maybe protected?) at: \${url}"
  [TWA-0407]="Package management file being served at: \${url}"
  [TWA-0408]="Possible package management file being served (maybe protected?) at: \${url}"
  [TWA-0409]="Build file being served at: \${url}"
  [TWA-0410]="Possible build file being served (maybe protected?) at: \${url}"

  # Stage 5
  [TWA-0501]="No robots file found at: \${domain}/robots.txt"
  [TWA-0502]="robots.txt lists what looks like an admin panel"
  [TWA-0503]="robots.txt lists what looks like CGI scripts"
  [TWA-0504]="No security file found at: \${domain}/.well-known/security.txt"

  # Stage 6
  [TWA-0601]="No CAA records found"
  [TWA-0602]="Domain doesn't specify any valid issuers"
  [TWA-0603]="Domain explicitly disallows all issuers"
  [TWA-0604]="Domain doesn't specify any violation reporting endpoints"

  # Stage 7
  [TWA-0701]="Domain is listening on a development/backend port \${dev_port} (\${dev_port_comment})"

  # Stage 8
  [TWA-0801]="cookie '\${cookie_name}' has 'secure' but no 'httponly' flag"
  [TWA-0802]="cookie '\${cookie_name}' has no 'secure' flag"
  [TWA-0803]="cookie '\${cookie_name}' has SameSite set to 'lax'"
  [TWA-0804]="cookie '\${cookie_name}' has SameSite set to 'none' or is not set properly"
  [TWA-0805]="cookie '\${cookie_name}' has missing or empty 'SameSite' flag"
  [TWA-0806]="cookie '\${cookie_name}' must contain a 'Domain' attribute"
  [TWA-0807]="cookie '\${cookie_name}' must not contain a 'Domain' attribute"
  [TWA-0808]="cookie '\${cookie_name}' must contain a 'Path' attribute"
  [TWA-0809]="cookie '\${cookie_name}' 'Domain' attribute must match the domain being tested"
  [TWA-0810]="cookie '\${cookie_name}' 'Path' attribute must contain a value of '/'"

  # Stage 9
  [TWA-0901]="testssl reports '\${finding}' ('\${id}')"
)

# If we're being sourced, stop execution here.
# You can use this to read `TWA_CODES` above via `source` in other programs.
# (The comparison only fails, and `return` only runs, when the file being read
# is not the script named by $0.)
[[ "${BASH_SOURCE[0]}" == "${0}" ]] || return

# TTY check to prevent breaking scripts
# Respect the "NO_COLOR" spec: https://no-color.org/
# The TWA_COLOR_* variables are only assigned here; when stdout is not a
# terminal or NO_COLOR is non-empty they are left untouched, so `output`
# expands them to nothing unless they were already set.
if [[ -t 1 && -z "${NO_COLOR}" ]]; then
  # Colored output
  TWA_COLOR_GREEN=$(tput setaf 2)
  TWA_COLOR_BLUE=$(tput setaf 4)
  TWA_COLOR_RED=$(tput setaf 1)
  TWA_COLOR_PURPLE=$(tput setaf 5)
  TWA_COLOR_RESET=$(tput sgr0)
fi

# Print the one-line command synopsis to stdout.
function usage {
  printf '%s\n' "Usage: twa [-wvcsdV] <domain>"
}

# Check whether a command is available as a regular file.
# Arguments: $1 - command name to look up
# Returns:   0 when `command -v` resolves the name to an existing file,
#            non-zero otherwise (names that do not resolve to a file path,
#            such as shell builtins, therefore normally fail the check).
function installed {
  # Kept local so a lookup never clobbers a caller's `cmd` variable.
  local cmd
  cmd=$(command -v "${1}")

  [[ -n "${cmd}" && -f "${cmd}" ]]
}

# Report a fatal error on stderr and terminate the script with status 1.
# Arguments: all arguments are joined into the error message.
function die {
  printf 'Error: %s\n' "$*" >&2
  exit 1
}

# Print a non-fatal warning on stderr.
# Arguments: all arguments are joined into the warning message.
function warn {
  printf 'Warn: %s\n' "$*" >&2
}

# Print a progress message on stderr, but only when the global `verbose`
# variable is non-empty.
# Arguments: all arguments are joined into the message.
# Returns:   0 when verbose mode is off (the previous `[[ ]] && echo` form
#            returned 1 in that case, which leaked a spurious failure status
#            to any caller that ended with a verbose call).
function verbose {
  if [[ -n "${verbose}" ]]; then
    echo "[+] $*" >&2
  fi
}

# Run the given command line; abort the whole script through `die` if it
# exits with a non-zero status.
# Arguments: the command and its arguments, executed as "$@".
function ensure {
  if ! "$@"; then
    die "Failed to run '$*'. Aborting."
  fi
}

# Test whether a TCP port accepts connections, using `nc -z`.
# Arguments: $1 - host, $2 - port
# Returns:   the exit status of nc (its stderr is discarded).
function probe {
  local host="${1}" port="${2}"

  verbose "Probing ${host}:${port}"
  nc -z -w "${TWA_TIMEOUT}" "${host}" "${port}" 2>/dev/null
}

# Thin wrapper around curl that applies the script-wide settings.
# Globals:   TWA_CURLOPTS (array of extra curl options), TWA_USER_AGENT,
#            TWA_TIMEOUT
# Arguments: any further curl options and the URL, passed through unchanged.
# Returns:   curl's exit status.
function fetch {
  # The user agent is passed as-is. Wrapping it in an extra pair of single
  # quotes (as was done before) made those quote characters part of the
  # User-Agent value handed to curl.
  curl "${TWA_CURLOPTS[@]}" -A "${TWA_USER_AGENT}" --max-time "${TWA_TIMEOUT}" "${@}"
}

# Retrieve only the response headers of a URL (curl -s -I via `fetch`).
# Arguments: $1 - URL
# Outputs:   whatever `fetch` writes to stdout.
function fetch_headers {
  local target="${1}"

  verbose "Fetching headers for ${target}"
  fetch -s -I "${target}"
}

# Request a URL with redirects followed and print "<http_code> <effective_url>"
# (curl's -w write-out); the response itself is discarded.
# Arguments: $1 - URL
function fetch_respcode {
  local -a curl_args=(-o /dev/null -s -I -L -w "%{http_code} %{url_effective}")

  fetch "${curl_args[@]}" "${1}"
}

# Extract the value of one header from a block of raw response headers.
# Arguments: $1 - header name (matched case-insensitively)
# Inputs:    raw headers on stdin
# Outputs:   the header value with CR/LF removed; if the header occurs on
#            several lines their values are concatenated. Nothing is printed
#            when the header is absent or its value is empty.
function get_header {
  local header="${1,,}"

  verbose "Extracting ${header}"

  # The name is handed to awk as data (-v) instead of being spliced into the
  # program text, and the value is obtained by stripping "<name>:" plus any
  # following blanks. Locating the value with index($0, $2) returned the whole
  # line whenever the value text also occurred in the header name
  # ("Server: Server") or when no space followed the colon.
  awk -v name="${header}:" '
    index(tolower($0), name) == 1 {
      sub(/^[^:]*:[ \t]*/, "")
      print
    }' | tr -d '\r\n'
}

# Extract one ';'-separated field from a header value.
# Arguments: $1 - field name; a record matches when its first word starts
#                 with this name (case-insensitive). With no name, every
#                 record matches.
# Inputs:    header value on stdin
# Outputs:   for "name value ..." records, everything after the name;
#            for single-word records ("name=value" or a bare flag), the whole
#            record. CR/LF are removed and multiple matches are concatenated.
function get_field {
  local field="${1,,}"

  verbose "Extracting ${field}"

  # The name is passed as awk data (-v) rather than spliced into the program.
  # The first word is stripped explicitly: locating the value with
  # index($0, $2) returned the whole record whenever the second word also
  # occurred inside the first one (e.g. "report-to report").
  awk -v name="${field}" '
    BEGIN { RS = ";" }
    substr(tolower($1), 1, length(name)) == name {
      value = $0
      if (NF >= 2) {
        sub(/^[ \t\n]*[^ \t\n]+[ \t\n]+/, "", value)
      }
      print value
    }' | tr -d '\r\n'
}

# Extract every Set-Cookie value from a block of raw response headers.
# Inputs:  raw headers on stdin
# Outputs: one cookie per line (header name, colon and following blanks
#          removed; any trailing CR from the headers is left in place).
function get_cookies {
  verbose "Extracting cookies from headers"

  # Strip "Set-Cookie:" explicitly instead of locating the value with
  # index($0, $2), which printed the whole header line when no space followed
  # the colon.
  awk 'tolower($0) ~ /^set-cookie:/ {
    sub(/^[^:]*:[ \t]*/, "")
    print
  }'
}

# Emit one result line, either as CSV (when the global `csv` is non-empty) or
# as a human-readable line with a colorized status.
# Arguments: $1 - status (PASS, MEH, FAIL, FATAL, UNK or SKIP)
#            $2 - domain
#            $3 - message
#            $4 - TWA code (optional; only used in CSV mode)
# Globals:   csv, TWA_COLOR_* (read)
function output {
  local level="${1}" subject="${2}" text="${3}" twa_code="${4}"
  local label=""

  if [[ -n "${csv}" ]]; then
    # CSV quoting: double any embedded quote, then wrap the field in quotes.
    local escaped="${text//\"/\"\"}"
    printf '%s,%s,"%s",%s\n' "${level}" "${subject}" "${escaped}" "${twa_code}"
    return
  fi

  # default is ANSI: colorize the status word. An unrecognized status leaves
  # the label empty, as before.
  case "${level}" in
    PASS)       label="${TWA_COLOR_GREEN}${level}${TWA_COLOR_RESET}" ;;
    MEH)        label="${TWA_COLOR_BLUE}${level}${TWA_COLOR_RESET}" ;;
    FAIL|FATAL) label="${TWA_COLOR_RED}${level}${TWA_COLOR_RESET}" ;;
    UNK|SKIP)   label="${TWA_COLOR_PURPLE}${level}${TWA_COLOR_RESET}" ;;
  esac

  # printf '%s' prints the message verbatim. `echo -e` expanded backslash
  # sequences found in the message, and messages can embed header values sent
  # by the audited server. The color variables already hold real escape bytes
  # from tput, so no escape processing is needed.
  printf '%s(%s): %s\n' "${label}" "${subject}" "${text}"
}

# PASS: the check succeeded with nothing to improve.
# Arguments: $1 - free-form message; reported for the global ${domain}.
function PASS {
  local note="${1}"

  output "PASS" "${domain}" "${note}"
}

# MEH: the check passed, but something could be improved.
# Arguments: $1 - TWA code; its template is looked up in TWA_CODES and any
#                 ${placeholders} in it are expanded from the current shell
#                 variables before reporting.
function MEH {
  local twa_code="${1}"
  local twa_template="${TWA_CODES[${twa_code}]}"

  msg=$(eval "echo \"${twa_code}: ${twa_template}\"")
  output "MEH" "${domain}" "${msg}" "${twa_code}"
}

# FAIL: the check failed and the finding should be fixed.
# Arguments: $1 - TWA code; its template is looked up in TWA_CODES and any
#                 ${placeholders} in it are expanded from the current shell
#                 variables before reporting.
function FAIL {
  local twa_code="${1}"
  local twa_template="${TWA_CODES[${twa_code}]}"

  msg=$(eval "echo \"${twa_code}: ${twa_template}\"")
  output "FAIL" "${domain}" "${msg}" "${twa_code}"
}

# FATAL: a critical check failed and should be fixed immediately.
# Arguments: $1 - TWA code; its template is looked up in TWA_CODES and any
#                 ${placeholders} in it are expanded from the current shell
#                 variables before reporting.
function FATAL {
  local twa_code="${1}"
  local twa_template="${TWA_CODES[${twa_code}]}"

  msg=$(eval "echo \"${twa_code}: ${twa_template}\"")
  output "FATAL" "${domain}" "${msg}" "${twa_code}"
}

# UNK: the server answered with something this script does not understand.
# Arguments: $1 - free-form message; reported for the global ${domain}.
function UNK {
  local note="${1}"

  output "UNK" "${domain}" "${note}"
}

# SKIP: the corresponding check has not been implemented yet.
# Arguments: $1 - free-form message; reported for the global ${domain}.
function SKIP {
  local note="${1}"

  output "SKIP" "${domain}" "${note}"
}


# Stage 0: The server should support HTTPS.
#
# Checks:
# * Port 443 of ${domain} must accept a connection.
#
# This stage is special: on failure it reports FATAL and sets the global
# "no_https", which stage 2 inspects to skip the security-header checks
# (they are pointless over plain HTTP).
function stage_0_has_https {
  verbose "Stage 0: Server supports HTTPS"

  # Nothing to report when the domain is listening on 443.
  if probe "${domain}" 443; then
    return 0
  fi

  FATAL TWA-0001
  no_https=1

  # The rest below would be cool, but is probably unnecessary since `curl`
  # won't connect to an insecure HTTPS server by default.
  # # Then, use `openssl` to retrieve the certificate and test its validity.
  # cert=$(openssl s_client -connect "${domain}:443" < /dev/null)

  # expiry=$(openssl x509 -noout -dates <<< "${cert}")

  # not_before=$(grep -o "^notBefore=.+" <<< "${expiry}" | cut -d = -f2-)
  # not_after=$(grep -o "^notAfter=.+" <<< "${expiry}" | cut -d = -f2-)

  # if [[ -z "${not_before}" || -z "${not_after}" ]]; then
  #   # I don't think this will ever happen, but it doesn't hurt to check.
  #   FAIL "server sent a certificate, but it's missing either a notBefore or a notAfter?"
  # else
  #   not_before=$(date -d "${not_before}" +%s)
  #   not_after=$(date -d "${not_after}" +%s)
  #   now=$(date +%s)

  #   if [[ "${now}" -lt "${not_before}" ]]; then
  #     FAIL ""
  #   fi
  # fi
}


# Stage 1: HTTP should be redirected to HTTPS; no exceptions.
#
# Checks:
# * A request to http://${domain} should answer with a permanent redirect
#   (301 or 308) whose Location starts with "https".
#
# Sets the globals "headers", "location" and "response".
function stage_1_redirection {
  verbose "Stage 1: HTTP -> HTTPS redirection"

  headers=$(fetch_headers "http://${domain}")
  location=$(get_header "Location" <<< "${headers}")
  # Status code: second word of the first line that starts with "HTTP".
  read -r response < <(awk '/^HTTP/ { print $2 }' <<< "${headers}")

  case "${location}" in
    https*)
      case "${response}" in
        301|308)
          PASS "HTTP redirects to HTTPS using a ${response}"
          ;;
        302|307)
          # Temporary redirects work, but a permanent one is preferred.
          MEH TWA-0101
          ;;
        *)
          UNK "HTTP sends an HTTPS location but with a weird response code: ${response}"
          ;;
      esac
      ;;
    http*)
      FAIL TWA-0102
      ;;
    *)
      FAIL TWA-0103
      ;;
  esac
}


# Stage 2: The server should specify a decent set of security headers.
#
# Checks:
# * Strict-Transport-Security should specify a max-age
# * X-Frame-Options should be "deny"
# * X-Content-Type-Options should be "nosniff"
# * X-XSS-Protection should be "1; mode=block"
# * Referrer-Policy should be "no-referrer"
# * Content-Security-Policy should be whatever awful policy string is the most secure.
# * Expect-CT, Access-Control-Allow-Origin, Access-Control-Allow-Credentials,
#   Cross-Origin-Embedder-Policy and Cross-Origin-Opener-Policy are only
#   checked when the server sends them.
#
# Globals read:    domain, no_https (set by stage 0)
# Globals written: headers (also read by stage 3) and one variable per parsed
#                  header; "rp" and "csp_default_src" are expanded by the
#                  TWA-0213 / TWA-0215 message templates.
#
# TODO: Add Feature-Policy?
function stage_2_security_headers {
  verbose "Stage 2: Sane security headers"

  # Security headers are meaningless without a secure channel.
  if [[ -n "${no_https}" ]]; then
    FATAL TWA-0201
    return
  fi

  headers=$(fetch_headers "https://${domain}")

  sts=$(get_header "Strict-Transport-Security" <<< "${headers}")
  sts_max_age=$(get_field "max-age" <<< "${sts}" | awk -F= '{ print $2 }')
  sts_incl_subs=$(get_field "includeSubDomains" <<< "${sts}")
  sts_preload=$(get_field "preload" <<< "${sts}")

  if [[ -n "${sts}" ]]; then
    if [[ -n "${sts_max_age}" ]]; then
      # 15768000 is the threshold the PASS/MEH messages call "6 months".
      if [[ "${sts_max_age}" -ge 15768000 ]]; then
        PASS "Strict-Transport-Security max-age is at least 6 months"
      else
        MEH TWA-0202
      fi
    else
      UNK "Strict-Transport-Security received, but no max-age"
    fi

    if [[ -n "${sts_incl_subs}" ]]; then
      PASS "Strict-Transport-Security specifies includeSubDomains"
    else
      MEH TWA-0203
    fi

    if [[ -n "${sts_preload}" ]]; then
      PASS "Strict-Transport-Security specifies preload"
    else
      MEH TWA-0204
    fi
  else
    FAIL TWA-0205
  fi

  xfo=$(get_header "X-Frame-Options" <<< "${headers}")
  xfo=${xfo,,}

  if [[ -n "${xfo}" ]]; then
    if [[ "${xfo}" == "deny" ]]; then
      PASS "X-Frame-Options is 'deny'"
    elif [[ "${xfo}" == "sameorigin" ]]; then
      MEH TWA-0206
    elif [[ "${xfo}" =~ ^allow-from ]]; then
      MEH TWA-0207
    else
      UNK "X-Frame-Options set to something weird: ${xfo}"
    fi
  else
    FAIL TWA-0208
  fi

  xcto=$(get_header "X-Content-Type-Options" <<< "${headers}")

  if [[ -n "${xcto}" ]]; then
    if [[ "${xcto,,}" == "nosniff" ]]; then
      PASS "X-Content-Type-Options is 'nosniff'"
    else
      UNK "X-Content-Type-Options set to something weird: ${xcto}"
    fi
  else
    FAIL TWA-0209
  fi

  xxp=$(get_header "X-XSS-Protection" <<< "${headers}")
  xxp=${xxp,,}

  if [[ -n "${xxp}" ]]; then
    if [[ "${xxp}" == 0 ]]; then
      FAIL TWA-0210
    elif [[ "${xxp}" =~ ^1 ]]; then
      if [[ "${xxp}" =~ mode=block ]]; then
        PASS "X-XSS-Protection specifies mode=block"
      else
        MEH TWA-0211
      fi
    fi
  else
    FAIL TWA-0212
  fi

  rp=$(get_header "Referrer-Policy" <<< "${headers}")
  rp=${rp,,}

  if [[ -n "${rp}" ]]; then
    if [[ "${rp}" == "no-referrer" ]]; then
      PASS "Referrer-Policy specifies 'no-referrer'"
    # Every other standard policy value is reported as MEH. The
    # "strict-origin-when-cross-origin" value used to be missing from this
    # list, so a server sending it produced no result at all.
    elif [[ "${rp}" == "unsafe-url"
            || "${rp}" == "no-referrer-when-downgrade"
            || "${rp}" == "origin"
            || "${rp}" == "origin-when-cross-origin"
            || "${rp}" == "same-origin"
            || "${rp}" == "strict-origin"
            || "${rp}" == "strict-origin-when-cross-origin" ]]; then
      MEH TWA-0213
    fi
  else
    FAIL TWA-0214
  fi

  csp=$(get_header "Content-Security-Policy" <<< "${headers}")
  # For a "default-src <sources...>" directive get_field already prints only
  # the source list. The value is therefore used as-is: an additional
  # `cut -d " " -f2-` dropped the first source of any multi-source list
  # (e.g. "'self' https://cdn" was reported as "https://cdn").
  csp_default_src=$(get_field "default-src" <<< "${csp}")

  if [[ -n "${csp}" ]]; then
    if [[ -n "${csp_default_src}" ]]; then
      if [[ "${csp_default_src//\'}" == "none" ]]; then
        PASS "Content-Security-Policy 'default-src' is 'none'"
      else
        MEH TWA-0215
      fi
    else
      FAIL TWA-0216
    fi

    if [[ "${csp}" =~ unsafe-inline ]]; then
      FAIL TWA-0217
    fi

    if [[ "${csp}" =~ unsafe-eval ]]; then
      FAIL TWA-0218
    fi
  else
    FAIL TWA-0219
  fi

  fp=$(get_header "Feature-Policy" <<< "${headers}")

  if [[ -n "${fp}" ]]; then
    SKIP "Feature-Policy checks not implemented yet"
  else
    FAIL TWA-0220
  fi


  # Expect-CT Header checks

  # Requirements:
  #
  # Must have "max-age" attribute set
  #
  # "enforce" attribute is optional but recommended to enforce
  # compliance to the CT Policy
  #
  # "report-uri" attribute is optional but it's required to have
  # a directive value. This value doesn't need to be the same
  # domain or web origin as the host being reported.
  expect_ct=$(get_header "Expect-CT" <<< "${headers}")

  # "Expect-CT" test is skipped if header isn't found.
  if [[ -n "${expect_ct}" ]]; then

    # If max_age check fails, don't bother checking anything else.
    if [[ "${expect_ct}" =~ max-age ]]; then
      PASS "'max-age' directive is defined for Expect-CT"

      if [[ "${expect_ct}" =~ enforce ]]; then
        PASS "'enforce' directive is defined for Expect-CT"
      else
        MEH TWA-0221
      fi

      if [[ "${expect_ct}" =~ report-uri ]]; then
        PASS "'report-uri' directive is defined for Expect-CT"
      else
        MEH TWA-0222
      fi

    else
      FAIL TWA-0223
    fi

  fi

  # 'Access-Control-Allow-Origin' Header checks.

  # Requirements:
  #
  # This test only happens if the header exists or else
  # it will be skipped entirely.
  #
  # Source: https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Access-Control-Allow-Origin
  #
  ac_allow_origin=$(get_header "Access-Control-Allow-Origin" <<< "${headers}")

  # Only perform this check if the header exists.
  if [[ -n "${ac_allow_origin}" ]]; then
    # get_field is called without a field name here and in the checks below,
    # which makes it match every ';'-separated record of the value.
    allow_origin_field=$(get_field <<< "${ac_allow_origin}")

    case "${allow_origin_field}" in
      "https://${domain}")
        PASS "'Access-Control-Allow-Origin' is properly configured for this domain."
        ;;
      "*")
        # This resource can be accessed by any domain
        FAIL TWA-0224
        ;;
      "null")
        # The 'Origin' header can be crafted to grant access to the resource and is not recommended to use.
        FAIL TWA-0225
        ;;
      *)
        # This header is not configured properly.
        FAIL TWA-0226
        ;;
    esac
  fi

  # 'Access-Control-Allow-Credentials' Header checks.

  # Requirements:
  #
  # This test only happens if the header exists or else
  # it will be skipped entirely.
  #
  # Source: https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Access-Control-Allow-Credentials
  #
  ac_allow_credentials=$(get_header "Access-Control-Allow-Credentials" <<< "${headers}")

  if [[ -n "${ac_allow_credentials}" ]]; then
    allow_credentials_field=$(get_field <<< "${ac_allow_credentials}")

    case "${allow_credentials_field}" in
      "true")
        PASS "'Access-Control-Allow-Credentials' is properly configured for this domain."
        ;;
      "false")
        # It's better to remove this header entirely instead of setting the value to false.
        FAIL TWA-0227
        ;;
      *)
        # This header is not configured properly
        FAIL TWA-0228
        ;;
    esac
  fi


  # 'Cross-Origin-Embedder-Policy' Header checks.

  # Requirements:
  #
  # This test only happens if the header exists or else
  # it will be skipped entirely.
  #
  # Source: https://html.spec.whatwg.org/multipage/origin.html#coep
  # Source: https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Cross-Origin-Embedder-Policy
  #
  coep=$(get_header "Cross-Origin-Embedder-Policy" <<< "${headers}")

  if [[ -n "${coep}" ]]; then
    coep_field=$(get_field <<< "${coep}")

    case "${coep_field}" in
      "require-corp")
        PASS "'Cross-Origin-Embedder-Policy' Fetching cross-origin resources requires the server's explicit permission through the CORS protocol or the 'Cross-Origin-Resource-Policy' header."
        ;;
      "unsafe-none")
        # cross-origin resources can be fetched without giving explicit permission through the CORS protocol or the `Cross-Origin-Resource-Policy` header.
        MEH TWA-0229
        ;;
      *)
        UNK "Unknown header field value for 'Cross-Origin-Embedder-Policy': ${coep_field}"
        ;;
    esac
  fi


  # 'Cross-Origin-Opener-Policy' Header checks.

  # Requirements:
  #
  # This test only happens if the header exists or else
  # it will be skipped entirely.
  #
  # Source: https://html.spec.whatwg.org/multipage/origin.html#cross-origin-opener-policies
  # Source: https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Cross-Origin-Opener-Policy
  #
  coop=$(get_header "Cross-Origin-Opener-Policy" <<< "${headers}")

  if [[ -n "${coop}" ]]; then
    coop_field=$(get_field <<< "${coop}")

    case "${coop_field}" in
      "same-origin")
        PASS "'Cross-Origin-Opener-Policy' Isolates the browsing context exclusively to same-origin documents."
        ;;
      "same-origin-allow-popups")
        PASS "'Cross-Origin-Opener-Policy' Retains references to newly opened windows or tabs."
        ;;
      "unsafe-none")
        # Allows the document to be added to its opener's browsing context group.
        MEH TWA-0230
        ;;
      *)
        UNK "Unknown header field value for 'Cross-Origin-Opener-Policy': ${coop_field}"
        ;;
    esac
  fi
}

# Stage 3: The server should disclose a minimum amount of information about itself.
#
# Checks:
#  * "Server:" should be absent or a single word without a "/" (i.e. no
#    version tag and no extra words).
#  * Common identifying headers (X-Powered-By, Via, X-AspNet-Version,
#    X-AspNetMvc-Version) should not be sent.
#
# Works on the global "headers" left behind by an earlier stage; it does not
# fetch anything itself.
function stage_3_server_information_disclosure {
  verbose "Stage 3: Information disclosure"
  server=$(get_header "Server" <<< "${headers}")

  if [[ -z "${server}" ]]; then
    PASS "Site doesn't send 'Server' header"
  else
    server_wc=$(wc -w <<< "${server}")

    if [[ "${server_wc}" -gt 1 ]]; then
      # More than one word: vendor plus extra details.
      FAIL TWA-0302
    elif [[ "${server}" == */* ]]; then
      # A single "name/version" token.
      FAIL TWA-0301
    else
      PASS "Site sends 'Server', but probably only a vendor ID: ${server}"
    fi
  fi

  # "badheader" and "content" are expanded by the TWA-0303 template.
  for badheader in X-Powered-By Via X-AspNet-Version X-AspNetMvc-Version; do
    content=$(get_header "${badheader}" <<< "${headers}")

    if [[ -z "${content}" ]]; then
      PASS "Site doesn't send '${badheader}'"
    else
      FAIL TWA-0303
    fi
  done
}


# Helper for stage 4: request http://${domain}/<path> and report the outcome.
#
# Arguments:
#   $1 - path relative to the site root
#   $2 - noun for the PASS message    ("No <noun> at: <url>")
#   $3 - phrase for the UNK message   ("... when testing for <phrase> at: <url>")
#   $4 - TWA code reported with FAIL when the file is served (HTTP 200)
#   $5 - TWA code reported with MEH when access is forbidden (HTTP 403)
#
# Sets the globals "url" (expanded by the TWA-04xx message templates), "resp",
# "code" and "eurl".
function _stage_4_check_path {
  local path="${1}" noun="${2}" phrase="${3}" fail_code="${4}" meh_code="${5}"

  url="http://${domain}/${path}"

  read -r -a resp < <(fetch_respcode "${url}")
  code="${resp[0]}"
  eurl="${resp[1]}"

  # A 404, or a redirect chain that ends somewhere other than the requested
  # file, both mean the file is not being served. The path is quoted so it is
  # always compared literally.
  if [[ "${code}" -eq 404 || "${eurl}" != *"${path}" ]]; then
    PASS "No ${noun} at: ${url}"
  elif [[ "${code}" -eq 200 ]]; then
    FAIL "${fail_code}"
  elif [[ "${code}" -eq 403 ]]; then
    MEH "${meh_code}"
  else
    UNK "Got a weird response code (${code}) when testing for ${phrase} at: ${url}"
  fi
}

# Stage 4: The server shouldn't be serving SCM repositories, build tool files,
# or common environment files.
#
# Checks:
# * GET /.git/HEAD should 404.
# * GET /.hg/store/00manifest.i should 404.
# * GET /.svn/entries should 404.
# * GET /.env should 404.
# * GET /.envrc should 404.
# * GET /.dockerenv should 404.
# * The same holds for the config, package-management and build files listed
#   in the function body.
function stage_4_repo_and_env_disclosure {
  verbose "Stage 4: SCM repo and env file disclosure"

  for repo_file in .git/HEAD .hg/store/00manifest.i .svn/entries; do
    _stage_4_check_path "${repo_file}" "SCM repository" "SCM" TWA-0401 TWA-0402
  done

  for env_file in .env .envrc .dockerenv; do
    _stage_4_check_path "${env_file}" "environment file" "an environment file" TWA-0403 TWA-0404
  done

  config_files=(
    config.xml
    config.json
    config.yaml
    config.ini
    config.cfg
  )

  for config_file in "${config_files[@]}"; do
    _stage_4_check_path "${config_file}" "config file" "a config file" TWA-0405 TWA-0406
  done

  pm_files=(
    # Node.js
    .npmrc
    package.json
    package-lock.json
    # Ruby
    .gem/credentials
    Gemfile
    Gemfile.lock
    Rakefile
    # Python
    .pypirc
    setup.py
    setup.cfg
    requirements.txt
    Pipfile
    Pipfile.lock
    pyproject.toml
    # Rust
    Cargo.lock
    Cargo.toml
  )

  # The UNK message here (and for build files below) used to say "config
  # file", a copy-paste slip; it now names the kind of file actually tested.
  for pm_file in "${pm_files[@]}"; do
    _stage_4_check_path "${pm_file}" "package management file" "a package management file" TWA-0407 TWA-0408
  done

  build_files=(
    # Docker
    Dockerfile
    docker-compose.yml
    # Make
    Makefile
    GNUMakefile
    # CMake
    CMakeLists.txt
    # Autoconf and related tools
    configure
    configure.ac
    Makefile.am
    Makefile.in
    # Other build tools
    Justfile
  )

  for build_file in "${build_files[@]}"; do
    _stage_4_check_path "${build_file}" "build file" "a build file" TWA-0409 TWA-0410
  done
}

# Stage 5: Check if the server provides a robots.txt and a security.txt file.
#
# Both files are requested through fetch_respcode (redirects are followed).
# A served robots.txt is additionally downloaded and searched for "admin" and
# "cgi-bin".
function stage_5_robots_and_security_check {
  verbose "Stage 5: robots.txt and security.txt checks"

  # --- robots.txt ("The Robots Exclusion Protocol") ---
  url="${domain}/robots.txt"
  read -r -a resp < <(fetch_respcode "${url}")
  code="${resp[0]}"

  if [[ "${code}" -eq 404 ]]; then
    MEH TWA-0501
  elif [[ "${code}" -ne 200 ]]; then
    UNK "Got a weird response code (${code}) when testing for robots.txt at: ${url}"
  else
    PASS "Site provides robots.txt"
    robots=$(fetch -L -s "${url}")

    # Entries that hint at sensitive locations.
    if [[ "${robots}" == *admin* ]]; then
       MEH TWA-0502
    fi

    if [[ "${robots}" == *cgi-bin* ]]; then
       MEH TWA-0503
    fi
  fi

  # --- security.txt ---
  url="${domain}/.well-known/security.txt"
  read -r -a resp < <(fetch_respcode "${url}")
  code="${resp[0]}"

  if [[ "${code}" -eq 404 ]]; then
    MEH TWA-0504
  elif [[ "${code}" -ne 200 ]]; then
    UNK "Got a weird response code (${code}) when testing for security.txt at: ${url}"
  else
    # TODO(ww): Maybe test the contents of security.txt.
    PASS "Site provides security.txt"
  fi
}

# Stage 6: Check for CAA records.
#
# Checks:
# * The domain should specify at least one issue record.
# * The domain should specify at least one iodef record.
#
# Records are looked up with `dig` for ${domain} and then for each parent
# domain in turn, stopping at the first level that yields a usable record.
# Sets the globals issuers, wildcard_issuers, iodefs, valid and subdom.
function stage_6_caa {
  verbose "Stage 6: CAA checks"

  issuers=()
  wildcard_issuers=()
  iodefs=()
  valid=""
  subdom="${domain}"

  while [[ $subdom == *.* && -z "${valid}" ]]; do
    verbose "Checking ${subdom}"
    records=$(dig +noall +answer caa "${subdom}")

    if [[ -n "${records}" ]]; then
      # Answer line layout: name ttl class type flag tag value...
      while read -r -a record; do
        type="${record[3]}"
        flag="${record[4]}"
        tag="${record[5]}"
        # Take every remaining word as the value, so a quoted value that
        # contains spaces (e.g. "ca.example; validationmethods=dns-01") is
        # kept whole instead of being cut at the first space.
        value="${record[*]:6}"
        value="${value//\"}"

        # Other record types in the answer are ignored.
        [[ "${type}" == "CAA" ]] || continue

        if [[ -z "${flag}" || -z "${tag}" || -z "${value}" ]]; then
          UNK "Malformed CAA record? (flag=${flag}, tag=${tag}, value=${value})"
          continue
        fi

        if [[ "${flag}" -ne 0 ]]; then
          UNK "Nonzero CAA flags: ${flag} for ${tag} ${value}"
          continue
        fi

        # The value is known to be non-empty here, so the per-tag
        # "missing value" branches that used to follow could never run and
        # have been removed.
        case "${tag}" in
          issue)
            issuers+=("${value}")
            valid+="Y"
            ;;
          issuewild)
            wildcard_issuers+=("${value}")
            valid+="Y"
            ;;
          iodef)
            iodefs+=("${value}")
            valid+="Y"
            ;;
          *)
            UNK "Weird (nonstandard?) CAA tag: ${tag}"
            ;;
        esac
      done <<< "${records}"
    fi
    # Move up a level
    subdom="${subdom#*.}"
  done

  if [[ -z "${valid}" ]]; then
    FAIL TWA-0601
    return
  fi

  if [[ "${#issuers[@]}" -eq 0 ]]; then
    FAIL TWA-0602
  elif [[ "${#issuers[@]}" -eq 1 && "${issuers[0]}" == ";" ]]; then
    # NOTE(ww): If every site should have HTTPS, then every domain should have CAA
    # record(s) that specify all valid issuers for certificates.
    FAIL TWA-0603
  else
    PASS "Domain explicitly allows one or more issuers: ${issuers[*]}"
  fi

  # TODO(ww): What's the best "issuewild" state? Is there one?
  # References:
  # https://github.com/SSLMate/caatestsuite/issues/9
  # https://community.qualys.com/message/41613-re-caa-feature-request-warn-when-there-is-no-issuewild

  if [[ "${#iodefs[@]}" -eq 0 ]]; then
    # NOTE(ww): This could be a failure, but it's more of a reporting shortcoming
    # than a security/privacy issue.
    MEH TWA-0604
  else
    PASS "Domain specifies one or more reporting endpoints: ${iodefs[*]}"
  fi
}

# Stage 7: Check for common open development/backend ports.
#
# Checks:
#  * Each port should not respond to a connection request.
#
# The whole stage is skipped when the global "disableportscan" is non-empty
# (the '-d' flag). Sets the globals "dev_port" and "dev_port_comment", which
# the TWA-0701 message template expands.
function stage_7_open_development_ports {
  # Announce the stage like every other stage does.
  verbose "Stage 7: Open development ports"

  # Skip the portscan if the '-d' flag is set; checked first so the port
  # table is not built for nothing.
  if [[ -n "${disableportscan}" ]]; then
    return
  fi

  declare -A dev_ports=(
    [1433]='Microsoft SQL Server default port'
    [3000]='node.js (express.js), ruby on rails'
    [3050]='Interbase, Firebird default port'
    [3306]='MySQL and MariaDB default port'
    [4443]='common https development port'
    [4567]='sinatra default port'
    [5000]='Flask and Kestrel default port'
    [5432]='PostgreSQL default port'
    [6379]='Redis default port'
    [8000]='common http development port'
    [8008]='common http development port'
    [8080]='common http development port'
    [8081]='common http development port'
    [8086]='InfluxDB HTTP service default port'
    [8088]='common http development port'
    [8093]='Couchbase Query service REST traffic'
    [8443]='common https development port'
    [8888]='common http development port'
    [9200]='Elasticsearch REST API default port'
    [9292]='rack default port'
    [27017]='MongoDB default port'
    [33060]='MySQL X-Protocol default port'
  )

  for dev_port in "${!dev_ports[@]}"; do
    dev_port_comment=${dev_ports[$dev_port]}
    # Start probe in a parallel process because, on closed ports the time out takes some seconds.
    if probe "${domain}" "${dev_port}"; then
      FAIL TWA-0701
    else
      PASS "Domain is not listening on port ${dev_port} (${dev_port_comment})"
    fi &
    # Limit the script to 2 port scans per second by 1/2 second wait.
    sleep 0.5
  done
  # Wait until all port scans are completed.
  wait
}

# Stage 8: Cookie checks.
#
# Checks:
#  * all cookies must have the secure flag.
#  * each cookie should have the httponly flag. one MEH for each cookie that doesn't.
#  * each cookie should carry a SameSite attribute: 'strict' passes, 'lax' is
#    a MEH, any other value fails, and a missing attribute is a MEH.
#  * cookies whose name uses the '__Secure-' or '__Host-' prefix must satisfy
#    the requirements listed next to each check below.
#
# Globals read:
#  * domain: the host whose HTTPS response headers are inspected.
#
# Relies on fetch_headers, get_cookies and get_field, which are defined
# elsewhere in this file; get_field is assumed to print the matching cookie
# attribute or nothing at all (verify against its definition).
function stage_8_cookie_checks {
  # Locals stay visible to PASS/MEH/FAIL (and to any message template that
  # interpolates e.g. ${cookie_name}) through bash's dynamic scoping.
  local headers cookies cookie cookie_name
  local has_secure has_httponly has_samesite samesite_value
  local has_secure_prefix has_host_prefix has_domain domain_value
  local has_path path_value
  local -a cookiearray

  verbose "Stage 8: Cookie checks"

  headers=$(fetch_headers "https://${domain}")
  cookies=$(get_cookies <<< "${headers}")

  # Nothing to audit when the response sets no cookies.
  if [[ -z "${cookies}" ]]; then
    return 0
  fi

  mapfile -t cookiearray <<< "${cookies}"

  for cookie in "${cookiearray[@]}"; do
    cookie_name=$(cut -d= -f1 <<< "${cookie}")
    has_secure=$(get_field "secure" <<< "${cookie}")
    has_httponly=$(get_field "HttpOnly" <<< "${cookie}")
    has_samesite=$(get_field "SameSite" <<< "${cookie}")

    # 'secure' is mandatory; 'httponly' is strongly recommended.
    if [[ -z "${has_secure}" ]]; then
      FAIL TWA-0802
    elif [[ -z "${has_httponly}" ]]; then
      MEH TWA-0801
    else
      PASS "cookie '${cookie_name}' has both 'secure' and 'httponly' flags"
    fi

    if [[ -n "${has_samesite}" ]]; then
      samesite_value=$(awk -F= '{ print tolower($2) }' <<< "${has_samesite}")

      if [[ "${samesite_value}" == "strict" ]]; then
        PASS "cookie '${cookie_name}' has 'strict' SameSite flag set"
      elif [[ "${samesite_value}" == "lax" ]]; then
        MEH TWA-0803
      else
        FAIL TWA-0804
      fi
    else
      MEH TWA-0805
    fi

    # Cookie prefix checks

    # Used in both prefix checks.
    has_domain=$(get_field "domain" <<< "${cookie}")

    # __Secure requirements (as enforced by this script):
    #
    # Must have "Secure" attribute
    # Must come from URI that is considered "secure"
    # Must have "Domain" attribute and its value must equal the domain being tested
    #
    # NOTE(review): the cookie-prefixes specification does not appear to
    # require a "Domain" attribute for '__Secure-' cookies; confirm whether
    # TWA-0806/TWA-0809 are intentionally stricter than the spec.
    has_secure_prefix=$(get_field "__secure-" <<< "${cookie}")

    # Test is skipped if "has_secure_prefix" isn't found.
    if [[ -n "${has_secure_prefix}" ]]; then
      if [[ -z "${has_secure}" ]]; then
        FAIL TWA-0802
      elif [[ -z "${has_domain}" ]]; then
        FAIL TWA-0806
      else
        domain_value=$(awk -F= '{ print tolower($2) }' <<< "${has_domain}")
        if [[ "${domain_value}" == "${domain}" ]]; then
          PASS "cookie '${cookie_name}' passes all '__Secure' prefix checks"
        else
          FAIL TWA-0809
        fi
      fi
    fi

    # __Host requirements:
    #
    # Must have "Secure" attribute
    # Must come from URI that is "secure"
    # Must NOT contain a "Domain" attribute
    # Must contain a "Path" attribute with a value of "/"
    has_host_prefix=$(get_field "__host-" <<< "${cookie}")

    # Test is skipped if "has_host_prefix" isn't found.
    if [[ -n "${has_host_prefix}" ]]; then
      if [[ -z "${has_secure}" ]]; then
        FAIL TWA-0802
      elif [[ -n "${has_domain}" ]]; then
        FAIL TWA-0807
      else
        has_path=$(get_field "path" <<< "${cookie}")
        if [[ -z "${has_path}" ]]; then
          FAIL TWA-0808
        else
          # Strip stray blanks/CRs so that a trailing carriage return from the
          # header line cannot turn a valid "Path=/" into a false failure.
          path_value=$(awk -F= '{ gsub(/[ \t\r]/, "", $2); print $2 }' <<< "${has_path}")
          # The path must be exactly "/"; merely having some value (e.g.
          # "/admin") does not satisfy the '__Host-' prefix.
          if [[ "${path_value}" == "/" ]]; then
            PASS "cookie '${cookie_name}' passes all '__Host' prefix checks"
          else
            FAIL TWA-0810
          fi
        fi
      fi
    fi
  done
}

# Stage 9 (optional): Run 'testssl.sh'
#
# This test is completely optional and will be skipped if neither 'testssl'
# nor 'testssl.sh' is found in your PATH environment variable. By default,
# 'testssl.sh' will not run unless the '-s' flag is presented.
#
# Runs 'testssl.sh' which checks a server's service on any port
# for the support of TLS/SSL ciphers, protocols as well as some
# cryptographic flaws.
#
# Additional information about 'testssl.sh' can be
# located at: https://github.com/drwetter/testssl.sh
#
# Globals read:
#  * testssl: non-empty when the '-s' flag was given.
#  * domain: the host handed to testssl.
# Globals written:
#  * testssl_tmpdir: private scratch directory, removed by an EXIT trap.
#
# Each finding in the JSON report is mapped by severity: INFO goes to
# verbose, OK to PASS, LOW to MEH, MEDIUM/HIGH to FAIL, CRITICAL to FATAL,
# and anything else to UNK.
function stage_9_testssl {
  local testssl_cmd testssl_json line severity id finding
  local -a record
  local -A severity_map=(
    [OK]=PASS
    [LOW]=MEH
    [MEDIUM]=FAIL
    [HIGH]=FAIL
    [CRITICAL]=FATAL
  )

  verbose "Stage 9: SSL checks"

  # Skip the test if the '-s' flag is missing
  if [[ -z "${testssl}" ]]; then
    return
  fi

  if installed testssl; then
    testssl_cmd=testssl
  elif installed testssl.sh; then
    testssl_cmd=testssl.sh
  else
    UNK "'testssl' not found or may not be installed. Check your PATH environment variable."
    return
  fi

  verbose "testssl is running, be patient"

  # Let testssl create its report inside a freshly created private directory
  # rather than at a name that was merely *predicted* with 'mktemp -u', which
  # another process could claim first. The variable is deliberately global:
  # the EXIT trap expands it when the script terminates, after this
  # function's locals are gone.
  testssl_tmpdir=$(mktemp -d "${TMPDIR:-/tmp}/twa-testssl-XXXXXX") || {
    UNK "testssl: unable to create a temporary directory for the report"
    return
  }
  trap 'rm -rf -- "${testssl_tmpdir}"' EXIT
  testssl_json="${testssl_tmpdir}/testssl.json"

  # The exit status is not inspected; only the JSON report is evaluated.
  "${testssl_cmd}" --jsonfile "${testssl_json}" "${domain}" > /dev/null

  if [[ ! -s "${testssl_json}" ]]; then
    UNK "testssl did not produce a report for '${domain}'"
    return
  fi

  # jq flattens every finding into "severity<SOH>id<SOH>finding"; the 0x01
  # byte is used as separator because it won't occur in the text fields.
  while read -r line; do
    IFS=$'\x01' read -ra record <<< "${line}"
    severity="${record[0]}"
    id="${record[1]}"
    finding="${record[2]}"

    if [[ "${severity}" == "INFO" ]]; then
      verbose "testssl: '${finding}' (${id})"
    elif [[ "${severity}" == "OK" ]]; then
      PASS "testssl: '${finding}'"
    else
      # Translate the testssl severity into the name of the reporting
      # function. The fallback key keeps an empty severity from raising a
      # "bad array subscript" error; unmapped severities end up as UNK.
      severity="${severity_map[${severity:-UNKNOWN}]}"

      if [[ -z "${severity}" ]]; then
        UNK "testssl: '${finding}'"
      else
        "${severity}" TWA-0901
      fi
    fi
  done < <(jq -r $'.[] | [.severity, .id, .finding] | join("\x01")' < "${testssl_json}")
}



# Command-line handling and stage driver.
#
# Flags: -w also audit the www subdomain, -v verbose, -c CSV output,
# -s run testssl, -d disable the port scan, -V print the version.
while getopts ":wvcsdV" opt; do
  case "${opt}" in
    w ) www=1 ;;
    v ) verbose=1 ;;
    c ) csv=1 ;;
    s ) testssl=1 ;;
    d ) disableportscan=1 ;;
    V ) echo "twa version ${TWA_VERSION}"; exit ;;
    # An unrecognized option is a usage error: signal failure to the caller.
    \? ) usage; exit 1 ;;
  esac
done

shift $((OPTIND - 1))

# RFC4343: Domain Name System (DNS) Case Insensitivity Clarification
# Throughout twa, case-insensitive comparisons are done by converting to lowercase.
domain="${1,,}"

[[ -n "${domain}" ]] || { usage; exit 1;  }
[[ "${domain}" =~ ^https?:// ]] && die "Expected a bare domain, e.g. 'example.com'"

# Set once the www subdomain has been audited by a child twa process.
www_scanned=""

if [[ -n "${www}" ]]; then
  # Match the 'www.' label only, so that domains which merely start with the
  # letters "www" (e.g. 'wwwidgets.com') still get their www subdomain audited.
  if [[ "${domain}" =~ ^www\. ]]; then
    warn "Expected non-www domain with -w, not going any deeper than ${domain}"
  else
    # Hand the relevant flags down to the child run: otherwise the www host
    # would be port-scanned despite '-d', and reported in the plain format
    # despite '-c'. '-w' itself is not forwarded.
    www_flags=()
    [[ -n "${verbose}" ]] && www_flags+=(-v)
    [[ -n "${csv}" ]] && www_flags+=(-c)
    [[ -n "${testssl}" ]] && www_flags+=(-s)
    [[ -n "${disableportscan}" ]] && www_flags+=(-d)

    "${0}" "${www_flags[@]}" "www.${domain}"
    www_scanned=1
  fi
fi

# If we're in CSV mode, write some column titles, unless the www run (which
# inherits '-c') has already written them to the same stream.
if [[ -n "${csv}" && -z "${www_scanned}" ]]; then
  output status domain message code
fi

stage_0_has_https
stage_1_redirection
stage_2_security_headers
stage_3_server_information_disclosure
stage_4_repo_and_env_disclosure
stage_5_robots_and_security_check
stage_6_caa
stage_7_open_development_ports
stage_8_cookie_checks
stage_9_testssl

# TODO(ww): Something in stage 9 is causing twa to exit with 1, despite appearing
# to succeed otherwise. Find it and destroy it.
exit 0
