Settings

  • default_settings.py

    /usr/local/lib/python3.10/site-packages/scrapydweb/default_settings.py

  • user settings

    /usr/src/app/scrapydweb_settings_v10.py

  • main_pid: 8

  • logparser_pid: None

  • poll_pid: None

    ScrapydWeb

    • * server *

      {
          "SCRAPYDWEB_BIND": "0.0.0.0",
          "SCRAPYDWEB_PORT": 5000,
          "URL_SCRAPYDWEB": "http://127.0.0.1:5000",
          "ENABLE_AUTH": False,
          "USERNAME": "",
          "PASSWORD": ""
      }
    • ENABLE_HTTPS = False

      {
          "CERTIFICATE_FILEPATH": "",
          "PRIVATEKEY_FILEPATH": ""
      }

    Scrapy

    • SCRAPY_PROJECTS_DIR

      /var/lib/scrapydweb/data/demo_projects

    Scrapyd

    • * servers *

      {
          "": [
              "scrapyd-1:6800",
              "scrapyd-2:6800",
              "scrapyd-3:6800"
          ]
      }
    • LOCAL_SCRAPYD_SERVER

      ''
    • LOCAL_SCRAPYD_LOGS_DIR

      ''
    • SCRAPYD_LOG_EXTENSIONS

      ['.log', '.log.gz', '.txt']

    LogParser

    • ENABLE_LOGPARSER = False

    • version: 0.8.2

    • settings.py

      /usr/local/lib/python3.10/site-packages/logparser/settings.py

    • BACKUP_STATS_JSON_FILE = True

    Timer tasks

    • scheduler.state: STATE_RUNNING

    • JOBS_SNAPSHOT_INTERVAL = 300

    Run Spider

    • details

      {
          "SCHEDULE_EXPAND_SETTINGS_ARGUMENTS": False,
          "SCHEDULE_CUSTOM_USER_AGENT": "Mozilla/5.0",
          "SCHEDULE_USER_AGENT": None,
          "SCHEDULE_ROBOTSTXT_OBEY": None,
          "SCHEDULE_COOKIES_ENABLED": None,
          "SCHEDULE_CONCURRENT_REQUESTS": None,
          "SCHEDULE_DOWNLOAD_DELAY": None,
          "SCHEDULE_ADDITIONAL": "-d setting=CLOSESPIDER_TIMEOUT=60\r\n-d setting=CLOSESPIDER_PAGECOUNT=10\r\n-d arg1=val1"
      }

    Page Display

    • details

      {
          "SHOW_SCRAPYD_ITEMS": True,
          "SHOW_JOBS_JOB_COLUMN": True,
          "JOBS_FINISHED_JOBS_LIMIT": 5000,
          "JOBS_RELOAD_INTERVAL": 300,
          "DAEMONSTATUS_REFRESH_INTERVAL": 10
      }

    Send Text

    • Slack

      {
          "SLACK_TOKEN": "",
          "SLACK_CHANNEL": "general"
      }
    • Telegram

      {
          "TELEGRAM_TOKEN": "",
          "TELEGRAM_CHAT_ID": 0
      }
    • Email

      {
          "EMAIL_SUBJECT": "Email from #scrapydweb"
      }
    • email sender & recipients

      {
          "EMAIL_USERNAME": "",
          "EMAIL_PASSWORD": "",
          "EMAIL_SENDER": "",
          "EMAIL_RECIPIENTS": [
              ""
          ]
      }
    • email smtp settings

      {
          "SMTP_SERVER": "",
          "SMTP_PORT": 0,
          "SMTP_OVER_SSL": False,
          "SMTP_CONNECTION_TIMEOUT": 30
      }

    Monitor & Alert

    • ENABLE_MONITOR = False

    • poll interval

      {
          "POLL_ROUND_INTERVAL": 300,
          "POLL_REQUEST_INTERVAL": 10
      }
    • alert switcher

      {
          "ENABLE_SLACK_ALERT": False,
          "ENABLE_TELEGRAM_ALERT": False,
          "ENABLE_EMAIL_ALERT": False
      }
    • alert working time

      [
          {
              "ALERT_WORKING_DAYS": "[]",
              "remark": "Monday is 1 and Sunday is 7"
          },
          {
              "ALERT_WORKING_HOURS": "[]",
              "remark": "From 0 to 23"
          }
      ]
    • triggers

      {
          "ON_JOB_RUNNING_INTERVAL": 0,
          "ON_JOB_FINISHED": False,
          "CRITICAL": {
              "LOG_CRITICAL_THRESHOLD": 0,
              "LOG_CRITICAL_TRIGGER_STOP": False,
              "LOG_CRITICAL_TRIGGER_FORCESTOP": False
          },
          "ERROR": {
              "LOG_ERROR_THRESHOLD": 0,
              "LOG_ERROR_TRIGGER_STOP": False,
              "LOG_ERROR_TRIGGER_FORCESTOP": False
          },
          "WARNING": {
              "LOG_WARNING_THRESHOLD": 0,
              "LOG_WARNING_TRIGGER_STOP": False,
              "LOG_WARNING_TRIGGER_FORCESTOP": False
          },
          "REDIRECT": {
              "LOG_REDIRECT_THRESHOLD": 0,
              "LOG_REDIRECT_TRIGGER_STOP": False,
              "LOG_REDIRECT_TRIGGER_FORCESTOP": False
          },
          "RETRY": {
              "LOG_RETRY_THRESHOLD": 0,
              "LOG_RETRY_TRIGGER_STOP": False,
              "LOG_RETRY_TRIGGER_FORCESTOP": False
          },
          "IGNORE": {
              "LOG_IGNORE_THRESHOLD": 0,
              "LOG_IGNORE_TRIGGER_STOP": False,
              "LOG_IGNORE_TRIGGER_FORCESTOP": False
          }
      }

    System

    • DEBUG = False

    • VERBOSE = True

    • DATA_PATH

      /var/lib/scrapydweb/data
    • DATABASE

      {
          "APSCHEDULER_DATABASE_URI": "sqlite:////var/lib/scrapydweb/data/database/apscheduler.db",
          "SQLALCHEMY_DATABASE_URI": "sqlite:////var/lib/scrapydweb/data/database/timer_tasks.db",
          "SQLALCHEMY_BINDS_METADATA": "sqlite:////var/lib/scrapydweb/data/database/metadata.db",
          "SQLALCHEMY_BINDS_JOBS": "sqlite:////var/lib/scrapydweb/data/database/jobs.db"
      }