使用Tengine替代Nginx作为负载均衡服务器


Tengine是由淘宝网发起的Web服务器项目。它在Nginx的基础上,针对大访问量网站的需求,添加了很多高级功能和特性。Tengine的性能和稳定性已经在淘宝网、天猫商城等大型网站上得到了很好的检验。它的最终目标是打造一个高效、稳定、安全、易用的Web平台。

而本文章中的配置参数,都已经在生产环境中得到应用,针对10万高并发的状态以及8核CPU做了相应的优化。

1. 增大Nginx用户的open files数值
[root@idc1-server1 ~]$ sudo -i
[root@idc1-server1 ~]# vim /etc/security/limits.conf

 
# Hard and soft open-file (nofile) limits for the nginx user.
nginx hard nofile 102400
nginx soft nofile 102400

2. 优化内核参数
[root@idc1-server1 ~]# vim /etc/sysctl.conf

 
# For Nginx
# Kernel parameters added to /etc/sysctl.conf for the load balancer host;
# they are loaded afterwards with `sysctl -p`.
net.ipv4.ip_forward = 0
net.ipv4.conf.default.rp_filter = 1
net.ipv4.conf.default.accept_source_route = 0
kernel.sysrq = 0
kernel.core_uses_pid = 1
net.ipv4.tcp_syncookies = 1
kernel.msgmnb = 65536
kernel.msgmax = 65536
kernel.shmmax = 68719476736
kernel.shmall = 4294967296
net.ipv4.tcp_max_tw_buckets = 6000
net.ipv4.tcp_sack = 1
net.ipv4.tcp_window_scaling = 1
# TCP buffer sizes, three values each (presumably min / default / max in bytes — confirm).
net.ipv4.tcp_rmem = 4096 87380 4194304
net.ipv4.tcp_wmem = 4096 16384 4194304
net.core.wmem_default = 8388608
net.core.rmem_default = 8388608
net.core.rmem_max = 16777216
net.core.wmem_max = 16777216
# Queue and backlog sizes raised for a large number of concurrent connections.
net.core.netdev_max_backlog = 262144
net.core.somaxconn = 262144
net.ipv4.tcp_max_orphans = 3276800
net.ipv4.tcp_max_syn_backlog = 262144
# NOTE(review): timestamps are disabled here, while tcp_tw_recycle and
# tcp_tw_reuse are enabled below; those two options are generally described
# as depending on TCP timestamps — confirm this combination is intended.
net.ipv4.tcp_timestamps = 0
net.ipv4.tcp_synack_retries = 1
net.ipv4.tcp_syn_retries = 1
# NOTE(review): tcp_tw_recycle is known to cause trouble for clients behind
# NAT and no longer exists in Linux 4.12 and later — confirm the target kernel.
net.ipv4.tcp_tw_recycle = 1
net.ipv4.tcp_tw_reuse = 1
net.ipv4.tcp_mem = 94500000 915000000 927000000
net.ipv4.tcp_fin_timeout = 1
net.ipv4.tcp_keepalive_time = 30
net.ipv4.ip_local_port_range = 1024 65000
# NOTE(review): this system-wide value equals the per-process limit of 102400
# that the same guide gives to the nginx user and to worker_rlimit_nofile,
# while 8 worker processes are configured — confirm it is high enough.
fs.file-max = 102400

[root@idc1-server1 ~]# sysctl -p

3. 安装并配置Tengine
[root@idc1-server1 ~]# /etc/init.d/nginx stop
[root@idc1-server1 ~]# yum erase nginx
[root@idc1-server1 ~]# useradd -M -g nginx -d /opt/tengine -s /sbin/nologin -c "Nginx web server" nginx

[root@idc1-server1 ~]# mkdir -p /root/dong/downloads/
[root@idc1-server1 ~]# cd /root/dong/downloads/
[root@idc1-server1 downloads]# yum install gcc gcc-c++ pcre-devel openssl openssl-devel
[root@idc1-server1 downloads]# wget http://tengine.taobao.org/download/tengine-2.0.0.tar.gz
[root@idc1-server1 downloads]# tar xzvf tengine-2.0.0.tar.gz
[root@idc1-server1 downloads]# cd tengine-2.0.0
[root@idc1-server1 tengine-2.0.0]# ./configure --prefix=/opt/tengine
[root@idc1-server1 tengine-2.0.0]# make
[root@idc1-server1 tengine-2.0.0]# make install

[root@idc1-server1 tengine-2.0.0]# mkdir /opt/tengine/run
[root@idc1-server1 tengine-2.0.0]# touch /etc/init.d/tengine
[root@idc1-server1 tengine-2.0.0]# chmod +x /etc/init.d/tengine
[root@idc1-server1 tengine-2.0.0]# vim /etc/init.d/tengine

 
#!/bin/sh
#
# nginx - this script starts and stops the nginx daemon
#
# chkconfig:   - 85 15
# processname: nginx
# config:      /opt/tengine/conf/nginx.conf
# pidfile:     /opt/tengine/run/nginx.pid

# Load the init-script function library.
. /etc/rc.d/init.d/functions

# Load the networking configuration.
. /etc/sysconfig/network

# Do nothing when networking is switched off.
if [ "$NETWORKING" = "no" ]; then
    exit 0
fi

# Locations inside the Tengine installation under /opt/tengine.
NGINX_CONF_FILE="/opt/tengine/conf/nginx.conf"
nginx="/opt/tengine/sbin/nginx"

# Program name: the last path component of the binary.
prog=${nginx##*/}

pidfile="/opt/tengine/run/${prog}.pid"
lockfile="/var/lock/subsys/nginx"

# Start the daemon with the configured binary and configuration file.
# Globals:  nginx, NGINX_CONF_FILE, prog, lockfile (read); retval (written)
# Exits:    5 when the binary is not executable, 6 when the config file
#           is missing
# Returns:  the exit status of `daemon`
start() {
    # Expansions are quoted so that an empty value fails the check instead
    # of collapsing to a one-argument test, which is always true.
    [ -x "$nginx" ] || exit 5
    [ -f "$NGINX_CONF_FILE" ] || exit 6
    echo -n $"Starting $prog: "
    daemon "$nginx" -c "$NGINX_CONF_FILE"
    retval=$?
    echo
    # Create the lock file only when the daemon started successfully.
    [ "$retval" -eq 0 ] && touch "$lockfile"
    return "$retval"
}

# Stop the daemon identified by the pid file.
# Globals:  pidfile, prog, lockfile (read); retval (written)
# Returns:  the exit status of `killproc`
stop() {
    echo -n $"Stopping $prog: "
    # Quoted so that paths containing whitespace stay a single argument.
    killproc -p "$pidfile" "$prog"
    retval=$?
    echo
    # Remove the lock file only when the daemon was stopped successfully.
    [ "$retval" -eq 0 ] && rm -f -- "$lockfile"
    return "$retval"
}

# Restart the daemon: validate the configuration, then stop and start.
# Returns:  6 when the configuration test fails, otherwise the status
#           of start
restart() {
    if ! configtest_q; then
        return 6
    fi
    stop
    start
}

# Ask the running daemon to reload its configuration by sending HUP.
# Globals:  pidfile, prog (read); retval (written)
# Returns:  6 when the configuration test fails, otherwise the exit
#           status of `killproc`
reload() {
    configtest_q || return 6
    echo -n $"Reloading $prog: "
    killproc -p "$pidfile" "$prog" -HUP
    # Capture the status before `echo` overwrites it, so that a failed
    # signal is reported to the caller.
    retval=$?
    echo
    return "$retval"
}

# Run the binary's configuration test (-t) against the configured file.
# Globals:  nginx, NGINX_CONF_FILE (read)
# Returns:  the exit status of the binary
configtest() {
    # Quoted so that a path containing whitespace stays a single argument.
    "$nginx" -t -c "$NGINX_CONF_FILE"
}

# Same configuration test as configtest, with the binary's -q flag added.
# Globals:  nginx, NGINX_CONF_FILE (read)
# Returns:  the exit status of the binary
configtest_q() {
    # Quoted so that a path containing whitespace stays a single argument.
    "$nginx" -t -q -c "$NGINX_CONF_FILE"
}

# Report the daemon's status through the `status` helper.
# Globals:  prog (read)
# Returns:  the exit status of `status`
rh_status() {
    status "$prog"
}

# Silent variant of rh_status: discards all output, keeps the exit status.
rh_status_q() {
    { rh_status; } 2>/dev/null 1>&2
}

# Upgrade the binary with no downtime.
# Sends USR2 to the process recorded in $pidfile; once both the ".oldbin"
# pid file and the regular pid file exist, sends QUIT to the process
# recorded in the ".oldbin" pid file.
# Globals:  pidfile, prog (read); retval (written)
# Returns:  6 when the configuration test fails, 0 on a successful
#           upgrade, 1 otherwise
upgrade() {
    local oldbin_pidfile="${pidfile}.oldbin"

    configtest_q || return 6
    echo -n $"Upgrading $prog: "
    killproc -p "$pidfile" "$prog" -USR2
    retval=$?
    # If USR2 could not be delivered, stop here: pid files left over from
    # an earlier attempt must not be mistaken for a successful upgrade.
    if [ "$retval" -ne 0 ]; then
        failure $"$prog online upgrade"
        echo
        return 1
    fi
    # Give the new binary a moment to come up and write its pid file.
    sleep 1
    if [[ -f ${oldbin_pidfile} && -f ${pidfile} ]]; then
        killproc -p "$oldbin_pidfile" "$prog" -QUIT
        success $"$prog online upgrade"
        echo
        return 0
    else
        failure $"$prog online upgrade"
        echo
        return 1
    fi
}

# Tell nginx to reopen logs by sending USR1 to the process in $pidfile.
# Globals:  pidfile, prog (read); retval (written)
# Returns:  6 when the configuration test fails, otherwise the exit
#           status of `killproc`
reopen_logs() {
    configtest_q || return 6
    echo -n $"Reopening $prog logs: "
    # Quoted so that a pid file path containing whitespace stays intact.
    killproc -p "$pidfile" "$prog" -USR1
    retval=$?
    echo
    return "$retval"
}

# Dispatch on the action passed as the first command-line argument.
case "$1" in
    start)
        # Starting a service that is already running counts as success.
        rh_status_q && exit 0
        "$1"
        ;;
    stop)
        # Stopping a service that is not running counts as success.
        rh_status_q || exit 0
        "$1"
        ;;
    restart|configtest|reopen_logs)
        "$1"
        ;;
    force-reload|upgrade)
        rh_status_q || exit 7
        upgrade
        ;;
    reload)
        rh_status_q || exit 7
        "$1"
        ;;
    status|status_q)
        # Maps to rh_status or rh_status_q.
        "rh_$1"
        ;;
    condrestart|try-restart)
        # LSB treats try-restart on a stopped service as a successful
        # no-op, so exit 0 rather than 7 when nothing is running.
        rh_status_q || exit 0
        restart
        ;;
    *)
        echo $"Usage: $0 {start|stop|reload|configtest|status|force-reload|upgrade|restart|reopen_logs}"
        exit 2
        ;;
esac

[root@idc1-server1 tengine-2.0.0]# vim /opt/tengine/conf/nginx.conf

 
# Tengine main configuration: proxies HTTP traffic on port 80 to the
# "idc1-servers" upstream group.
user nginx nginx;

# Eight workers, each given a bitmask with a single, distinct bit set.
worker_processes 8;
worker_cpu_affinity 00000001 00000010 00000100 00001000 00010000 00100000 01000000 10000000;

error_log /opt/tengine/logs/error.log;
# Same path as the pidfile used by /etc/init.d/tengine.
pid /opt/tengine/run/nginx.pid;

# Same value as the nofile limit given to the nginx user in limits.conf.
worker_rlimit_nofile 102400;

events 
{
    use epoll;
    # NOTE(review): equal to worker_rlimit_nofile; a proxied request presumably
    # needs more than one descriptor per connection — confirm the headroom.
    worker_connections 102400;
}

http 
{
    include mime.types;
    default_type application/octet-stream;
    charset utf-8;
	
    # Request header and body size limits.
    server_names_hash_bucket_size 128;
    client_header_buffer_size 4k;
    large_client_header_buffers 4 4k;
    client_max_body_size 8m;
	
    sendfile on;
    tcp_nopush on;
    
    keepalive_timeout 120;
	
    # Cache of open file descriptors.
    open_file_cache max=102400 inactive=20s;
    open_file_cache_min_uses 1;
    open_file_cache_valid 30s;

    tcp_nodelay on;
	
    # Response compression for the listed MIME types.
    gzip on;
    gzip_min_length 1k;
    gzip_buffers 4 16k;
    gzip_http_version 1.0;
    gzip_comp_level 2;
    gzip_types text/plain application/x-javascript text/css application/xml;
    gzip_vary on;

    # Backend pool with active health checks: each server is probed with
    # "GET /health"; a 2xx or 3xx response counts as alive.
    # (interval/timeout are presumably in milliseconds — confirm against the
    # Tengine upstream check module documentation.)
    upstream idc1-servers {
        check interval=3000 rise=2 fall=5 timeout=1000 type=http;
        check_http_send "GET /health HTTP/1.0\r\n\r\n";
        check_http_expect_alive http_2xx http_3xx;

        server 10.100.1.10:80;
        # Flagged "down": this backend is configured as unavailable.
        server 10.100.1.11:80 down;
        server 10.100.1.12:80;
        server 10.100.1.13:80;
        server 10.100.1.14:80;
        server 10.100.1.15:80;
        server 10.100.1.16:80;
        server 10.100.1.17:80;
        server 10.100.1.18:80;
        server 10.100.1.19:80;
        server 10.100.1.20:80;
    }

    # Public entry point of the load balancer.
    server {
        listen 80;
        server_name 10.100.1.2;

        # Forward everything to the backend pool, passing the original Host
        # header and the client address along.
        location / {
            proxy_pass http://idc1-servers;
            proxy_set_header Host $host;
            proxy_set_header X-Real-IP $remote_addr;
            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        }

        # Status page showing the health-check state of the backends.
        # NOTE(review): no allow/deny rule is set here — confirm whether this
        # page should be reachable by everyone.
        location /status {
            check_status;
            access_log off;
        }
    }

    # Access-log format, named after the load balancer address, and the log
    # file that uses it.
    log_format 10.100.1.2 '$remote_addr - $remote_user [$time_local] $request '
                           '"$status" $body_bytes_sent "$http_referer" '
                           '"$http_user_agent" "$http_x_forwarded_for"';
    access_log /opt/tengine/logs/cluster.log 10.100.1.2;
}

注:在上面的配置文件中,我用了一个/health页面来检测健康状态,如果返回值为2xx或3xx则视为正常。这样我们在需要手动对某些服务器进行隔离或升级的时候,就可以暂时移走这个页面使Nginx自动将其移出集群。
另外在/status页面上启用了集群的状态展示页面,可以像HAProxy一样看到每个后端服务器的状态。

4. 启动Tengine并测试
[root@idc1-server1 tengine-2.0.0]# sudo /etc/init.d/tengine start
访问链接:http://10.100.1.2/health

,

  1. #1 by web on 2014/11/07 - 22:06

    可以做个压力测试吗?对比一下才有说服力

  2. #2 by webx32 on 2015/11/12 - 15:54

    编译不成功 一堆莫名其妙的问题 ubuntu系统

(will not be published)
*