一、绘制实例

当需要大量绘制物体时，代码看起来如下：


// *************************************************
// C++ code
// *************************************************
// Draw 1000 rectangles at different screen positions by issuing 1000
// separate glDrawArrays calls (one draw call per rectangle).
for(unsigned int i = 0; i < 1000; i++)
{
    bindVAO();        // bind the VAO
    bindTexture();    // bind the texture
    setUniforms();    // set the uniforms
    glDrawArrays(GL_TRIANGLES, 0, 6);    // 6 vertices = 2 triangles = 1 rectangle
}
// *************************************************
// Vertex shader code
// *************************************************
#version 330 core
layout(location = 0) in vec3 aPos;
uniform mat4 modelMatrix;
void main()
{
    // Promote the position to homogeneous coordinates (w = 1.0), then transform it.
    vec4 position = vec4(aPos, 1.0);
    gl_Position = modelMatrix * position;
}

如果像这样绘制模型的大量实例(Instance)，你很快就会因为绘制调用过多而达到性能瓶颈。与绘制顶点本身相比，使用glDrawArrays或glDrawElements函数告诉GPU去绘制你的顶点数据会消耗更多的性能，因为OpenGL在绘制顶点数据之前需要做很多准备工作（比如告诉GPU该从哪个缓冲读取数据，从哪寻找顶点属性，而且这些都是在相对缓慢的CPU到GPU总线(CPU to GPU Bus)上进行的）。所以即便渲染顶点非常快，命令GPU去渲染却未必。
如果我们能够将数据一次性发送给GPU，然后使用一个绘制函数让OpenGL利用这些数据绘制多个物体，就会更方便了。这就是实例化(Instancing)。
使用方法如下，同样是绘制1000个矩形：


// *************************************
// C++ code
// *************************************
bindVAO();        // bind the VAO
bindTexture();    // bind the texture
setUniforms();    // set the uniforms
glDrawArraysInstanced(GL_TRIANGLES, 0, 6, 1000);
// Every parameter except the last one is the same as for glDrawArrays.
// The last parameter is the number of instances to draw; here one instance is
// the rectangle made of two triangles (6 vertices).
// In the vertex shader the built-in variable gl_InstanceID holds the ID of the
// instance that the vertex currently being processed belongs to.
// A rectangle has 6 vertices, and all 6 vertex shader invocations for one
// rectangle see the same gl_InstanceID.
// *************************************
// Vertex shader code
// *************************************
#version 330 core
layout(location = 0) in vec3 aPos;
uniform mat4 modelMatrix[1000];
// The number of instances drawn may push modelMatrix past the uniform storage
// limit supported by the system.
// Improvement:
// In short, use an instanced array: define the array data as a vertex
// attribute that is only updated when the vertex shader starts rendering one
// or more new instances.
// In detail: turn the uniform data into vertex attribute data and configure it
// to advance by one element per rendered instance. By default an attribute
// advances by one element per vertex, i.e. each vertex has its own value. With
// the per-instance setting, the 6 vertices of one rectangle (one instance) all
// use the same value, and the attribute only steps to the next element once
// those 6 vertices have been drawn.
void main()
{
    // Look up the matrix belonging to the instance this vertex is part of.
    mat4 instanceModel = modelMatrix[gl_InstanceID];
    gl_Position = instanceModel * vec4(aPos, 1.0);
}

glDrawArraysInstanced


void glDrawArraysInstanced(GLenum  mode,
                           GLint   first,
                           GLsizei count,
                           GLsizei instanceCount);
// Draws instanceCount instances; each instance is built from count vertices
// starting at index first, assembled according to mode.
// mode, first and count have the same meaning as the glDrawArrays parameters.
// instanceCount:    number of instances to draw
// Has the same effect as the code below
if( mode or count is invalid )
    generate appropriate error
else 
{
    for( int i = 0; i < instanceCount; i++ ) {
        instanceID = i;        // corresponds to the value of gl_InstanceID in the vertex shader
        glDrawArrays( mode, first, count );
    }
    instanceID = 0;
}

GL_INVALID_ENUM is generated if mode is not one of the accepted values.
GL_INVALID_OPERATION is generated if a geometry shader is active and mode is incompatible with the input primitive type of the geometry shader in the currently installed program object.
GL_INVALID_VALUE is generated if count or instanceCount is negative.
GL_INVALID_OPERATION is generated if a non-zero buffer object name is bound to an enabled array and the buffer object’s data store is currently mapped.
glDrawElementsInstanced
```cpp

void glDrawElementsInstanced(GLenum mode, GLsizei count, GLenum type, const void *indices, GLsizei instanceCount);

// The elements (indexed) version of glDrawArraysInstanced above.
//
// mode, count, type and indices have the same meaning as the glDrawElements parameters.
// instanceCount: number of instances to draw
//
// Has the same effect as the following pseudocode:
//
//     if( mode, count, or type is invalid )
//         generate appropriate error
//     else
//     {
//         for( int i = 0; i < instanceCount; i++ ) {
//             instanceID = i;        // corresponds to the value of gl_InstanceID in the vertex shader
//             glDrawElements( mode, count, type, indices );
//         }
//         instanceID = 0;
//     }


```

- **GL_INVALID_ENUM** is generated if mode is not one of GL_POINTS, GL_LINE_STRIP, GL_LINE_LOOP, GL_LINES, GL_TRIANGLE_STRIP, GL_TRIANGLE_FAN, or GL_TRIANGLES.
- **GL_INVALID_VALUE** is generated if count or instanceCount is negative.
- **GL_INVALID_OPERATION** is generated if a geometry shader is active and mode is incompatible with the input primitive type of the geometry shader in the currently installed program object.
- **GL_INVALID_OPERATION** is generated if a non-zero buffer object name is bound to an enabled array and the buffer object's data store is currently mapped.
<a name="X8Fnu"></a>
## gl_InstanceID
顶点着色器内建变量，表示当前迭代的顶点所属的实例ID。
<a name="TGQpa"></a>
# 二、实例化数组
按照上面代码中的注释改进出的代码如下：
```cpp
// *************************************
// Vertex shader code
// *************************************
#version 330 core
layout(location = 0)  in vec3 aPos;
layout(location = 10) in mat4 aModel;    // matrix data supplied as vertex attribute data
// The largest data type a single vertex attribute can hold is a vec4, so a
// matrix clearly cannot be set up as one attribute directly.
// The workaround is to treat the matrix as 4 consecutive vec4 vertex attributes,
// so the mat4 attribute above can be thought of as the following declarations:
// layout(location = 10) in vec4 part1;
// layout(location = 11) in vec4 part2;
// layout(location = 12) in vec4 part3;
// layout(location = 13) in vec4 part4;
void main()
{
    // Promote the position to homogeneous coordinates (w = 1.0), then transform it.
    vec4 localPosition = vec4(aPos, 1.0);
    gl_Position = aModel * localPosition;
}
// *************************************
// C++ code
// *************************************
GLuint VAO;                        // VAO
GLuint VBO_model;                // VBO holding the model matrix data
GLuint VBO_pos;                    // VBO holding the vertex position data
const GLuint QUAD_NUM = 1000;    // number of rectangles to draw
glm::mat4 models[QUAD_NUM];        // one model matrix per rectangle, used to move each one to a different position
// Load the matrix data into a VBO and expose it as vertex attribute data; in
// short, this is what backs the following shader declaration:
// layout(location = 10) in mat4 aModel;
loadInstanceVertexAttrib(VAO, VBO_model, models, QUAD_NUM, 10);
// The function is implemented as follows
void loadInstanceVertexAttrib(const GLuint &VAO,             // VAO
                              GLuint        &VBO,             // 保存矩阵数据的VBO
                              glm::mat4    *models,            // 要实例化的矩阵数组
                              const GLuint &modelNum,        // 矩阵数组长度
                              const GLuint &startLocation)    // 矩阵顶点属性位置值（第一个vec4的位置值）
{
    const GLsizei vec4Size     = sizeof( glm::vec4 );    // vec4大小
    const GLsizei stride       = 4 * vec4Size;            // 步长，即为一个矩阵长度
    const GLint   componentNum = 4;                        // 分量数量，每个vec4有4个GLfloat分量
    const GLuint  location        = 10;                       
    glGenBuffers(1, &VBO);
    glBindBuffers(GL_ARRAY_BUFFER, VBO);
    glBufferData(GL_ARRAY_BUFFER, modelNum * vec4Size, models, GL_STATIC_DRAW);
    glBindVertexArray(VAO);        // VAO在前面设置aPos顶点属性数据时，已经创建
    glEnableVertexAttribArray(startLocation + 0);
    glEnableVertexAttribArray(startLocation + 1);
    glEnableVertexAttribArray(startLocation + 2);
    glEnableVertexAttribArray(startLocation + 3);
    glVertexAttribPointer(startLocation + 0, componentNum, GL_FLOAT, stride, (GLvoid*)(0 * vec4Size));
    glVertexAttribPointer(startLocation + 1, componentNum, GL_FLOAT, stride, (GLvoid*)(1 * vec4Size));
    glVertexAttribPointer(startLocation + 2, componentNum, GL_FLOAT, stride, (GLvoid*)(2 * vec4Size));
    glVertexAttribPointer(startLocation + 3, componentNum, GL_FLOAT, stride, (GLvoid*)(3 * vec4Size));
    // 每个实例对应一个矩阵数据，函数解释见下面。
    glVertexAttribDivisor(startLocation + 0, 1);
    glVertexAttribDivisor(startLocation + 1, 1);
    glVertexAttribDivisor(startLocation + 2, 1);
    glVertexAttribDivisor(startLocation + 3, 1);
    glBindVertexArray(0);
}

```

glVertexAttribDivisor


void glVertexAttribDivisor(GLuint index,
                           GLuint divisor);
// Sets the rate at which a generic vertex attribute advances (is updated) during instanced rendering.
// index:    index of the generic vertex attribute, in the range [0, GL_MAX_VERTEX_ATTRIBS)
// divisor:    determines the advance rate.
//                 divisor =  0: the attribute advances once per vertex, i.e. each vertex gets its own value (the default).
//                 divisor >= 1: the attribute advances once every divisor instances, i.e. each group of divisor instances shares one value.

GL_INVALID_VALUE is generated if index is greater than or equal to the value of GL_MAX_VERTEX_ATTRIBS.

三、实践项目

绘制一个数以千计陨石组成的行星带。
https://github.com/JackieLong/OpenGL/tree/main/project_instancing_test

计算机科学

OpenGL_实例化渲染（instanced rendering）

一、绘制实例

glDrawArraysInstanced

glDrawElementsInstanced

glVertexAttribDivisor

三、实践项目