void
heap_insert(Relationrelation, HeapTupletup, CommandIdcid,
intoptions, BulkInsertStatebistate)
//relation:某张表 heaptuple:行数据指针,指向行数据 cid//命令id options:选项 statte:状态
{
// 获得目前的事物id
TransactionIdxid=GetCurrentTransactionId();
HeapTuple heaptup; //页中真正行数据 局部变量
Buffer buffer; //缓冲 int
Buffer vmbuffer=InvalidBuffer; // int 0
bool all_visible_cleared=false;
/
Fill in tuple header fields and toast the tuple if necessary.
Note: below this point, heaptup is the data we actually intend to store
into the relation; tup is the caller’s original untoasted data.
/
// 填充行数据的头部和数据部分
heaptup=heap_prepare_insert(relation, tup, xid, cid, options);
/
Find buffer to insert this tuple into. If the page is all visible,
this will also pin the requisite visibility map page.
/
// 寻找缓冲来去插入
//数据表 缓冲大小 选项 状态 缓冲 这里传入一个指针
buffer=RelationGetBufferForTuple(relation, heaptup->t_len,
InvalidBuffer, options, bistate,
&vmbuffer, NULL);
/
We’re about to do the actual insert — but check for conflict first, to
avoid possibly having to roll back work we’ve just done.
This is safe without a recheck as long as there is no possibility of
another process scanning the page between this check and the insert
being visible to the scan (i.e., an exclusive buffer content lock is
continuously held from this point until the tuple insert is visible).
For a heap insert, we only need to check for table-level SSI locks. Our
new tuple can’t possibly conflict with existing tuple locks, and heap
page locks are only consolidated versions of tuple locks; they do not
lock “gaps” as index page locks do. So we don’t need to specify a
buffer when making the call, which makes for a faster check.
/
// 序列化检查
CheckForSerializableConflictIn(relation, NULL, InvalidBuffer);
/ NO EREPORT(ERROR) from here till changes are logged /
// 变量+1
START_CRIT_SECTION();
// 插入数据到缓冲中
RelationPutHeapTuple(relation, buffer, heaptup,
(options&HEAP_INSERT_SPECULATIVE) !=0);
if (PageIsAllVisible(BufferGetPage(buffer)))
{
// 判断是否可见
all_visible_cleared=true;
PageClearAllVisible(BufferGetPage(buffer));
visibilitymap_clear(relation,
ItemPointerGetBlockNumber(&(heaptup->t_self)),
vmbuffer, VISIBILITYMAP_VALID_BITS);
}
/
XXX Should we set PageSetPrunable on this page ?
The inserting transaction may eventually abort thus making this tuple
DEAD and hence available for pruning. Though we don’t want to optimize
for aborts, if no other tuple in this page is UPDATEd/DELETEd, the
aborted tuple will never be pruned until next vacuum is triggered.
If you do add PageSetPrunable here, add it in heap_xlog_insert too.
/
// 设置缓冲区为脏快
MarkBufferDirty(buffer);
/ XLOG stuff /
// 是否需要wal日志
if (!(options&HEAP_INSERT_SKIP_WAL) &&RelationNeedsWAL(relation))
{
xl_heap_insertxlrec;
xl_heap_headerxlhdr;
XLogRecPtr recptr;
Page page=BufferGetPage(buffer); //获取缓冲 数据已经插入了
uint8 info=XLOG_HEAP_INSERT;
int bufflags=0;
/
If this is a catalog, we need to transmit combocids to properly
decode, so log that as well.
/
// 编码
if (RelationIsAccessibleInLogicalDecoding(relation))
log_heap_new_cid(relation, heaptup);
/
If this is the single and first tuple on page, we can reinit the
page instead of restoring the whole thing. Set flag, and hide
buffer references from XLogInsert.
如果这是第一页,并且只有一页
page该页的所有数据 使用buffer定位位置,page是指针,更容易定位
如果这是页面上的第一个元组,我们可以重新生成
页面,而不是恢复整个内容。设置标志,然后隐藏
来自XLogInsert的缓冲区引用。
看一下该标志的使用
/
// 将page转换为pageheader,tuple是具体行数据,两者不同。
//用于重启恢复,如果double-write,如果是第一个分配的数据,是否需要将整个页的数据都保存到wal中,
// 不需要,设置一个恢复标签表示需要重新分配页
if (ItemPointerGetOffsetNumber(&(heaptup->t_self)) ==FirstOffsetNumber&&
PageGetMaxOffsetNumber(page) ==FirstOffsetNumber)
{
// 按位或 后复制
// 发现这个标志后 只需要初始化页面,而不是恢复整个页面
info|=XLOG_HEAP_INIT_PAGE;
bufflags|=REGBUF_WILL_INIT;
}
//记录序号
xlrec.offnum =ItemPointerGetOffsetNumber(&heaptup->t_self);
xlrec.flags =0;
if (all_visible_cleared)
xlrec.flags |=XLH_INSERT_ALL_VISIBLE_CLEARED;
if (options&HEAP_INSERT_SPECULATIVE)
// 设置锁标记
xlrec.flags |=XLH_INSERT_IS_SPECULATIVE;
Assert(ItemPointerGetBlockNumber(&heaptup->t_self) ==BufferGetBlockNumber(buffer));
/
For logical decoding, we need the tuple even if we’re doing a full
page write, so make sure it’s included even if we take a full-page
image. (XXX We could alternatively store a pointer into the FPW).
/
// 是否需要逻辑编码
if (RelationIsLogicallyLogged(relation) &&
!(options&HEAP_INSERT_NO_LOGICAL))
{
xlrec.flags |=XLH_INSERT_CONTAINS_NEW_TUPLE;
bufflags|=REGBUF_KEEP_DATA;
}
//开始插入 插入不应该在恢复的时候
XLogBeginInsert();
// 把日志指针插入到日志链表中
XLogRegisterData((char) &xlrec, SizeOfHeapInsert);
//header和data 设置xl的相关数据
xlhdr.t_infomask2 =heaptup->t_data->t_infomask2;
xlhdr.t_infomask =heaptup->t_data->t_infomask;
xlhdr.t_hoff =heaptup->t_data->t_hoff;
/
note we mark xlhdr as belonging to buffer; if XLogInsert decides to
write the whole page to the xlog, we don’t need to store
xl_heap_header in the xlog.
/
// 将元组所在的buffer复制到0号缓冲区
XLogRegisterBuffer(0, buffer, REGBUF_STANDARD|bufflags);
// 注册行数据头
XLogRegisterBufData(0, (char) &xlhdr, SizeOfHeapHeader);
/ PG73FORMAT: write bitmap [+ padding] [+ oid] + data /
// 注册行数据尾
XLogRegisterBufData(0,
(char) heaptup->t_data +SizeofHeapTupleHeader,
heaptup->t_len -SizeofHeapTupleHeader);
/ filtering by origin on a row level is much more efficient /
XLogSetRecordFlags(XLOG_INCLUDE_ORIGIN);
// 插入wal日志
recptr=XLogInsert(RM_HEAP_ID, info);
// 给该页设置lsn
PageSetLSN(page, recptr);
}
END_CRIT_SECTION();
// 释放锁
UnlockReleaseBuffer(buffer);
if (vmbuffer!=InvalidBuffer)
ReleaseBuffer(vmbuffer);
/
If tuple is cachable, mark it for invalidation from the caches in case
we abort. Note it is OK to do this after releasing the buffer, because
the heaptup data structure is all in local memory, not in the shared
buffer.
/
CacheInvalidateHeapTuple(relation, heaptup, NULL);
/ Note: speculative insertions are counted too, even if aborted later /
pgstat_count_heap_insert(relation, 1);
/
If heaptup is a private copy, release it. Don’t forget to copy t_self
back to the caller’s image, too.
/
if (heaptup!=tup)
{
tup->t_self =heaptup->t_self;
heap_freetuple(heaptup);
}
}