elasticsearch-1.3.0 之索引代码粗略梳理

elasticsearch-1.3.0

发送请求
创建

[root@centos ~]# curl -XPUT 172.16.136.159:9200/customer?pretty
{
  "acknowledged" : true
}

索引

[root@centos ~]# curl -XPUT 172.16.136.159:9200/customer/external/1?pretty '-d { "name":"JOhn Doe"}' 
{
  "_index" : "customer",
  "_type" : "external",
  "_id" : "1",
  "_version" : 1,
  "created" : true
}
[root@centos ~]# curl -XPUT 172.16.136.159:9200/customer/external/1?pretty '-d { "name":"JOhn Doe"}' 
{
  "_index" : "customer",
  "_type" : "external",
  "_id" : "1",
  "_version" : 2,
  "created" : false
}

这里先跟踪下索引的流程,netty的bootstrap暂且不管,从HttpRequestHandler的messageReceived说起

/**
 * Wraps the incoming Netty message as a {@code NettyHttpRequest}, hands it to the server
 * transport together with a response channel bound to the same Netty channel, and then
 * forwards the event to the superclass handler.
 *
 * @param ctx the handler context the event arrived on
 * @param e   the message event; its message is cast to {@code HttpRequest}
 * @throws Exception propagated from dispatching or from the superclass handler
 */
public void messageReceived(ChannelHandlerContext ctx, MessageEvent e) throws Exception {
    // Upstream note: Netty's HTTP handling always copies the data into its own buffer,
    // either inside NioWorker while reading or through a cumulation buffer.
    final NettyHttpRequest httpRequest = new NettyHttpRequest((HttpRequest) e.getMessage(), e.getChannel());
    final NettyHttpChannel httpChannel = new NettyHttpChannel(serverTransport, e.getChannel(), httpRequest);
    serverTransport.dispatchRequest(httpRequest, httpChannel);
    super.messageReceived(ctx, e);
}

这里的serverTransport其实就是NettyHttpServerTransport,dispatchRequest是它的方法
NettyHttpServerTransport

/**
 * Forwards an incoming HTTP request and its response channel to the configured
 * {@code httpServerAdapter}.
 *
 * @param request the HTTP request to dispatch
 * @param channel the channel the response will be written to
 */
void dispatchRequest(HttpRequest request, HttpChannel channel) {
    httpServerAdapter.dispatchRequest(request, channel);
}

Dispatcher,static class Dispatcher implements HttpServerAdapter

/**
 * Adapter entry point: passes the request and channel straight to
 * {@code server.internalDispatchRequest}.
 *
 * @param request the HTTP request to dispatch
 * @param channel the channel the response will be written to
 */
public void dispatchRequest(HttpRequest request, HttpChannel channel) {
    server.internalDispatchRequest(request, channel);
}

HttpServer

/**
 * Routes an HTTP request either through the plugin-site filter chain or to the REST controller.
 *
 * @param request the incoming HTTP request; its raw path decides the route
 * @param channel the channel the response will be written to
 */
public void internalDispatchRequest(final HttpRequest request, final HttpChannel channel) {
    final boolean pluginSiteRequest = request.rawPath().startsWith("/_plugin/");
    if (!pluginSiteRequest) {
        // Regular REST traffic goes straight to the controller.
        restController.dispatchRequest(request, channel);
    } else {
        // Paths under /_plugin/ are run through a filter chain built around pluginSiteFilter.
        restController.filterChain(pluginSiteFilter).continueProcessing(request, channel);
    }
}

RestController的dispatchRequest()主要是executeHandler()

try {
    executeHandler(request, channel);
} catch (Throwable e) {

executeHandler方法中由不同的handler处理请求,这里的handler是RestIndexAction,继承自BaseRestHandler

// Excerpt from executeHandler: look up the handler registered for this request and,
// when one exists, let it process the request and write to the channel.
final RestHandler handler = getHandler(request);
if (handler != null) {
    handler.handleRequest(request, channel);
}

在BaseRestHandler中

/**
 * Picks the client to use for this request and delegates to the three-argument overload.
 * When {@code usefulHeaders} is empty the shared {@code client} is used as-is; otherwise it is
 * wrapped in a {@code HeadersCopyClient} built from the client, the request and those headers.
 *
 * @param request the REST request being handled
 * @param channel the channel the response will be written to
 * @throws Exception propagated from the concrete handler
 */
public final void handleRequest(RestRequest request, RestChannel channel) throws Exception {
    final Client effectiveClient;
    if (usefulHeaders.length == 0) {
        effectiveClient = client;
    } else {
        effectiveClient = new HeadersCopyClient(client, request, usefulHeaders);
    }
    handleRequest(request, channel, effectiveClient);
}

/**
 * Handles the request with the given client; implemented by concrete REST handlers.
 *
 * @param request the REST request being handled
 * @param channel the channel the response will be written to
 * @param client  the client chosen by the two-argument overload
 * @throws Exception if handling fails
 */
protected abstract void handleRequest(RestRequest request, RestChannel channel, Client client) throws Exception;

RestIndexAction实现了父类BaseRestHandler中的抽象handleRequest方法,最后调用NodeClient的index方法

client.index(indexRequest, new RestBuilderListener<IndexResponse>(channel) {

NodeClient的父类AbstractClient中index的实现

/**
 * Executes an index request by delegating to {@code execute} with {@code IndexAction.INSTANCE}.
 *
 * @param request  the index request
 * @param listener receives the {@code IndexResponse} or the failure
 */
public void index(final IndexRequest request, final ActionListener<IndexResponse> listener) {
    execute(IndexAction.INSTANCE, request, listener);
}

NodeClient中的execute方法实现

/**
 * Looks up the transport action registered for the given client action in {@code actions}
 * and runs it with the request and listener.
 *
 * @param action   the client action used as the lookup key (cast to {@code ClientAction})
 * @param request  the request to execute
 * @param listener receives the response or the failure
 */
public <Request extends ActionRequest, Response extends ActionResponse, RequestBuilder extends ActionRequestBuilder<Request, Response, RequestBuilder, Client>> void execute(Action<Request, Response, RequestBuilder, Client> action, Request request, ActionListener<Response> listener) {
    TransportAction<Request, Response> transportAction = actions.get((ClientAction)action);
    // For an index request this resolves to TransportIndexAction, which extends
    // TransportShardReplicationOperationAction.
    transportAction.execute(request, listener);//TransportIndexAction extends TransportShardReplicationOperationAction
}

这里的transportAction是TransportIndexAction,
其中TransportShardReplicationOperationAction是TransportIndexAction的父类,而TransportShardReplicationOperationAction又继承自TransportAction。TransportAction中execute的实现如下

/**
 * Validates the request and runs the action, reporting every outcome through the listener.
 * <p>
 * If the request asks for a threaded listener, the listener is first wrapped in a
 * {@code ThreadedActionListener}. A validation failure is reported via {@code onFailure}
 * without calling {@code doExecute}; anything thrown by {@code doExecute} is trace-logged
 * and reported via {@code onFailure} as well.
 *
 * @param request  the request to validate and execute
 * @param listener receives the response or the failure
 */
public void execute(Request request, ActionListener<Response> listener) {
    if (request.listenerThreaded()) {
        listener = new ThreadedActionListener<>(threadPool, listener, logger);
    }
    final ActionRequestValidationException invalid = request.validate();
    if (invalid == null) {
        try {
            doExecute(request, listener);
        } catch (Throwable failure) {
            logger.trace("Error during transport action execution.", failure);
            listener.onFailure(failure);
        }
    } else {
        // Invalid requests never reach doExecute.
        listener.onFailure(invalid);
    }
}

直接调用TransportIndexAction的doExecute

 /**
  * Entry point for an index request. When {@code autoCreateIndex} says the target index should
  * be auto-created, a create-index request is issued first and the index operation continues
  * from its callback; otherwise the index operation runs directly via {@code innerExecute}.
  *
  * @param request  the index request
  * @param listener receives the {@code IndexResponse} or the failure
  */
 protected void doExecute(final IndexRequest request, final ActionListener<IndexResponse> listener) {
        // if we don't have a master, we don't have metadata, that's fine, let it find a master using create index API
        if (autoCreateIndex.shouldAutoCreate(request.index(), clusterService.state())) {
            request.beforeLocalFork(); // we fork on another thread...
            createIndexAction.execute(new CreateIndexRequest(request.index()).cause("auto(index api)").masterNodeTimeout(request.timeout()), new ActionListener<CreateIndexResponse>() {
                @Override
                public void onResponse(CreateIndexResponse result) {
                    // index was created, continue with the actual index operation
                    innerExecute(request, listener);
                }

                @Override
                public void onFailure(Throwable e) {
                    if (ExceptionsHelper.unwrapCause(e) instanceof IndexAlreadyExistsException) {
                        // we have the index, do it
                        try {
                            innerExecute(request, listener);
                        } catch (Throwable e1) {
                            listener.onFailure(e1);
                        }
                    } else {
                        // any other create-index failure is reported to the caller unchanged
                        listener.onFailure(e);
                    }
                }
            });
        } else {
            // no auto-create needed: run the index operation directly
            innerExecute(request, listener);
        }
    }

这里走 innerExecute(request, listener);

/**
 * Runs the index operation itself by delegating to the superclass {@code doExecute}.
 *
 * @param request  the index request
 * @param listener receives the {@code IndexResponse} or the failure
 */
private void innerExecute(final IndexRequest request, final ActionListener<IndexResponse> listener) {
    super.doExecute(request, listener);
}

这里的super就是TransportShardReplicationOperationAction了,TransportShardReplicationOperationAction中doExecute的实现

/**
 * Creates an {@code AsyncShardOperationAction} for the request and listener and starts it.
 *
 * @param request  the request to execute
 * @param listener receives the response or the failure
 */
protected void doExecute(Request request, ActionListener<Response> listener) {
    new AsyncShardOperationAction(request, listener).start();
}

主要两个方法,一个是获取shard,另一个是shardOperationOnPrimary;
其中shard后边再说,shardOperationOnPrimary在TransportIndexAction实现

/**
 * Performs the index (or create) operation on the primary shard and builds the response.
 * <p>
 * Steps visible here: check that routing is supplied when the type's mapping requires it,
 * resolve the index service and shard, prepare and run the engine operation, optionally
 * refresh the shard, then copy the resulting version back onto the request.
 *
 * @param clusterState the cluster state used to look up index and mapping metadata
 * @param shardRequest carries the {@code IndexRequest} and the target shard id
 * @return a {@code PrimaryResponse} holding the request, the {@code IndexResponse} and the engine operation
 * @throws RoutingMissingException if the mapping requires routing and the request has none
 */
protected PrimaryResponse<IndexResponse, IndexRequest> shardOperationOnPrimary(ClusterState clusterState, PrimaryOperationRequest shardRequest) {
    final IndexRequest request = shardRequest.request;

    // validate, if routing is required, that we got routing
    IndexMetaData indexMetaData = clusterState.metaData().index(request.index());
    MappingMetaData mappingMd = indexMetaData.mappingOrDefault(request.type());
    if (mappingMd != null && mappingMd.routing().required()) {
        if (request.routing() == null) {
            throw new RoutingMissingException(request.index(), request.type(), request.id());
        }
    }

    IndexService indexService = indicesService.indexServiceSafe(shardRequest.request.index());
    IndexShard indexShard = indexService.shardSafe(shardRequest.shardId);
    // bundle the source with type, id, routing, parent, timestamp and ttl for parsing
    SourceToParse sourceToParse = SourceToParse.source(SourceToParse.Origin.PRIMARY, request.source()).type(request.type()).id(request.id())
            .routing(request.routing()).parent(request.parent()).timestamp(request.timestamp()).ttl(request.ttl());
    long version;
    boolean created;
    Engine.IndexingOperation op;
    if (request.opType() == IndexRequest.OpType.INDEX) {
        // INDEX op type: prepare, push mapping changes to the master if parsing modified them, then index
        Engine.Index index = indexShard.prepareIndex(sourceToParse, request.version(), request.versionType(), Engine.Operation.Origin.PRIMARY, request.canHaveDuplicates());
        if (index.parsedDoc().mappingsModified()) {
            mappingUpdatedAction.updateMappingOnMaster(request.index(), index.docMapper(), indexService.indexUUID());
        }
        indexShard.index(index);
        version = index.version();
        op = index;
        created = index.created();
    } else {
        // any other op type is handled as a create; created is always reported as true here
        Engine.Create create = indexShard.prepareCreate(sourceToParse,
                request.version(), request.versionType(), Engine.Operation.Origin.PRIMARY, request.canHaveDuplicates(), request.autoGeneratedId());
        if (create.parsedDoc().mappingsModified()) {
            mappingUpdatedAction.updateMappingOnMaster(request.index(), create.docMapper(), indexService.indexUUID());
        }
        indexShard.create(create);
        version = create.version();
        op = create;
        created = true;
    }
    if (request.refresh()) {
        try {
            indexShard.refresh(new Engine.Refresh("refresh_flag_index").force(false));
        } catch (Throwable e) {
            // ignore: a refresh failure does not fail the index operation
        }
    }

    // update the version on the request, so it will be used for the replicas
    request.version(version);
    request.versionType(request.versionType().versionTypeForReplicationAndRecovery());

    assert request.versionType().validateVersionForWrites(request.version());

    IndexResponse response = new IndexResponse(request.index(), request.type(), request.id(), version, created);
    return new PrimaryResponse<>(shardRequest.request, response, op);
}

走request.opType() == IndexRequest.OpType.INDEX分支,主要是indexShard.prepareIndex和indexShard.index(index)。这里的IndexShard是InternalIndexShard,其index实现如下

/**
 * Runs an index operation on this shard: checks that writes are allowed for the operation's
 * origin, calls the indexing service's pre/post hooks around the engine call, and records the
 * end time on the operation.
 *
 * @param index the prepared engine index operation
 * @return the parsed document of the operation
 * @throws ElasticsearchException declared by the signature; runtime failures from the engine
 *         are rethrown after {@code indexingService.failedIndex} is notified
 */
public ParsedDocument index(Engine.Index index) throws ElasticsearchException {
    writeAllowed(index.origin());
    // the pre-index hook may return a different operation instance, which is used from here on
    index = indexingService.preIndex(index);
    try {
        if (logger.isTraceEnabled()) {
            logger.trace("index [{}][{}]{}", index.type(), index.id(), index.docs());
        }
        engine.index(index);
        index.endTime(System.nanoTime());
    } catch (RuntimeException ex) {
        // report the failure to the indexing service, then propagate it unchanged
        indexingService.failedIndex(index);
        throw ex;
    }
    indexingService.postIndex(index);
    return index.parsedDoc();
}

indexingService对应ShardIndexingService, engine是InternalEngine,InternalEngine的index()

/**
 * Indexes the operation through the current {@code IndexWriter} while holding the read lock
 * and a throttle handle, then marks the engine as dirty and as needing a merge check and a flush.
 *
 * @param index the index operation to apply
 * @throws EngineException an {@code IndexFailedEngineException} wrapping an
 *         {@code OutOfMemoryError}, {@code IllegalStateException} or {@code IOException}
 */
public void index(Index index) throws EngineException {
    final IndexWriter writer;
    // the read lock is held for the whole write and released by try-with-resources
    try (InternalLock _ = readLock.acquire()) {
        writer = currentIndexWriter();
        // the throttle handle is released as soon as innerIndex returns
        try (Releasable r = throttle.acquireThrottle()) {
            innerIndex(index, writer);
        }
        dirty = true;
        possibleMergeNeeded = true;
        flushNeeded = true;
    } catch (OutOfMemoryError | IllegalStateException | IOException t) {
        // give the engine a chance to fail itself before surfacing the wrapped error
        maybeFailEngine(t, "index");
        throw new IndexFailedEngineException(shardId, index, t);
    }
    // runs after the read lock has been released
    checkVersionMapRefresh();
}

最终在InternalEngine的innerIndex方法中调用Lucene的IndexWriter:依据文档当前是否已存在版本(即currentVersion是否为Versions.NOT_FOUND),通过writer.addDocument(s)或者writer.updateDocument(s)方法来添加或者更新索引
添加add索引

// Excerpt from innerIndex (add path): use the bulk addDocuments call when the operation
// carries more than one document, otherwise add the single document.
if (index.docs().size() > 1) {
    writer.addDocuments(index.docs(), index.analyzer());
} else {
    writer.addDocument(index.docs().get(0), index.analyzer());
}

更新update索引

// Excerpt from innerIndex (update path): same single/multi split as the add path, but the
// update calls are keyed by the operation's uid term.
if (index.docs().size() > 1) {
    writer.updateDocuments(index.uid(), index.docs(), index.analyzer());
} else {
    writer.updateDocument(index.uid(), index.docs().get(0), index.analyzer());
}

最后Translog

// Excerpt from innerIndex: append the operation to the translog and keep the returned location.
Translog.Location translogLocation = translog.add(new Translog.Index(index));

具体代码

/**
 * Applies a single index operation: resolves the document's current version, checks for a
 * version conflict, writes the document(s) through the given {@code IndexWriter} (add when the
 * document is not found, update otherwise), appends the operation to the translog and records
 * the new version in the version map.
 *
 * @param index  the index operation; its version and created flag are updated here
 * @param writer the Lucene writer to add or update documents with
 * @throws IOException declared by the signature (presumably from the writer and translog calls)
 * @throws VersionConflictEngineException on a version conflict, unless the operation's origin is RECOVERY
 */
private void innerIndex(Index index, IndexWriter writer) throws IOException {
    // all work happens while holding the lock object obtained from dirtyLock for this uid
    synchronized (dirtyLock(index.uid())) {
        final long currentVersion;
        VersionValue versionValue = versionMap.getUnderLock(index.uid().bytes());
        if (versionValue == null) {
            // no entry in the version map: load the current version from the index
            currentVersion = loadCurrentVersionFromIndex(index.uid());
        } else {
            if (enableGcDeletes && versionValue.delete() && (threadPool.estimatedTimeInMillis() - versionValue.time()) > gcDeletesInMillis) {
                currentVersion = Versions.NOT_FOUND; // deleted, and GC
            } else {
                currentVersion = versionValue.version();
            }
        }

        long updatedVersion;
        long expectedVersion = index.version();
        if (index.versionType().isVersionConflictForWrites(currentVersion, expectedVersion)) {
            if (index.origin() == Operation.Origin.RECOVERY) {
                // conflicts during recovery are skipped silently: nothing is written
                return;
            } else {
                throw new VersionConflictEngineException(shardId, index.type(), index.id(), currentVersion, expectedVersion);
            }
        }
        updatedVersion = index.versionType().updateVersion(currentVersion, expectedVersion);

        index.updateVersion(updatedVersion);
        if (currentVersion == Versions.NOT_FOUND) {
            // document does not exist, we can optimize for create
            index.created(true);
            if (index.docs().size() > 1) {
                writer.addDocuments(index.docs(), index.analyzer());
            } else {
                writer.addDocument(index.docs().get(0), index.analyzer());
            }
        } else {
            if (versionValue != null) {
                index.created(versionValue.delete()); // we have a delete which is not GC'ed...
            }
            // a version was found: update by uid instead of a plain add
            if (index.docs().size() > 1) {
                writer.updateDocuments(index.uid(), index.docs(), index.analyzer());
            } else {
                writer.updateDocument(index.uid(), index.docs().get(0), index.analyzer());
            }
        }
        Translog.Location translogLocation = translog.add(new Translog.Index(index));

        // remember the new version together with where the operation sits in the translog
        versionMap.putUnderLock(index.uid().bytes(), new VersionValue(updatedVersion, translogLocation));

        indexingService.postIndexUnderLock(index);
    }
}

link
分布式搜索Elasticsearch源码分析之二------索引过程源码概要分析

原文地址:https://www.cnblogs.com/donganwangshi/p/4318045.html