backend Oct 30

Populating ElasticSearch using serialization groups in ApiPlatform

4 min read –
ElasticSearch

ApiPlatform provides a read integration for ElasticSearch, but it is the developer's job to populate the index. Since each entity needs its own mapping, writing it by hand can be quite time-consuming. Therefore, I decided to use serialization groups and let the normalizer do the job.

Annotation class

First of all, we will create an ElasticSearch annotation. It will later be used to define the groups used for populating the index on each resource:

<?php

namespace App\Annotation;

/**
 * @Annotation
 * @Target({"CLASS"})
 */
class ElasticSearch
{
    // Holds whatever array the constructor receives; defaults to an empty array.
    // ElasticService hands this value to ObjectNormalizer::normalize() as its third argument.
    // NOTE(review): when an annotation class declares a constructor, Doctrine's reader
    // presumably passes the whole values map (e.g. ['groups' => [...]]) as the single
    // argument, so this property would then hold a context-shaped array rather than a
    // flat list of group names - confirm against the Doctrine Annotations documentation.
    public array $groups = [];

    /**
     * Stores the given array on the annotation instance without modifying it.
     */
    public function __construct(array $groups)
    {
        $this->groups = $groups;
    }
}

Resource with ElasticSearch groups

Now we can use it on the resource we would like to populate. It’s possible to add groups to relations too, but make sure you also add the group to the id of the related entity.

<?php

namespace App\Entity;

use ApiPlatform\Core\Annotation\ApiResource;
use App\Annotation\ElasticSearch;
use Doctrine\ORM\Mapping as ORM;
use Symfony\Component\Serializer\Annotation\Groups;

/**
 * @ApiResource()
 * @ElasticSearch(groups={"elastic_blog_post"})
 */
class BlogPost
{
    // Every property below carries the "elastic_blog_post" serialization group, which is
    // the same group name listed in the class-level ElasticSearch annotation.

    // Auto-generated integer primary key.
    /**
     * @ORM\Id
     * @ORM\GeneratedValue
     * @ORM\Column(type="integer")
     *
     * @Groups({"elastic_blog_post"})
     */
    private int $id;

    // Stored in a "datetime" column.
    /**
     * @ORM\Column(type="datetime")
     *
     * @Groups({"elastic_blog_post"})
     */
    private \DateTimeInterface $createdAt;

    // Stored in a "string" column.
    /**
     * @ORM\Column(type="string")
     *
     * @Groups({"elastic_blog_post"})
     */
    private string $title;

    // Stored in a "text" column.
    /**
     * @ORM\Column(type="text")
     *
     * @Groups({"elastic_blog_post"})
     */
    private string $content;

    // getters & setters ...
}

Creating ElasticSearch index and populating data

The next step is to create a command that creates an index for every resource without defining an explicit mapping. ElasticSearch's dynamic field mapping will take care of the mapping once data starts being indexed.

<?php

namespace App\Command;

use ApiPlatform\Core\Bridge\Elasticsearch\Metadata\Document\Factory\DocumentMetadataFactoryInterface;
use ApiPlatform\Core\Metadata\Resource\Factory\ResourceNameCollectionFactoryInterface;
use App\Annotation\ElasticSearch;
use App\Service\ElasticService;
use Doctrine\Common\Annotations\Reader;
use Doctrine\Persistence\ManagerRegistry;
use Symfony\Component\Console\Command\Command;
use Symfony\Component\Console\Input\InputInterface;
use Symfony\Component\Console\Output\OutputInterface;

/**
 * Console command "elastic:populate".
 *
 * Asks ElasticService to create the indexes for all API Platform resources, then loads
 * every stored object of each resource annotated with ElasticSearch and sends it to
 * ElasticService::createItem().
 */
class PopulateElasticIndexCommand extends Command
{
    protected static $defaultName = 'elastic:populate';

    private DocumentMetadataFactoryInterface $documentMetadataFactory;
    private ManagerRegistry $managerRegistry;
    private ResourceNameCollectionFactoryInterface $resourceNameCollectionFactory;
    private Reader $reader;
    private ElasticService $elasticService;

    public function __construct(
        DocumentMetadataFactoryInterface $documentMetadataFactory,
        ResourceNameCollectionFactoryInterface $resourceNameCollectionFactory,
        ManagerRegistry $managerRegistry,
        Reader $reader,
        ElasticService $elasticService
    )
    {
        parent::__construct();

        $this->documentMetadataFactory = $documentMetadataFactory;
        $this->managerRegistry = $managerRegistry;
        $this->resourceNameCollectionFactory = $resourceNameCollectionFactory;
        $this->reader = $reader;
        $this->elasticService = $elasticService;
    }

    /**
     * Creates the indexes and populates them from the database.
     *
     * @return int process exit status: 0 on success, 1 when index creation failed
     */
    protected function execute(InputInterface $input, OutputInterface $output): int
    {
        // Find all ApiPlatform resources and create indexes
        $resources = $this->resourceNameCollectionFactory->create();

        $output->writeln('Creating indexes ...');

        try {
            $this->elasticService->createIndexes($resources);
        } catch (\Exception $e) {
            $output->writeln($e->getMessage());

            // A non-zero exit status tells the shell / CI that the command failed.
            return 1;
        }

        $output->writeln('Created indexes, populating data ...');

        foreach ($resources as $resourceClass) {
            // Get ElasticSearch annotation groups
            $elasticSearchContext = $this->reader->getClassAnnotation(
                new \ReflectionClass($resourceClass),
                ElasticSearch::class
            );

            // Resources without the annotation are not populated.
            if (!$elasticSearchContext instanceof ElasticSearch) {
                continue;
            }

            // Get repository for current resource and pull data from DB
            $manager = $this->managerRegistry->getManagerForClass($resourceClass);

            // The registry returns null for classes no manager is responsible for;
            // skip those instead of failing on a null dereference.
            if ($manager === null) {
                $output->writeln('Skipping ' . $resourceClass . ': no object manager found');

                continue;
            }

            $data = $manager->getRepository($resourceClass)->findAll();

            // Populate each object to ElasticSearch
            $documentMetadata = $this->documentMetadataFactory->create($resourceClass);

            foreach ($data as $object) {
                $output->writeln('Populating ' . $resourceClass);
                $this->elasticService->createItem(
                    $object,
                    $elasticSearchContext,
                    $documentMetadata
                );
            }
        }

        // Exit status 0 signals success.
        return 0;
    }
}

Event Subscriber for keeping everything up to date

Since our index needs to stay up to date, we should update it on each change in the database. Depending on your application, you can do this either in an Event Subscriber or in a Data Persister. In this case we are using an Event Subscriber:

<?php

namespace App\EventSubscriber;

use ApiPlatform\Core\Bridge\Elasticsearch\Metadata\Document\Factory\DocumentMetadataFactoryInterface;
use App\Annotation\ElasticSearch;
use App\Service\ElasticService;
use Doctrine\Common\Annotations\Reader;
use Doctrine\Common\EventSubscriber;
use Doctrine\ORM\Event\LifecycleEventArgs;

/**
 * Doctrine event subscriber that forwards entity inserts, updates and removals to
 * ElasticService for entities whose class carries the ElasticSearch annotation.
 */
class ElasticSubscriber implements EventSubscriber
{
    private Reader $reader;
    private DocumentMetadataFactoryInterface $documentMetadataFactory;
    private ElasticService $elasticService;

    public function __construct(
      Reader $reader, 
      DocumentMetadataFactoryInterface $documentMetadataFactory, 
      ElasticService $elasticService
    ) {
        $this->reader = $reader;
        $this->documentMetadataFactory = $documentMetadataFactory;
        $this->elasticService = $elasticService;
    }

    /**
     * @return string[] names of the Doctrine events handled by this subscriber
     */
    public function getSubscribedEvents(): array
    {
        return [
            'postPersist', 'postRemove', 'postUpdate',
        ];
    }

    /**
     * Creates the document for a newly persisted, annotated entity.
     */
    public function postPersist(LifecycleEventArgs $args): void
    {
        $resourceClass = $this->resolveResourceClass($args);
        $elasticSearchContext = $this->findElasticSearchContext($resourceClass);

        if ($elasticSearchContext === null) {
            return;
        }

        $this->elasticService->createItem(
            $args->getObject(),
            $elasticSearchContext,
            $this->documentMetadataFactory->create($resourceClass)
        );
    }

    /**
     * Updates the document of a changed, annotated entity.
     */
    public function postUpdate(LifecycleEventArgs $args): void
    {
        $resourceClass = $this->resolveResourceClass($args);
        $elasticSearchContext = $this->findElasticSearchContext($resourceClass);

        if ($elasticSearchContext === null) {
            return;
        }

        $this->elasticService->updateItem(
            $args->getObject(),
            $elasticSearchContext,
            $this->documentMetadataFactory->create($resourceClass)
        );
    }

    /**
     * Deletes the document of a removed, annotated entity.
     *
     * NOTE(review): ElasticService::deleteItem() reads $item->getId(); depending on the
     * Doctrine ORM version a generated identifier may already be reset when postRemove
     * fires - confirm, and consider capturing the id in preRemove if so.
     */
    public function postRemove(LifecycleEventArgs $args): void
    {
        $resourceClass = $this->resolveResourceClass($args);

        if ($this->findElasticSearchContext($resourceClass) === null) {
            return;
        }

        $this->elasticService->deleteItem(
            $args->getObject(),
            $this->documentMetadataFactory->create($resourceClass)
        );
    }

    /**
     * Returns the mapped entity class name for the object carried by the event.
     *
     * The name is taken from the object manager's class metadata rather than from
     * get_class() alone, because get_class() on a lazy-loading proxy yields the
     * generated proxy class, which does not carry the entity's annotations.
     */
    private function resolveResourceClass(LifecycleEventArgs $args): string
    {
        return $args
            ->getObjectManager()
            ->getClassMetadata(get_class($args->getObject()))
            ->getName();
    }

    /**
     * Reads the ElasticSearch class annotation of the given class.
     *
     * @return ElasticSearch|null the annotation, or null when the class is not annotated
     */
    private function findElasticSearchContext(string $resourceClass): ?ElasticSearch
    {
        $annotation = $this->reader->getClassAnnotation(
            new \ReflectionClass($resourceClass),
            ElasticSearch::class
        );

        return $annotation instanceof ElasticSearch ? $annotation : null;
    }
}

In each action we are fetching serialization groups, so they can be sent to the normalizer. Finally, we call ElasticService which will send a request to ElasticSearch.

ElasticSearch Service

The following service is in charge of creating and sending requests to ElasticSearch. The Symfony normalizer will generate the body of each request using our groups:

<?php

namespace App\Service;

use ApiPlatform\Core\Bridge\Elasticsearch\Metadata\Document\DocumentMetadata;
use ApiPlatform\Core\Metadata\Resource\Factory\ResourceMetadataFactoryInterface;
use ApiPlatform\Core\Metadata\Resource\ResourceNameCollection;
use ApiPlatform\Core\Util\Inflector;
use App\Annotation\ElasticSearch;
use Elasticsearch\Client;
use Elasticsearch\Common\Exceptions\Missing404Exception;
use Symfony\Component\Serializer\Normalizer\ObjectNormalizer;

/**
 * Builds and sends index and document requests to Elasticsearch through the injected client.
 */
class ElasticService
{
    private ResourceMetadataFactoryInterface $resourceMetadataFactory;
    private Client $client;
    private ObjectNormalizer $objectNormalizer;

    public function __construct(
      ResourceMetadataFactoryInterface $resourceMetadataFactory, 
      Client $client, 
      ObjectNormalizer $objectNormalizer
    ) {
        $this->resourceMetadataFactory = $resourceMetadataFactory;
        $this->client = $client;
        $this->objectNormalizer = $objectNormalizer;
    }

    /**
     * Drops (when present) and recreates one index per resource.
     *
     * Resources whose "elasticsearch" attribute is exactly false are skipped. The index
     * name is the tableized short name of the resource, and the index is created with an
     * empty "_doc" property mapping.
     */
    public function createIndexes(ResourceNameCollection $resources): void
    {
        foreach ($resources as $resourceClass) {
            $metadata = $this->resourceMetadataFactory->create($resourceClass);

            if (false === $metadata->getAttribute('elasticsearch')) {
                continue;
            }

            $indexName = Inflector::tableize($metadata->getShortName());

            try {
                $this->client->indices()->delete(['index' => $indexName]);
            } catch (Missing404Exception $notFound) {
                // The index did not exist yet, so there is nothing to drop.
            }

            $this->client->indices()->create([
                'index' => $indexName,
                'body' => [
                    'mappings' => [
                        '_doc' => [
                            'properties' => [],
                        ],
                    ],
                ],
            ]);
        }
    }

    /**
     * Sends a "create" request holding the normalized item as the document body.
     */
    public function createItem(
        $item, 
        ElasticSearch $elasticSearchContext, 
        DocumentMetadata $documentMetadata
    ): void
    {
        $document = $this->normalizeWithId($item, $elasticSearchContext);
        $request = $this->documentRequest($item, $documentMetadata) + ['body' => $document];

        $this->client->create($request);
    }

    /**
     * Sends an "update" request whose body wraps the normalized item under the "doc" key.
     */
    public function updateItem(
        $item, 
        ElasticSearch $elasticSearchContext, 
        DocumentMetadata $documentMetadata
    ): void
    {
        $document = $this->normalizeWithId($item, $elasticSearchContext);
        $request = $this->documentRequest($item, $documentMetadata) + ['body' => ['doc' => $document]];

        $this->client->update($request);
    }

    /**
     * Sends a "delete" request for the document identified by the item's id.
     */
    public function deleteItem($item, DocumentMetadata $documentMetadata): void
    {
        $this->client->delete($this->documentRequest($item, $documentMetadata));
    }

    /**
     * Normalizes the item with the annotation's groups value as context and sets the
     * "id" key of the result to the item's id.
     */
    private function normalizeWithId($item, ElasticSearch $elasticSearchContext)
    {
        $document = $this->objectNormalizer->normalize($item, 'array', $elasticSearchContext->groups);
        $document['id'] = $item->getId();

        return $document;
    }

    /**
     * Builds the index / type / id part shared by all document requests.
     */
    private function documentRequest($item, DocumentMetadata $documentMetadata): array
    {
        return [
            'index' => $documentMetadata->getIndex(),
            'type' => $documentMetadata->getType(),
            'id' => (string)$item->getId(),
        ];
    }
}

Denormalization of integer and float

You might run into a problem with integer and float values being denormalized as strings when pulling data. This can be easily fixed by decorating ApiPlatform\Core\Serializer\ItemNormalizer and disabling type enforcement in its denormalize method:

<?php 
// ...

/**
 * Delegates denormalization to the decorated normalizer with type enforcement disabled.
 *
 * @param mixed       $data    data to denormalize
 * @param string      $class   target class name
 * @param string|null $format  format the data was decoded from, if any
 * @param array       $context serializer context; the type-enforcement flag is always overridden
 *
 * @return mixed whatever the decorated normalizer returns
 */
public function denormalize($data, $class, ?string $format = null, array $context = [])
{
    // Force the flag on regardless of what the caller passed in the context.
    $context[AbstractObjectNormalizer::DISABLE_TYPE_ENFORCEMENT] = true;

    return $this->decorated->denormalize($data, $class, $format, $context);
}

Conclusion

Now that everything is in place, we can add a new resource to ElasticSearch just by defining groups in the annotation and assigning them to the fields we want to have in the index.