// OpenPoseEstimation.h (from Doxygen page OpenPoseEstimation_8h_source.html)

/*

 * This file is part of ArmarX.

 *

 * ArmarX is free software; you can redistribute it and/or modify

 * it under the terms of the GNU General Public License version 2 as

 * published by the Free Software Foundation.

 *

 * ArmarX is distributed in the hope that it will be useful, but

 * WITHOUT ANY WARRANTY; without even the implied warranty of

 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

 * GNU General Public License for more details.

 *

 * You should have received a copy of the GNU General Public License

 * along with this program. If not, see <http://www.gnu.org/licenses/>.

 *

 * @package    VisionX::ArmarXObjects::OpenPoseEstimation

 * @author     Stefan Reither ( stef dot reither at web dot de )

 * @date       2018

 * @copyright  http://www.gnu.org/licenses/gpl-2.0.txt

 *             GNU General Public License

 */

#pragma once


#include <ArmarXCore/core/Component.h>

#include <ArmarXCore/core/services/tasks/RunningTask.h>


#include <RobotAPI/interface/visualization/DebugDrawerInterface.h>

#include <RobotAPI/components/RobotState/RobotStateComponent.h>

#include <RobotAPI/libraries/RobotAPIComponentPlugins/ArVizComponentPlugin.h>


#include <VirtualRobot/Robot.h>


#include <Calibration/Calibration.h>

#include <Calibration/StereoCalibration.h>


#include <VisionX/libraries/human/pose/model/openpose_body_25.h>

#include <VisionX/core/ImageProcessor.h>

#include <VisionX/interface/components/OpenPoseEstimationInterface.h>

#include <VisionX/interface/components/PointCloudAndImageAndCalibrationProviderInterface.h>

#include <VisionX/components/image_processor/OpenPoseEstimation/Util.h>

#include <VisionX/components/image_processor/OpenPoseEstimation/KeypointManager.h>

#include <VisionX/components/image_processor/OpenPoseEstimation/ImageKeypointBuffer.h>


// OpenPose

#include <openpose/core/headers.hpp>

#include <openpose/pose/headers.hpp>

#include <openpose/utilities/headers.hpp>


//#ifdef OPENPOSE_MODELS

//#define MODEL_FOLDER OPENPOSE_MODELS

//#else

//#define MODEL_FOLDER ""

//#endif


namespace armarx

{

    /**
     * @brief Input mode of an OpenPoseEstimation instance.
     *
     * The enumerator names are the strings accepted by the "Mode" property.
     */
    enum OpenPoseEstimationMode
    {
        FromDepthImage  = 0, ///< The image processor expects a rgb-image and a depth-image.
        FromStereoImage = 1, ///< The image processor expects two images as a stereo input.
        FromTopic       = 2  ///< The image processor listens on a topic providing openpose-keypoints.
    };


    /**
     * @class OpenPoseEstimationPropertyDefinitions
     * @brief Declares the properties of the OpenPoseEstimation component.
     *
     * Extends visionx::ImageProcessorPropertyDefinitions with topic names, the input mode,
     * the OpenPose parameters (all prefixed with "OP_"), depth masking, robot/camera names,
     * workspace filtering and reporting options.
     */
    class OpenPoseEstimationPropertyDefinitions:
        public visionx::ImageProcessorPropertyDefinitions
    {
    public:
        /**
         * @brief Registers all properties together with their defaults and descriptions.
         * @param prefix Passed on unchanged to visionx::ImageProcessorPropertyDefinitions.
         */
        OpenPoseEstimationPropertyDefinitions(std::string prefix):
            visionx::ImageProcessorPropertyDefinitions(prefix)
        {
            // Topics and general image handling
            defineOptionalProperty<std::string>("OpenPoseEstimation2DTopicName", "OpenPoseEstimation2D");
            defineOptionalProperty<std::string>("OpenPoseEstimation3DTopicName", "OpenPoseEstimation3D");
            defineOptionalProperty<int>("DepthMedianRadius", 10, "Radius of the circle around a target pixel in the depth image, which is used to calculate the median around that pixel.");
            defineOptionalProperty<bool>("UseDistortionParameters", false, "Whether to use distortion parameters when transforming image coordinates into world coordinates");

            defineRequiredProperty<std::string>("ImageProviderName", "Name of the ImageProviderComponent");
            defineOptionalProperty<std::string>("DebugDrawerTopicName", "DebugDrawerUpdates", "Name of the debug drawer topic that should be used");

            // Input mode. Adjacent string literals are concatenated verbatim, so every
            // fragment has to end with its own separating space.
            defineOptionalProperty<OpenPoseEstimationMode>("Mode", FromDepthImage, "Sets the mode of this instance of OpenPoseEstimation. There are currently three supported modes: "
                    "'FromDepthImage', which lets the image processor expect a rgb-image and a depth-image, "
                    "'FromStereoImage', which lets the image processor expect two images as a stereo input "
                    "which will be used for calculating the 3D-values and "
                    "'FromTopic', which lets the image processor listen on a topic which provides openpose-keypoints.")
            .setMatchRegex("FromDepthImage|FromStereoImage|FromTopic")
            .map("FromDepthImage", OpenPoseEstimationMode::FromDepthImage)
            .map("FromStereoImage", OpenPoseEstimationMode::FromStereoImage)
            .map("FromTopic", OpenPoseEstimationMode::FromTopic);

            // OpenPose parameters
            defineOptionalProperty<std::string>("OP_net_resolution", "-1x368", "Multiples of 16. If it is increased, the accuracy potentially increases. If it is "
                                                "decreased, the speed increases. For maximum speed-accuracy balance, it should keep the "
                                                "closest aspect ratio possible to the images or videos to be processed.\n Using `-1` in "
                                                "any of the dimensions, OP will choose the optimal aspect ratio depending on the user's "
                                                "input value.\n E.g. the default `-1x368` is equivalent to `656x368` in 16:9 resolutions, "
                                                "e.g. full HD (1920x1080) and HD (1280x720) resolutions.");
            defineOptionalProperty<std::string>("OP_output_resolution", "-1x-1", "The image resolution (display and output). Use \"-1x-1\" to force the program to use the"
                                                " input image resolution.");
            defineOptionalProperty<double>("OP_scale_gap", 0.3, "Scale gap between scales. No effect unless scale_number > 1. Initial scale is always 1. "
                                           "If you want to change the initial scale, you actually want to multiply the "
                                           "`net_resolution` by your desired initial scale.");
            defineOptionalProperty<int>("OP_scale_number", 1, "Number of scales to average.");
            defineOptionalProperty<std::string>("OP_model_pose", human::pose::model::openpose_body_25::ModelId, "Model to be used. E.g. `BODY_25` (25 keypoints, best model), `COCO` (18 keypoints), `MPI` (15 keypoints, ~10% faster), "
                                                "`MPI_4_layers` (15 keypoints, even faster but less accurate).");
            defineOptionalProperty<std::string>("OP_model_folder", "models/", "Folder path (absolute or relative) where the models (pose, face, ...) are located.");
            defineOptionalProperty<int>("OP_num_gpu_start", 0, "GPU device start number.");
            defineOptionalProperty<float>("OP_render_threshold", 0.05f, "Only estimated keypoints whose score confidences are higher than this threshold will be"
                                          " rendered.\n Generally, a high threshold (> 0.5) will only render very clear body parts;"
                                          " while small thresholds (~0.1) will also output guessed and occluded keypoints, but also"
                                          " more false positives (i.e. wrong detections).");

            // Depth masking (declared as modifiable properties)
            defineOptionalProperty<int>("MaxDepth", 3000, "Pixels with a distance higher than this value are masked out. Only for depth camera mode.", PropertyDefinitionBase::eModifiable);
            defineOptionalProperty<int>("MaxDepthDifference", 700, "Allowed difference of depth value for one keypoint to median of all keypoints.", PropertyDefinitionBase::eModifiable);
            defineOptionalProperty<int>("MaskBrightnessIncrease", 100, "All masked pixels will be increased by this brightness for visualization. If value is <0, the original mask is shown.", PropertyDefinitionBase::eModifiable);

            // Robot, workspace filtering and reporting
            defineOptionalProperty<std::string>("RobotStateComponentName", "RobotStateComponent");
            defineOptionalProperty<std::string>("CameraNodeName", "DepthCamera", "Name of the robot node for the input camera");
            defineOptionalProperty<bool>("FilterWithWorkspacePolygonActive", true, "Whether to use the WorkspacePolygon for filtering keypoints.");
            defineOptionalProperty<std::string>("WorkspacePolygon", "-5000,-5000;5000,-5000;5000,5000;-5000,5000;-5000,-5000",
                                                "A string that describes a polygon which represents the workspace of the robot.\n"
                                                "Every keypoint outside of this polygon will be masked out.\n"
                                                "Every point is separated by a ';' (semicolon) and every point is described as 'x-value, y-value' (comma-separated).\n"
                                                "The last point must be identical to the first point.");
            defineOptionalProperty<int>("MinimalAmountKeypoints", 5, "Minimal amount of keypoints per person. Detected persons with less valid keypoints will be discarded.");
            defineOptionalProperty<bool>("ReportOnlyNearestPerson", false, "If true, only one person is reported in the 3DTopic. The reported person is the nearest person to the robot.");
            defineOptionalProperty<bool>("ActivateOnStartup", true, "If true, poseEstimation-tasks are started after starting the component. If false, the component idles.");

            // Only evaluated in mode 'FromTopic' (see the description string)
            defineOptionalProperty<std::string>("Topic_Dimensions", "480x360", "Only for: 'Mode'='FromTopic'!! The dimensions of the image whose keypoints are reported over the topic.");
        }
    };


    /**
     * @defgroup Component-OpenPoseEstimation OpenPoseEstimation
     * @ingroup VisionX-Components
     * Image processor component for OpenPose-based human keypoint estimation.
     *
     * @class OpenPoseEstimation
     * @ingroup Component-OpenPoseEstimation
     * @brief Image processor that declares the 2D/3D keypoint estimation pipeline built on OpenPose.
     *
     * The class is a visionx::ImageProcessor, implements OpenPoseEstimationInterface and uses the
     * ArViz component plugin. It holds the image buffers, camera calibrations, OpenPose objects,
     * topic proxies and the two RunningTask handles for 2D and 3D keypoints. The input source is
     * selected by OpenPoseEstimationMode.
     *
     * Only declarations are given here; the member functions (except getDefaultName()) are
     * implemented outside this header.
     */
    class OpenPoseEstimation :
        public visionx::ImageProcessor,
        public OpenPoseEstimationInterface,
        virtual public armarx::ArVizComponentPluginUser
    {
    public:
        /// Keypoint container type as used by OpenPose.
        using PoseKeypoints = op::Array<float>;

        /**
         * @see armarx::ManagedIceObject::getDefaultName()
         * @return The fixed name "OpenPoseEstimation".
         */
        std::string getDefaultName() const override
        {
            return "OpenPoseEstimation";
        }

        // Ice interface (overrides): starting/stopping of the estimation and of its 3D part.
        void start(const Ice::Current& = Ice::emptyCurrent)  override;
        void stop(const Ice::Current& = Ice::emptyCurrent)  override;
        void start3DPoseEstimation(const Ice::Current& = Ice::emptyCurrent)  override;
        void stop3DPoseEstimation(const Ice::Current& = Ice::emptyCurrent)  override;

        /**
         * @see PropertyUser::createPropertyDefinitions()
         */
        armarx::PropertyDefinitionsPtr createPropertyDefinitions() override;

        // Static rendering helpers. The two Render2DResultImage overloads differ only in the
        // keypoint source (KeypointManagerPtr vs. raw OpenPose keypoints); resultImage is the
        // only non-const image argument, i.e. presumably the output (confirm in the .cpp).
        static void Render2DResultImage(const CByteImage& inputImage, const CByteImage& maskedInputImage, KeypointManagerPtr keypointManager, CByteImage& resultImage, op::PoseModel poseModel, float renderThreshold, int brightnessIncrease = 100);
        static void Render2DResultImage(const CByteImage& inputImage, const CByteImage& maskedInputImage, PoseKeypoints& keypoints, CByteImage& resultImage, op::PoseModel poseModel, float renderThreshold, int brightnessIncrease = 100);
        static void VisualizeTransparentImageMask(CByteImage& resultImage, const CByteImage& maskedInputImage, int brightnessIncrease, const CByteImage& inputImage);

    protected:
        // ImageProcessor interface
        void onInitImageProcessor() override;
        void onConnectImageProcessor()  override;
        void onDisconnectImageProcessor() override;
        void onExitImageProcessor() override;
        void process() override;

        // Keypoint pipeline steps
        KeypointManagerPtr generate2DKeypoints(PoseKeypoints& keypoints, const CByteImage& rgbImage) const;
        void calculate2DFromOpenPose();
        void calculate2DFromTopic();
        void calculate3DFromDepthImage(KeypointManagerPtr manager);
        void calculate3DFromStereoImage(KeypointManagerPtr manager);
        void filterKeypointsBasedOnWorkspacePolygon(KeypointObjectPtr object);
        void filterToNearest();
        void visualize3DKeypoints();

        // Image helpers
        int getMedianDepthFromImage(int x, int y, int radius) const;
        void maskOutBasedOnDepth(CByteImage& image, int maxDepth);
        DrawColor24Bit getDominantColorOfPatch(const CByteImage& image, const Vec2d& point, int windowSize = 10) const;

        // Texting interface
        void onMessage(const Texting::TextMessage& text, const Ice::Current& = Ice::emptyCurrent) override;

    private:
        // Meta (Properties)
        // The initialisers below are neutral placeholders that only rule out indeterminate
        // reads; the effective values are expected to be assigned from the component
        // properties outside this header (confirm in the .cpp).
        OpenPoseEstimationMode mode = FromDepthImage;
        std::string providerName;
        int radius = 0;
        bool useDistortionParameters = false;
        bool reportOnlyNearestPerson = false;
        bool filterWithWorkspacePolygonActive = false;
        std::string cameraNodeName;
        Polygon2D workspacePolygon;
        int minimalValidKeypoints = 0;
        float renderThreshold = 0.0f;

        // Robot
        RobotStateComponentInterfacePrx robotStateInterface;
        VirtualRobot::RobotPtr localRobot;

        // Keypoints
        KeypointManagerPtr keypointManager;
        std::mutex keypointManagerMutex;

        // Image buffers and image information
        // All raw pointers start as nullptr, like the calibration pointers further down.
        // Ownership is not visible in this header.
        CByteImage** imageBuffer = nullptr;
        CByteImage* rgbImageBuffer = nullptr;
        CByteImage* maskedrgbImageBuffer = nullptr;
        std::mutex rgbImageBufferMutex;
        CByteImage* depthImageBuffer = nullptr;
        std::mutex depthImageBufferMutex;
        CByteImage** openPoseResultImage = nullptr;
        std::mutex resultImageBufferMutex;
        visionx::ImageProviderInfo imageProviderInfo;
        armarx::MetaInfoSizeBasePtr imageMetaInfo;
        unsigned int numImages = 0;

        // SecondHandsTopic
        ImageKeypointBufferPtr imageKeypointBuffer;
        visionx::ImageDimension incomingKeypointDimensions;

        // Calibrations (IVT-Objects)
        const CCalibration* calibration = nullptr;
        CStereoCalibration* stereoCalibration = nullptr;

        // Visualization
        armarx::DebugDrawerInterfacePrx debugDrawerTopic;
        std::string layerName;
        int layerCounter = 0;

        // Topics
        OpenPose2DListenerPrx listener2DPrx;
        OpenPose3DListenerPrx listener3DPrx;

        // Threads and program flow information
        // NOTE(review): if these plain flags are written and read by different threads
        // (Ice calls vs. the running tasks), they need synchronisation - confirm in the .cpp.
        RunningTask<OpenPoseEstimation>::pointer_type task2DKeypoints;
        RunningTask<OpenPoseEstimation>::pointer_type task3DKeypoints;
        bool running2D = false;
        bool running3D = false;
        void run();
        bool imageUpdated = false; // Is true, if new images are available (when mode is 'FromTopic' this also means that corresponding keypoints are available)
        long timeProvidedImage = 0; // Contains the timestamp of the currently available images (when mode is 'FromTopic' this is also the timestamp of the corresponding keypoints)

        // OpenPose
        std::shared_ptr<op::ScaleAndSizeExtractor> scaleAndSizeExtractor;
        std::shared_ptr<op::CvMatToOpInput> cvMatToOpInput;
        std::shared_ptr<op::CvMatToOpOutput> cvMatToOpOutput;
        std::shared_ptr<op::PoseExtractorCaffe> poseExtractorCaffe;
        std::shared_ptr<op::OpOutputToCvMat> opOutputToCvMat;
        void setupOpenPoseEnvironment();
        PoseKeypoints getPoseKeypoints(CByteImage* imageBuffer);
        op::PoseModel poseModel{}; // value-initialised placeholder until the model is selected

        // ErrorCounters
        std::uint64_t timeoutCounter2d{0};
        std::uint64_t readErrorCounter2d{0};
        std::uint64_t sucessCounter2d{0};
    };

}