RGBDOpenPoseEstimationComponent.cpp
Go to the documentation of this file.
1/*
2 * This file is part of ArmarX.
3 *
4 * ArmarX is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 *
8 * ArmarX is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 *
16 * @package armarx::ArmarXObjects::OpenPoseEstimation
17 * @author Stefan Reither ( stef dot reither at web dot de )
18 * @date 2018
19 * @copyright http://www.gnu.org/licenses/gpl-2.0.txt
20 * GNU General Public License
21 */
22
24
25#include <SimoxUtility/algorithm/string.h>
26
28
29#include <VisionX/interface/components/RGBDImageProvider.h>
32//#include <VisionX/libraries/ArViz/HumanPoseBody25.h>
33
34using namespace armarx;
35
// Registers the Ice topic/proxy dependencies and the user-configurable
// properties of the RGB-D OpenPose estimation plugin.
// NOTE(review): the function signature and several lines (doc 37-38, 40, 52,
// 55, 59 -- e.g. the variables bound to "MaxDepth" and the default values)
// were lost when this listing was extracted from the documentation page. Per
// the symbol index this is most likely
// postCreatePropertyDefinitions(PropertyDefinitionsPtr& def) -- confirm
// against the repository before relying on this listing.
36void
39{
41
// Topic on which the estimated 3D human poses are published.
42 def->topic(listener3DPrx, "OpenPoseEstimation3D", "OpenPoseEstimation3DTopicName");
// Proxy used to synchronize the local robot model for frame transformations.
43 def->component(robotStateInterface, "RobotStateComponent");
44
45 def->optional(useDistortionParameters,
46 "UseDistortionParameters",
47 "Whether to use distortion parameters when transforming image coordinates into "
48 "world coordinates");
49
// Radius (in pixels) of the circular neighborhood used by the median depth lookup.
50 def->optional(radius, "DepthMedianRadius", "Depth Median Radius");
51 def->optional(
53 "MaxDepth",
54 "Pixels with a distance higher than this value are masked out. Only for depth camera mode.",
// Outlier rejection: keypoint depths deviating more than this from the median
// of all keypoints are clamped to the median.
56 def->optional(maxDepthDifference,
57 "MaxDepthDifference",
58 "Allowed difference of depth value for one keypoint to median of all keypoints.",
60 def->optional(cameraNodeName, "CameraNodeName", "The robot node name of the camera");
61
62 def->optional(brightnessIncrease, "BrightnessIncrease", "Increase brightness of masked pixels");
63}
64
// Connect-time setup: fetches the stereo calibration from the image provider
// so that 2D keypoints can later be projected into 3D camera coordinates.
// NOTE(review): the signature and several lines (doc 66, 68, 71, 93, 95) were
// dropped by the documentation extraction; this is most likely the component's
// onConnect...() hook -- confirm against the repository.
65void
67{
69
70 // Trying to access robotStateComponent if it is available
72
73 ARMARX_VERBOSE << "Trying to get StereoCalibrationInterface proxy: "
74 << getIceManager()->getCommunicator()->proxyToString(imageProviderInfo.proxy);
// The image provider must additionally implement StereoCalibrationInterface;
// without it the 2D->3D projection cannot work.
75 visionx::StereoCalibrationInterfacePrx calib =
76 visionx::StereoCalibrationInterfacePrx::checkedCast(imageProviderInfo.proxy);
77 if (!calib)
78 {
79 ARMARX_ERROR << "Image provider does not provide a stereo calibration - 3D will not work - "
80 << imageProviderInfo.proxy->ice_ids();
81 }
82 else
83 {
84 ARMARX_VERBOSE << "got StereoCalibrationInterface proxy";
// Convert the Ice calibration struct into an IVT CStereoCalibration and keep
// only the left camera's mono calibration for image->world projection.
85 CStereoCalibration* stereoCalibration =
86 visionx::tools::convert(calib->getStereoCalibration());
87 ARMARX_VERBOSE << "got StereoCalibration";
88 calibration = stereoCalibration->GetLeftCalibration();
89 ARMARX_VERBOSE << "got mono Calibration";
90 }
91
92
// NOTE(review): the condition and body of this trailing block were lost in
// extraction (doc lines 93 and 95).
94 {
96 }
97}
98
99void
107
108void
113
114void
119
120void
125
129
// Renders the output image for the 3D pipeline: delegates 2D rendering to the
// base class, masks out far pixels, looks up a median depth per 2D keypoint,
// clamps depth outliers to the per-person median, projects each keypoint into
// camera-frame 3D coordinates and fills openposeResult3D with both local and
// global coordinates (via the synchronized local robot model). Finally applies
// the nearest-person filter.
// NOTE(review): the signature and a few lines (doc 131, 137, 145-146, 191,
// 195, 198) were lost in extraction. The missing ImageToWorldCoordinates
// argument is presumably `useDistortionParameters` and the missing
// FramedPosition frame argument presumably `cameraNodeName` -- confirm.
130void
132{
133 Base::renderOutputImage(op_array);
134
// 3D post-processing needs a synchronized robot model and the 3D mode enabled.
135 if (!localRobot || !running3D)
136 {
138 << "Could not render output image for 3d pose estimation";
139 return;
140 }
141
// Guard the depth image buffer against concurrent provider updates.
142 std::unique_lock depthImage_lock(depthImageBufferMutex);
143
144 maskOutputImageBasedOnDepth();
147
148 const int depthThreshold = maxDepthDifference;
149 openposeResult3D.clear();
150
151 for (const auto& [name, entity] : openposeResult)
152 {
// Skip detections without any keypoints.
153 if (entity.keypointMap.size() == 0)
154 {
155 continue;
156 }
157
158 std::map<std::string, int>
159 depthStorage; // we do not want to store perspective depth-values in a Keypoint, we transfer them into world-coordinates (in camera frame) anyway
160 std::vector<int> depthsCopy;
161
162 // Get camera depth information from image coordinates
163 for (const auto& [kp_name, point] : entity.keypointMap)
164 {
165 int depth = getMedianDepthFromImage(
166 static_cast<int>(point.x), static_cast<int>(point.y), radius);
167 depthStorage[kp_name] = depth;
168 depthsCopy.push_back(depth);
169 }
170
171 // Find outlier in depth values and set them to median of depth values
172 std::sort(depthsCopy.begin(), depthsCopy.end());
173 const int medianDepth = depthsCopy.at(depthsCopy.size() / 2);
174 for (auto& [storage_name, depth] : depthStorage)
175 {
// Clamp keypoints whose depth deviates from the median by more than the
// configured MaxDepthDifference threshold.
176 if (depth > medianDepth + depthThreshold || depth < medianDepth - depthThreshold)
177 {
178 depth = medianDepth;
179 }
180 }
181
182 // Transform pixel + depth into world coordinates
183 // and update stored 3d entity
184 openposeResult3D[name] = HumanPose3D();
185 for (const auto& [kp_name, point] : entity.keypointMap)
186 {
187 Vec2d imagePoint;
188 imagePoint.x = point.x;
189 imagePoint.y = point.y;
190 Vec3d result;
192 calibration->ImageToWorldCoordinates(imagePoint,
193 result,
194 static_cast<float>(depthStorage.at(kp_name)),
196
// Wrap the camera-frame point so it can be transformed into the global frame.
197 FramedPosition pos = FramedPosition(Eigen::Vector3f(result.x, result.y, result.z),
199 localRobot->getName());
200 auto globalEigen = pos.toGlobalEigen(localRobot);
// Camera-local coordinates (millimeters, camera frame).
201 openposeResult3D[name].keypointMap[kp_name].x = result.x;
202 openposeResult3D[name].keypointMap[kp_name].y = result.y;
203 openposeResult3D[name].keypointMap[kp_name].z = result.z;
204
// Global (world frame) coordinates.
205 openposeResult3D[name].keypointMap[kp_name].globalX = globalEigen(0);
206 openposeResult3D[name].keypointMap[kp_name].globalY = globalEigen(1);
207 openposeResult3D[name].keypointMap[kp_name].globalZ = globalEigen(2);
208
// Carry over 2D metadata unchanged.
209 openposeResult3D[name].keypointMap[kp_name].label = point.label;
210 openposeResult3D[name].keypointMap[kp_name].confidence = point.confidence;
211 openposeResult3D[name].keypointMap[kp_name].dominantColor = point.dominantColor;
212 }
213 }
214
// Optionally reduce the result to the person closest to the camera.
215 filterToNearest();
216}
217
// Reports the computed 3D keypoints, presumably via listener3DPrx -- the
// signature and the actual reporting call (doc lines 219, 221, 224) were lost
// in extraction, only the debug log line survived. Confirm in the repository.
218void
220{
222 ARMARX_DEBUG << deactivateSpam(0.5) << "Reporting 3Dkeypoints for " << openposeResult3D.size()
223 << " entities";
225}
226
227int
228RGBDOpenPoseEstimationComponentPluginUser::getMedianDepthFromImage(int x, int y, int radius) const
229{
230 std::vector<int> depths;
231 for (int xoffset = -radius; xoffset < radius; xoffset++)
232 {
233 int xo = x + xoffset;
234 if (xo < 0 || xo > depthImageBuffer->width)
235 {
236 continue;
237 }
238 for (int yoffset = -radius; yoffset < radius; yoffset++)
239 {
240 int yo = y + yoffset;
241 if (yo < 0 || yo > depthImageBuffer->height)
242 {
243 continue;
244 }
245
246 // Check whether (x,y) is in circle:
247 if (xoffset * xoffset + yoffset * yoffset <= radius * radius)
248 {
249 unsigned int pixelPos =
250 static_cast<unsigned int>(3 * (yo * depthImageBuffer->width + xo));
251 int z_value = depthImageBuffer->pixels[pixelPos + 0] +
252 (depthImageBuffer->pixels[pixelPos + 1] << 8) +
253 (depthImageBuffer->pixels[pixelPos + 2] << 16);
254 if (z_value > 0)
255 {
256 depths.push_back(z_value);
257 }
258 }
259 }
260 }
261 std::sort(depths.begin(), depths.end());
262
263 return depths.empty() ? 0 : depths[depths.size() / 2];
264}
265
// Masks out every pixel of the OpenPose result image whose depth measurement
// is missing (0) or farther away than `maxDepth` (interpreted as millimeters,
// see `depthThresholdmm`). Masked pixels are painted green; if
// `brightnessIncrease` > 0 they are additionally brightened with a Gaussian-
// smoothed mask so the silhouette border fades gradually. No-op when
// maxDepth <= 0.
// NOTE(review): doc lines 274-277 were lost in extraction (plausibly sanity
// checks on depthImageBuffer / openposeResultImage) -- confirm in the repo.
266void
267RGBDOpenPoseEstimationComponentPluginUser::maskOutputImageBasedOnDepth()
268{
// Masking disabled via the MaxDepth property.
269 if (maxDepth <= 0)
270 {
271 return;
272 }
273
276
278
279 int pixelCount = depthImageBuffer->width * depthImageBuffer->height;
280 int depthThresholdmm = maxDepth;
// depthMaskImage: 255 = keep pixel, 0 = mask out.
281 CByteImage depthMaskImage(
282 depthImageBuffer->width, depthImageBuffer->height, CByteImage::eGrayScale);
// brightenMaskImage marks masked pixels; later smoothed to fade brightening.
283 CByteImage brightenMaskImage(
284 openposeResultImage[0]->width, openposeResultImage[0]->height, CByteImage::eGrayScale);
285 ::ImageProcessor::Zero(&depthMaskImage);
286 ::ImageProcessor::Zero(&brightenMaskImage);
287
288 for (int i = 0; i < pixelCount; i += 1)
289 {
// Reassemble the 24-bit depth value from the three RGB channels.
290 int z_value = depthImageBuffer->pixels[i * 3 + 0] +
291 (depthImageBuffer->pixels[i * 3 + 1] << 8) +
292 (depthImageBuffer->pixels[i * 3 + 2] << 16);
// Mask pixels that are too far away or have no measurement at all.
293 depthMaskImage.pixels[i] = z_value > depthThresholdmm || z_value == 0 ? 0 : 255;
294 }
295
// Morphological cleanup: erode removes speckle "keep" pixels, the large
// dilation then grows the keep-region back generously around real foreground.
296 ::ImageProcessor::Erode(&depthMaskImage, &depthMaskImage, 5);
297 ::ImageProcessor::Dilate(&depthMaskImage, &depthMaskImage, 20);
298
299 for (int i = 0; i < pixelCount; i += 1)
300 {
301 if (depthMaskImage.pixels[i] == 0)
302 {
303 // set to green
304 openposeResultImage[0]->pixels[i * 3] = 0;
305 openposeResultImage[0]->pixels[i * 3 + 1] = 255;
306 openposeResultImage[0]->pixels[i * 3 + 2] = 0;
307
308 // add brightness to mask
309 brightenMaskImage.pixels[i] = 255;
310 }
311 }
312
313 // brighten if necessary
314 if (brightnessIncrease > 0)
315 {
// Smooth the binary mask so the brightness increase ramps down at the border.
316 CByteImage smoothedImageMask(&brightenMaskImage);
317 ::ImageProcessor::GaussianSmooth5x5(&brightenMaskImage, &smoothedImageMask);
318
319 for (int i = 0; i < pixelCount; i += 1)
320 {
321 if (depthMaskImage.pixels[i] == 0)
322 {
// Scale the configured increase by the smoothed mask value and clamp to 255.
323 float perc = static_cast<float>(smoothedImageMask.pixels[i]) / 255.f;
324 int effectiveBrightnessIncrease = brightnessIncrease * perc;
325 openposeResultImage[0]->pixels[i * 3] = std::min<int>(
326 openposeResultImage[0]->pixels[i * 3] + effectiveBrightnessIncrease, 255);
327 openposeResultImage[0]->pixels[i * 3 + 1] = std::min<int>(
328 openposeResultImage[0]->pixels[i * 3 + 1] + effectiveBrightnessIncrease, 255);
329 openposeResultImage[0]->pixels[i * 3 + 2] = std::min<int>(
330 openposeResultImage[0]->pixels[i * 3 + 2] + effectiveBrightnessIncrease, 255);
331 }
332 }
333 }
334}
335
336void
342
// Enables the 3D pose estimation mode (idempotent: returns immediately when it
// is already running).
// NOTE(review): the signature line (doc 344) was lost in extraction; per the
// symbol index this is start3DPoseEstimation(const Ice::Current&) -- confirm.
343void
345{
346 if (running3D)
347 {
348 return;
349 }
350 else
351 {
352 ARMARX_INFO << "Starting OpenposeEstimation -- 3D";
353 running3D = true;
354 }
355}
356
// Disables the 3D pose estimation mode.
// NOTE(review): the signature line (doc 358) was lost in extraction; per the
// symbol index this is stop3DPoseEstimation(const Ice::Current&). The call to
// start() below is suspicious for a stop routine -- the hyperlinked identifier
// may have been garbled by extraction (stop()?); verify against the repository.
357void
359{
360 start();
361 if (running3D)
362 {
363 ARMARX_INFO << "Stopping OpenposeEstimation -- 3D";
364 running3D = false;
365 }
366}
367
// Ice handler: switches the 3D estimation on/off according to the request.
// NOTE(review): the signature line (doc 369) and doc line 374 (which plausibly
// handles input.enable2d or similar) were lost in extraction -- confirm.
368void
370 const EnableHumanPoseEstimationInput& input,
371 const Ice::Current&)
372{
373 running3D = input.enable3d;
375}
376
377/*void OpenPoseEstimationComponent::filterEntitiesBasedOnWorkspacePolygon()
378{
379 // polygon was: -5000,-5000;5000,-5000;5000,5000;-5000,5000;-5000,-5000
380 // Setup workspace-polygon
381 std::vector<Polygon2D::Point> points;
382 std::vector<std::string> pointStrings = simox::alg::split(pluginUser.workspacePolygonString, ";");
383 for (auto s : pointStrings)
384 {
385 ARMARX_VERBOSE << "WorkspacePolygon: " << s;
386 std::vector<std::string> workSpacePolygonPoint = simox::alg::split(s, ",");
387 ARMARX_CHECK_EXPRESSION(workSpacePolygonPoint.size() == 2);
388 Polygon2D::Point point;
389 point.x = std::strtof(workSpacePolygonPoint.at(0).c_str(), nullptr);
390 point.y = std::strtof(workSpacePolygonPoint.at(1).c_str(), nullptr);
391 points.push_back(point);
392 }
393 pluginUser.workspacePolygon = Polygon2D(points);
394
395 if (!localRobot || !running3D_is_possible || !running3D)
396 {
397 return;
398 }
399
400 Entity3DMap::iterator iter = openposeResult3D.begin();
401 while (iter != openposeResult3D.end())
402 {
403 const std::string& name = iter->first;
404 const Entity3D& entity = iter->second;
405
406 for(const auto& [kp_name, point] : entity.keypointMap)
407 {
408 FramedPositionPtr pos = point->get3D()->toGlobal(localRobot); // Changing frame of copy
409 if (!workspacePolygon.isInside(pos))
410 {
411 ARMARX_VERBOSE << deactivateSpam(1) << "removing entity because it is out of the workspace";
412 iter = openposeResult3D.erase(iter);
413 }
414 else
415 {
416 iter++;
417 }
418 }
419 }
420}*/
421
422void
423RGBDOpenPoseEstimationComponentPluginUser::filterToNearest()
424{
425 if (!reportOnlyNearestPerson || openposeResult3D.size() == 0)
426 {
427 return;
428 }
429
430 std::vector<std::pair<std::string, HumanPose3D>> poses;
431 for (const auto& [key, humanPose] : openposeResult3D)
432 {
433 poses.push_back({key, humanPose});
434 }
435
436 std::sort(poses.begin(),
437 poses.end(),
438 [](std::pair<std::string, HumanPose3D> o1, std::pair<std::string, HumanPose3D> o2)
439 {
440 auto humanPose1 = o1.second;
441 auto humanPose2 = o2.second;
442 float humanPose1AverageDepth = std::numeric_limits<float>::quiet_NaN();
443 float humanPose2AverageDepth = std::numeric_limits<float>::quiet_NaN();
444 if (humanPose1.keypointMap.size() == 0)
445 {
446 float result = 0.0f;
447 int amountDepths = 0;
448 for (const auto& [k, point] : humanPose1.keypointMap)
449 {
450 result += point.z;
451 amountDepths++;
452 }
453 humanPose1AverageDepth = result / static_cast<float>(amountDepths);
454 }
455
456 if (humanPose2.keypointMap.size() == 0)
457 {
458 float result = 0.0f;
459 int amountDepths = 0;
460 for (const auto& [k, point] : humanPose2.keypointMap)
461 {
462 result += point.z;
463 amountDepths++;
464 }
465 humanPose2AverageDepth = result / static_cast<float>(amountDepths);
466 }
467
468 return humanPose1AverageDepth < humanPose2AverageDepth;
469 });
470 poses.resize(1);
471
472 openposeResult3D.clear();
473 for (const auto& p : poses)
474 {
475 openposeResult3D[p.first] = p.second;
476 }
477}
478
// Publishes the current 3D poses to ArViz and clears the result buffer.
// NOTE(review): the signature line (doc 480) was lost in extraction -- this is
// presumably the 3D keypoint visualization hook; confirm in the repository.
// The per-pose drawing call is commented out below, so currently only the
// (possibly empty) layer commit takes effect.
479void
481{
482 // also show in ArViz
483 viz::Layer openPoseArVizLayer = arviz.layer(layerName);
// Nothing to draw: commit the empty layer so stale visuals get cleared.
484 if (openposeResult3D.empty())
485 {
486 arviz.commit(openPoseArVizLayer);
487
488 return;
489 }
490
491 int human = 1;
492 for (const auto& [name, humanPose] : openposeResult3D)
493 {
494 std::string objectName = "human_" + name;
495 //armarx::viz::HumanPoseBody25::addPoseToLayer(humanPose.keypointMap, openPoseArVizLayer, objectName);
496 human++;
497 }
498
499 arviz.commit(openPoseArVizLayer);
500 openposeResult3D.clear(); // it's the last step
502}
if(!yyvaluep)
Definition Grammar.cpp:645
SpamFilterDataPtr deactivateSpam(SpamFilterDataPtr const &spamFilter, float deactivationDurationSec, const std::string &identifier, bool deactivate)
Definition Logging.cpp:75
constexpr T c
The FramedPosition class.
Definition FramedPose.h:158
Eigen::Vector3f toGlobalEigen(const SharedRobotInterfacePrx &referenceRobot) const
IceManagerPtr getIceManager() const
Returns the IceManager.
void stop(const Ice::Current &=Ice::emptyCurrent) override
void start(const Ice::Current &=Ice::emptyCurrent) override
virtual void postCreatePropertyDefinitions(PropertyDefinitionsPtr &properties)
void enableHumanPoseEstimation(const EnableHumanPoseEstimationInput &input, const Ice::Current &=Ice::emptyCurrent) override
virtual void renderOutputImage(const op::Array< float > &)
void start3DPoseEstimation(const Ice::Current &=Ice::emptyCurrent) override
void stop(const Ice::Current &=Ice::emptyCurrent) override
void stop3DPoseEstimation(const Ice::Current &=Ice::emptyCurrent) override
void postCreatePropertyDefinitions(PropertyDefinitionsPtr &properties) override
void enableHumanPoseEstimation(const EnableHumanPoseEstimationInput &input, const Ice::Current &) override
VirtualRobot::RobotPtr createLocalClone()
Clones the structure of this remote robot to a local instance.
static bool synchronizeLocalCloneToTimestamp(VirtualRobot::RobotPtr robot, RobotStateComponentInterfacePrx robotStatePrx, Ice::Long timestamp)
Synchronizes a local robot to a robot state at timestamp.
#define ARMARX_CHECK_EXPRESSION(expression)
This macro evaluates the expression and if it turns out to be false it will throw an ExpressionExcept...
#define ARMARX_CHECK_NOT_NULL(ptr)
This macro evaluates whether ptr is not null and if it turns out to be false it will throw an Express...
#define ARMARX_INFO
The normal logging level.
Definition Logging.h:181
#define ARMARX_ERROR
The logging level for unexpected behaviour, that must be fixed.
Definition Logging.h:196
#define ARMARX_DEBUG
The logging level for output that is only interesting while debugging.
Definition Logging.h:184
#define ARMARX_WARNING
The logging level for unexpected behaviour, but not a serious problem.
Definition Logging.h:193
#define ARMARX_VERBOSE
The logging level for verbose information.
Definition Logging.h:187
This file offers overloads of toIce() and fromIce() functions for STL container types.
IceUtil::Handle< class PropertyDefinitionContainer > PropertyDefinitionsPtr
PropertyDefinitions smart pointer type.
CByteImage::ImageType convert(const ImageType visionxImageType)
Converts a VisionX image type into an image type of IVT's ByteImage.