components_2object__perception_2FaceRecognition_2FaceRecognition_8cpp_source.html

/*

 * This file is part of ArmarX.

 *

 * ArmarX is free software; you can redistribute it and/or modify

 * it under the terms of the GNU General Public License version 2 as

 * published by the Free Software Foundation.

 *

 * ArmarX is distributed in the hope that it will be useful, but

 * WITHOUT ANY WARRANTY; without even the implied warranty of

 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

 * GNU General Public License for more details.

 *

 * You should have received a copy of the GNU General Public License

 * along with this program. If not, see <http://www.gnu.org/licenses/>.

 *

 * @package    VisionX::ArmarXObjects::FaceRecognition

 * @author     Markus Grotz ( markus dot grotz at kit dot edu )

 * @date       2016

 * @copyright  http://www.gnu.org/licenses/gpl-2.0.txt

 *             GNU General Public License

 */


#include "FaceRecognition.h"


#include <Eigen/Geometry>


#include <SimoxUtility/algorithm/string/string_tools.h>


#include <ArmarXCore/core/system/ArmarXDataPath.h>

#include <ArmarXCore/core/system/cmake/CMakePackageFinder.h>


#include <RobotAPI/libraries/core/FramedPose.h>


#include <VisionX/tools/TypeMapping.h>


#include <Image/ImageProcessor.h>

#include <Image/IplImageAdaptor.h>

#include <Image/PrimitivesDrawer.h>

#include <Image/PrimitivesDrawerCV.h>

#include <MemoryX/core/MemoryXCoreObjectFactories.h>


using namespace armarx;


void

armarx::FaceRecognition::onInitObjectLocalizerProcessor()

{


    stereoMatcher = new CStereoMatcher();


    model = cv::face::FisherFaceRecognizer(0, 1000.0);


    armarx::CMakePackageFinder finder("VisionX");

    ArmarXDataPath::addDataPaths(finder.getDataDir());


    std::string trainingDataPath = getProperty<std::string>("trainingDataPath").getValue();


    if (!ArmarXDataPath::getAbsolutePath(trainingDataPath, trainingDataPath))

    {

        ARMARX_ERROR << "Could not find data file in ArmarXDataPath: " << trainingDataPath;

    }


    std::string classifierFileName = getProperty<std::string>("classifierFileName").getValue();


    if (!ArmarXDataPath::getAbsolutePath(classifierFileName, classifierFileName))

    {

        ARMARX_ERROR << "Could not find data file in ArmarXDataPath: " << classifierFileName;

    }


    if (!std::filesystem::is_directory(trainingDataPath))

    {

        ARMARX_FATAL << "unable to load training model";

    }


    std::vector<std::string> fileNames;


    for (auto iter = std::filesystem::directory_iterator(trainingDataPath);

         iter != std::filesystem::directory_iterator();

         ++iter)

    {

        fileNames.push_back(iter->path().string());

    }


    std::sort(fileNames.begin(), fileNames.end());


    std::string currentLabel;


    std::vector<cv::Mat> images;

    std::vector<int> index;


    for (std::string& fileName : fileNames)

    {

        const auto stem = std::filesystem::path(fileName).stem().string();

        std::vector<std::string> strs = simox::alg::split(stem, "_");

        std::string label = strs[0];


        if (currentLabel != label)

        {

            labels[labels.size()] = label;

            currentLabel = label;

        }

        images.push_back(cv::imread(fileName, CV_LOAD_IMAGE_GRAYSCALE));

        index.push_back(labels.size() - 1);

    }


    ARMARX_LOG << "total number of classes: " << labels.size();


    model->train(images, index);


    faceImageSize = images[0].size();


    classifier.load(classifierFileName);

}


/**
 * Connect hook: passes the stereo calibration returned by
 * getStereoCalibration() to the stereo matcher that was created in
 * onInitObjectLocalizerProcessor().
 */
void
armarx::FaceRecognition::onConnectObjectLocalizerProcessor()
{
    auto&& calibration = getStereoCalibration();

    // NOTE(review): the meaning of the second argument (false) is defined by
    // CStereoMatcher::InitCameraParameters — presumably "do not clone the
    // calibration"; confirm against the IVT documentation.
    stereoMatcher->InitCameraParameters(calibration, false);
}


/**
 * Exit hook: releases the stereo matcher allocated in
 * onInitObjectLocalizerProcessor().
 */
void
armarx::FaceRecognition::onExitObjectLocalizerProcessor()
{
    delete stereoMatcher;
    // Reset the pointer so that a repeated exit is a harmless delete of nullptr
    // instead of a double free, and no dangling pointer is left behind.
    stereoMatcher = nullptr;
}


/**
 * Recognizer initialization hook.
 *
 * No work is done here; the recognizer is created and trained in
 * onInitObjectLocalizerProcessor().
 *
 * @return always true
 */
bool
armarx::FaceRecognition::initRecognizer()
{
    constexpr bool recognizerReady = true;
    return recognizerReady;
}


/**
 * Adding object classes is not supported yet.
 *
 * @param objectClassEntity unused
 * @param fileManager       unused
 * @throws std::logic_error always, since the function is not implemented
 */
bool
armarx::FaceRecognition::addObjectClass(const memoryx::EntityPtr& objectClassEntity,
                                        const memoryx::GridFileManagerPtr& fileManager)
{
    ///@TODO implement
    const char* const notImplementedMessage =
        "bool armarx::FaceRecognition::addObjectClass not implemented yet";
    throw std::logic_error(notImplementedMessage);
}


/**
 * Detects faces in the left camera image, classifies them with the trained
 * recognizer and localizes the requested ones in 3D via stereo matching.
 *
 * Every detected face is marked in resultImages[0]: a rectangle in
 * CV_RGB(0, 0, 255) if the recognizer returned a negative label, otherwise a
 * rectangle in CV_RGB(0, 255, 0) plus a "<class name>=<confidence>" caption.
 * A localization result is produced only for faces whose class name is listed
 * in objectClassNames and for which the stereo match succeeds.
 *
 * @param objectClassNames class names the caller wants to have localized
 * @param cameraImages     cameraImages[0] is used for detection and as left
 *                         stereo image, cameraImages[1] as right stereo image
 * @param imageMetaInfo    unused
 * @param resultImages     resultImages[0] receives the visualization
 * @return one entry per successfully localized, requested face; empty if the
 *         detector or the recognizer has not been initialized
 */
memoryx::ObjectLocalizationResultList
armarx::FaceRecognition::localizeObjectClasses(const std::vector<std::string>& objectClassNames,
                                               CByteImage** cameraImages,
                                               armarx::MetaInfoSizeBasePtr imageMetaInfo,
                                               CByteImage** resultImages)
{
    memoryx::ObjectLocalizationResultList resultList;

    // detectMultiScale() on an unloaded classifier and cv::resize() to an empty
    // target size both fail, so bail out if initialization did not complete.
    if (classifier.empty() || faceImageSize.area() <= 0)
    {
        ARMARX_WARNING << deactivateSpam(5, "FaceRecognitionNotInitialized")
                       << "face recognition is not initialized, skipping localization";
        return resultList;
    }

    // NOTE(review): IplImageAdaptor::Adapt() presumably allocates an IplImage
    // header that is never released here — confirm and release it if so.
    cv::Mat visualization = cv::cvarrToMat(IplImageAdaptor::Adapt(resultImages[0]));

    const cv::Mat tempRGBImage = cv::cvarrToMat(IplImageAdaptor::Adapt(cameraImages[0]));
    cv::Mat original;
    cv::cvtColor(tempRGBImage, original, CV_RGB2BGR);

    cv::Mat gray;
    cv::cvtColor(original, gray, CV_BGR2GRAY);

    std::vector<cv::Rect_<int>> faces;
    classifier.detectMultiScale(gray, faces, 1.05, 2);

    const std::string refFrame = getProperty<std::string>("ReferenceFrameName").getValue();
    const std::string agentName = getProperty<std::string>("AgentName").getValue();

    ARMARX_INFO << deactivateSpam(5, std::to_string(faces.size())) << "found " << faces.size()
                << " possible faces";

    for (const cv::Rect& rect : faces)
    {
        // The recognizer was trained on images of size faceImageSize.
        cv::Mat faceResized;
        cv::resize(gray(rect), faceResized, faceImageSize, 1.0, 1.0, cv::INTER_CUBIC);

        int predictedLabel = -1;
        double predictedConfidence = 0.0;
        model->predict(faceResized, predictedLabel, predictedConfidence);

        if (predictedLabel < 0)
        {
            // Unknown face: only mark it. The label table must not be accessed
            // with a negative id, since operator[] would insert a bogus entry.
            cv::rectangle(visualization, rect, CV_RGB(0, 0, 255), 1);
            continue;
        }

        const std::string className = labels[predictedLabel];
        // Use a fixed format string; the class name comes from a file name and
        // must not be interpreted as a printf format.
        const std::string caption =
            cv::format("%s=%0.1f", className.c_str(), predictedConfidence);

        cv::rectangle(visualization, rect, CV_RGB(0, 255, 0), 1);

        // Place the caption slightly above/left of the face, clamped to the image.
        const int posX = std::max(rect.tl().x - 10, 0);
        const int posY = std::max(rect.tl().y - 10, 0);
        cv::putText(visualization,
                    caption,
                    cv::Point(posX, posY),
                    cv::FONT_HERSHEY_PLAIN,
                    1.0,
                    CV_RGB(0, 255, 0),
                    2);

        // Compare the plain class name (not the caption that includes the
        // confidence value) against the requested classes.
        if (std::find(objectClassNames.begin(), objectClassNames.end(), className) ==
            objectClassNames.end())
        {
            continue;
        }

        // todo see ObjectLearningByPushing/FeatureCalculation.cpp
        // Each gray image takes the dimensions of its own source image; the
        // objects live on the stack and are released automatically.
        CByteImage imgLeftGray(
            cameraImages[0]->width, cameraImages[0]->height, CByteImage::eGrayScale);
        CByteImage imgRightGray(
            cameraImages[1]->width, cameraImages[1]->height, CByteImage::eGrayScale);
        ::ImageProcessor::ConvertImage(cameraImages[0], &imgLeftGray);
        ::ImageProcessor::ConvertImage(cameraImages[1], &imgRightGray);

        Vec2d vCorrespondingPointRight;
        Vec3d vPoint3D;
        const int nDispMin = stereoMatcher->GetDisparityEstimate(10000);
        const int nDispMax = stereoMatcher->GetDisparityEstimate(500);

        // img l, img r, px, py, size of correlation window, min disparity, max disparity,
        // corresponding point 2d, 3d point, correlation threshold, images are undistorted
        const int nMatchingResult = stereoMatcher->Match(&imgLeftGray,
                                                         &imgRightGray,
                                                         rect.x + rect.width / 2,
                                                         rect.y + rect.height / 2,
                                                         std::max(rect.width, rect.height),
                                                         nDispMin,
                                                         nDispMax,
                                                         vCorrespondingPointRight,
                                                         vPoint3D,
                                                         0.7f,
                                                         true);

        if (nMatchingResult < 0)
        {
            continue;
        }

        Eigen::Vector3f position(vPoint3D.x, vPoint3D.y, vPoint3D.z);
        Eigen::Matrix3f orientation =
            Eigen::Matrix3f::Identity() *
            Eigen::AngleAxisf(static_cast<float>(-M_PI), Eigen::Vector3f::UnitZ());

        // The certainty is the inverse of the value reported by predict().
        // Guard against a value of zero (division by zero) and cap the result at 1.
        const double certainty =
            predictedConfidence > 0.0 ? std::min(1.0, 1.0 / predictedConfidence) : 1.0;

        memoryx::ObjectLocalizationResult localization;
        localization.position = new armarx::FramedPosition(position, refFrame, agentName);
        localization.orientation =
            new armarx::FramedOrientation(orientation, refFrame, agentName);
        localization.recognitionCertainty = certainty;
        localization.positionNoise = calculateLocalizationUncertainty(position);
        localization.objectClassName = className;

        resultList.push_back(localization);
    }

    return resultList;
}


/**
 * Creates the property definitions of this component.
 *
 * @return a new FaceRecognitionPropertyDefinitions instance constructed with
 *         the component's config identifier
 */
armarx::PropertyDefinitionsPtr
FaceRecognition::createPropertyDefinitions()
{
    armarx::PropertyDefinitionsPtr definitions(
        new FaceRecognitionPropertyDefinitions(getConfigIdentifier()));
    return definitions;
}