GridFileManager.cpp
Go to the documentation of this file.
/*
 * This file is part of ArmarX.
 *
 * ArmarX is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * ArmarX is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 *
 * @package    MemoryX::Core
 * @author     Alexey Kozlov ( kozlov at kit dot edu)
 * @date       Sep 27, 2012
 * @copyright  http://www.gnu.org/licenses/gpl-2.0.txt
 *             GNU General Public License
 */
22
23#include "GridFileManager.h"
24
25#include <filesystem>
26#include <fstream>
27#include <memory>
28#include <random>
29
30#include <boost/regex.hpp>
31
32#include <IceUtil/UUID.h>
33
37
39
40namespace memoryx
41{
42 namespace fs = std::filesystem;
43
44 GridFileManager::GridFileManager(const CommonStorageInterfacePrx& databasePrx) :
45 databasePrx(databasePrx)
46 {
47 std::string armarxCachePath = armarx::ArmarXDataPath::GetCachePath();
48 if (!armarxCachePath.empty())
49 {
50 init(armarxCachePath);
51 }
52 else
53 {
54 static std::string cachePath =
55 (std::filesystem::temp_directory_path() / std::to_string(std::random_device{}()))
56 .string();
57 init(cachePath);
58 }
59 }
60
61 GridFileManager::GridFileManager(const CommonStorageInterfacePrx& databasePrx,
62 const std::string& cachePath) :
63 databasePrx(databasePrx)
64 {
65 init(cachePath);
66 }
67
71
72 void
73 GridFileManager::init(std::string cachePath)
74 {
77 ARMARX_DEBUG_S << "Cache path: " << cachePath;
78
79 if (!fs::exists(cachePath))
80 {
81 fs::create_directory(cachePath);
82 }
83
84 fileCachePath = fs::path(cachePath) / fs::path("files");
85
86 if (!fs::exists(fileCachePath))
87 {
88 fs::create_directory(fileCachePath);
89 }
90
91 // make path absolute to prevent errors with relative paths
92 if (!fileCachePath.is_absolute())
93 {
95 std::filesystem::path fullPath(std::filesystem::current_path());
96 fileCachePath = fullPath / fileCachePath;
97 }
98 }
99
100 std::string
102 {
103 return fileCachePath.string();
104 }
105
106 bool
107 GridFileManager::ensureFileInCache(GridFileInterfacePrx& filePrx,
108 std::string& cacheFileName,
109 bool preserveOriginalName)
110 {
112 if (!filePrx)
113 {
114 return false;
115 }
116
117 // files could be stored in local cache in two ways:
118 // - preserving original file name (e.g. for textures used from .iv-file)
119 // - under special names <mongo file id>_<MD5 file hash>. it's preferable since it allows
120 // to check the identity of local and remote files both simple and reliable
121 fs::path filenameFromProxy(filePrx->getFilename());
122 //ARMARX_INFO << "filenameFromProxy = " << filenameFromProxy;
123 fs::path localFile = fileCachePath / filenameFromProxy.stem();
124 //ARMARX_INFO << "localFile = " << localFile;
125 if (!preserveOriginalName)
126 {
127 localFile += fs::path(filePrx->getId() + "_" + filePrx->getMD5());
128 }
129 std::string localFileStr = localFile.string();
130 std::string extensionStr = filenameFromProxy.extension().string();
131 localFile = fs::path(localFileStr + extensionStr);
132 // check if file was already cached:
133 // 1) check file name (and so MD5 hash if not in preserveOriginalName mode)
134 bool cached = fs::exists(localFile);
135 if (cached)
136 {
137 // 2) check file size
138 cached = cached && ((std::uintmax_t)filePrx->getFileSize() == fs::file_size(localFile));
139 // 3) check file date (only needed in preserveOriginalName mode, otherwise MD5 match should suffice)
141 fs::file_time_type lwt = fs::last_write_time(localFile);
142 auto sctp = std::chrono::time_point_cast<std::chrono::system_clock::duration>(
143 lwt - fs::file_time_type::clock::now() + std::chrono::system_clock::now());
144 time_t time = std::chrono::system_clock::to_time_t(sctp);
146 cached = cached && (!preserveOriginalName || (time >= filePrx->getUploadDate() / 1000));
147 }
148
149 // ARMARX_VERBOSE << "Local file time: " << fs::last_write_time(localFile) << " size: " << fs::file_size(localFile) << std::endl;
150 // ARMARX_VERBOSE << "Remote file time: " << filePrx->getUploadDate() << " size: " << filePrx->getFileSize() << std::endl;
151
152 if (!cached)
153 {
155 // assure directory is present
156 std::filesystem::path filePath;
157 filePath = localFile;
158 filePath = filePath.parent_path();
159 create_directories(filePath);
160
161 ARMARX_VERBOSE << "Caching file to: " << localFile << std::flush;
162 const std::string tmpLocalFile = localFile.string() + IceUtil::generateUUID() + ".part";
163 std::fstream fsOut;
164 fsOut.open(tmpLocalFile.c_str(), std::ios_base::out | std::ios_base::binary);
165
166 memoryx::Blob buffer;
167
169 while (filePrx->getNextChunk(buffer))
170 {
171 fsOut.write((char*)&buffer[0], buffer.size());
172 }
173
174 fsOut.close();
175 fs::rename(tmpLocalFile, localFile);
176 }
177
178 cacheFileName = localFile.string();
179 return true;
180 }
181
182 bool
183 GridFileManager::ensureFileInCache(const EntityAttributeBasePtr& fileAttr,
184 std::string& cacheFileName,
185 bool preserveOriginalName)
186 {
188 bool result = false;
189 GridFileInterfacePrx filePrx = getFileProxyFromAttr(fileAttr);
190
191 if (!filePrx)
192 {
193 return false;
194 }
195
196 result = ensureFileInCache(filePrx, cacheFileName, preserveOriginalName);
198 databasePrx->releaseFileProxy(filePrx);
199 return result;
200 }
201
202 bool
203 GridFileManager::ensureFilesInCache(const EntityAttributeBasePtr& fileAttr,
204 std::vector<std::string>& cacheFileNames,
205 bool preserveOriginalNames /* = false */)
206 {
208 bool result = false;
209
210 GridFileList filePrxList = getFileProxiesFromAttr(fileAttr);
211
212 if (filePrxList.empty())
213 {
214 return result;
215 }
216
217 std::string cacheFileName;
218
219 for (GridFileList::iterator it = filePrxList.begin(); it != filePrxList.end(); ++it)
220 {
222 result |= ensureFileInCache(*it, cacheFileName, preserveOriginalNames);
223 cacheFileNames.push_back(cacheFileName);
224 databasePrx->releaseFileProxy(*it);
225 }
226
227 return result;
228 }
229
230 bool
231 GridFileManager::ensureFilesInCache(const EntityAttributeBasePtr& fileAttr,
232 bool preserveOriginalNames /* = false */)
233 {
235 std::vector<std::string> cacheFileNames;
236 return ensureFilesInCache(fileAttr, cacheFileNames, preserveOriginalNames);
237 }
238
239 bool
240 GridFileManager::getFileStream(GridFileInterfacePrx& filePrx, std::ifstream& fs)
241 {
243 std::string cacheFileName;
244
245 if (ensureFileInCache(filePrx, cacheFileName, false))
246 {
248 fs.open(cacheFileName.c_str(), std::ios_base::in);
249 return true;
250 }
251
252 return false;
253 }
254
255 bool
256 GridFileManager::getFileStream(const EntityAttributeBasePtr& fileAttr, std::ifstream& fs)
257 {
259 GridFileInterfacePrx filePrx = getFileProxyFromAttr(fileAttr);
260 bool result = getFileStream(filePrx, fs);
261 databasePrx->releaseFileProxy(filePrx);
262 return result;
263 }
264
265 std::string
266 GridFileManager::storeFileToAttr(const std::string& filesDBName,
267 const std::string& localFileName,
268 EntityAttributeBasePtr& fileAttr,
269 const std::string& gridFSName /* = "" */)
270 {
272 return addFileToAttr(filesDBName, localFileName, fileAttr, gridFSName);
273 }
274
275 std::string
276 GridFileManager::addFileToAttr(const std::string& filesDBName,
277 const std::string& localFileName,
278 EntityAttributeBasePtr& fileAttr,
279 const std::string& gridFSName /* = "" */)
280 {
282 std::filesystem::path fname(localFileName);
283 fname = std::filesystem::absolute(fname);
284 std::string fileId;
285 try
286 {
288 fileId = databasePrx->storeFile(
289 filesDBName,
290 fname.string(),
291 (gridFSName.empty() ? fname.filename().string() : gridFSName));
292 }
293 catch (const FileNotFoundException& e)
294 {
296 // try again but now by reading file on this machine and sending it over ice
297 std::ifstream fin(fname.string().c_str(), std::ios::binary);
298 if (!fin.is_open())
299 {
300 throw FileNotFoundException("File not found: " + fname.string(), fname.string());
301 }
302 std::stringstream contentStream;
303 contentStream << fin.rdbuf();
304 fileId = databasePrx->storeTextFile(
305 filesDBName,
306 contentStream.str(),
307 (gridFSName.empty() ? fname.filename().string() : gridFSName));
308 }
309
310 MongoDBRefPtr fileRef = new MongoDBRef(filesDBName, fileId);
311
312 const armarx::VariantPtr fileVar = new armarx::Variant(fileRef._ptr);
313 fileAttr->addValue(fileVar);
314
315 return fileId;
316 }
317
318 bool
319 GridFileManager::storeDirectoryToAttr(const std::string& filesDBName,
320 const std::string& localDirectoryName,
321 EntityAttributeBasePtr& fileAttr,
322 std::string excludeFilter)
323 {
325 boost::regex exclude(excludeFilter.c_str());
326
327 bool success = true;
328 fileAttr->clear();
329
330 std::filesystem::path relativePathBase = localDirectoryName;
331 relativePathBase = relativePathBase.parent_path();
332
333 // go through directory recursively
334 for (std::filesystem::recursive_directory_iterator end, dir(localDirectoryName.c_str());
335 dir != end;
336 ++dir)
337 {
338 // only add non directories
339 if (dir->status().type() != std::filesystem::file_type::directory)
340 {
341 if (boost::regex_match(dir->path().filename().c_str(), exclude))
342 {
343 dir.disable_recursion_pending();
344 }
345 else
346 {
347 std::string fileName = makeRelativePath(dir->path(), relativePathBase);
348 ARMARX_VERBOSE_S << "Adding file '" << dir->path() << "' with name "
349 << fileName;
350 const std::string fileId =
351 addFileToAttr(filesDBName, dir->path().c_str(), fileAttr, fileName);
352 success &= !fileId.empty();
353 }
354 }
355 }
356
357 return success;
358 }
359
360 bool
361 GridFileManager::storeFilesToAttr(const std::string& filesDBName,
362 const std::string& localBaseDirectoryName,
363 const std::vector<std::string>& localFiles,
364 EntityAttributeBasePtr& fileAttr)
365 {
367 bool success = true;
368 fileAttr->clear();
369
370 std::filesystem::path relativePathBase = localBaseDirectoryName;
371 relativePathBase = relativePathBase.parent_path();
372
373 for (size_t i = 0; i < localFiles.size(); i++)
374 {
375 std::filesystem::path f = localFiles[i];
376 std::string fileName = makeRelativePath(f, relativePathBase);
377 ARMARX_VERBOSE_S << "Adding file " << f << "with name " << fileName;
378 const std::string fileId = addFileToAttr(filesDBName, f.c_str(), fileAttr, fileName);
379 success &= !fileId.empty();
380 }
381
382 return success;
383 }
384
385 bool
386 GridFileManager::removeAttrFile(const EntityAttributeBasePtr& fileAttr, unsigned int fileIndex)
387 {
389 const MongoDBRefPtr fileRef = extractMongoDBRef(fileAttr->getValueAt(fileIndex));
390
391 if (!fileRef)
392 {
393 return false;
394 }
395
396 return databasePrx->removeFileById(fileRef->dbName, fileRef->docId);
397 }
398
399 bool
400 GridFileManager::removeAttrFiles(const EntityAttributeBasePtr& fileAttr)
401 {
403 if (!fileAttr)
404 {
405 return false;
406 }
407
408 bool result = true;
409
410 for (size_t i = 0; i < (size_t)fileAttr->size(); ++i)
411 {
412 result &= removeAttrFile(fileAttr, i);
413 }
414
415 return result;
416 }
417
418 GridFileInterfacePrx
419 GridFileManager::getFileProxyFromAttr(const AttributeBasePtr& attr) const
420 {
422 const MongoDBRefPtr fileRef = extractMongoDBRef(attr);
423
424 if (fileRef)
425 {
426 return databasePrx->getFileProxyById(fileRef->dbName, fileRef->docId);
427 }
428 else
429 {
430 ARMARX_WARNING << "Could not get fileref for attribute " << attr->getName();
431 return GridFileInterfacePrx();
432 }
433 }
434
435 GridFileList
436 GridFileManager::getFileProxiesFromAttr(const AttributeBasePtr& attr) const
437 {
439 GridFileList result;
440 const EntityAttributePtr entityAttr = EntityAttributePtr::dynamicCast(attr);
441
442 if (!entityAttr)
443 {
444 return result;
445 }
446
447 for (size_t i = 0; i < (size_t)entityAttr->size(); ++i)
448 {
449 const MongoDBRefPtr fileRef = extractMongoDBRef(entityAttr->getValueAt(i));
450
451 if (fileRef)
452 {
453 const GridFileInterfacePrx prx =
454 databasePrx->getFileProxyById(fileRef->dbName, fileRef->docId);
455 result.push_back(prx);
456 }
457 }
458
459 return result;
460 }
461
463 GridFileManager::extractMongoDBRef(const AttributeBasePtr& attr) const
464 {
466 if (attr)
467 {
469 const EntityAttributePtr entityAttr = EntityAttributePtr::dynamicCast(attr);
470
471 if (entityAttr)
472 {
473 return extractMongoDBRef(entityAttr->getValue());
474 }
475 else
476 {
477 ARMARX_WARNING << "Could not cast attribute " << attr->getName();
478 }
479 }
480 else
481 {
482 ARMARX_WARNING << "NULL Attribute";
483 }
484
485 return MongoDBRefPtr();
486 }
487
489 GridFileManager::extractMongoDBRef(const armarx::VariantBasePtr& value) const
490 {
492 const armarx::VariantPtr fileVar = armarx::VariantPtr::dynamicCast(value);
493
494 if (fileVar)
495 {
496 return fileVar->getClass<MongoDBRef>();
497 }
498 else
499 {
500 return MongoDBRefPtr();
501 }
502 }
503
/**
 * Expresses @p directory relative to @p basePath by walking up its parents.
 *
 * @param directory Path to convert.
 * @param basePath  Ancestor the result is relative to. The comparison is
 *                  lexical, so both paths must be written the same way.
 * @return The relative path without a trailing separator, an empty string if
 *         both paths are equal, or @p directory unchanged if @p basePath is
 *         not one of its ancestors.
 */
std::string
GridFileManager::makeRelativePath(const std::filesystem::path& directory,
                                  const std::filesystem::path& basePath)
{
    std::filesystem::path relative;
    std::filesystem::path current = directory;

    while (current != basePath)
    {
        const std::filesystem::path parent = current.parent_path();

        // Top reached without meeting basePath. With std::filesystem the parent
        // of a root path is the root itself, so testing for an empty path alone
        // would never terminate for absolute paths.
        if (parent.empty() || parent == current)
        {
            // no relative path found, take complete path
            return directory.string();
        }

        // "name / <empty path>" would append a trailing separator, so the first
        // component is assigned instead of appended.
        relative = relative.empty() ? current.filename() : current.filename() / relative;
        current = parent;
    }

    return relative.string();
}
527} // namespace memoryx
static bool ReplaceEnvVars(std::string &string)
ReplaceEnvVars replaces environment variables in a string with their values, if the env.
static std::string GetCachePath()
The base Cache directory of ArmarX.
The Variant class is described here: Variants.
Definition Variant.h:224
bool storeFilesToAttr(const std::string &filesDBName, const std::string &localBaseDirectoryName, const std::vector< std::string > &localFiles, EntityAttributeBasePtr &fileAttr)
Stores a set of files with in GridFS and puts a reference to it into entity attribute.
void init(std::string cachePath)
bool getFileStream(GridFileInterfacePrx &filePrx, std::ifstream &fs)
Caches the file locally and opens a filestream for it.
bool ensureFileInCache(GridFileInterfacePrx &filePrx, std::string &cacheFileName, bool preserveOriginalName=false)
Caches the file locally and returns the filename.
bool removeAttrFiles(const EntityAttributeBasePtr &fileAttr)
Removes all GridFS files referenced by entity attribute.
GridFileManager(const CommonStorageInterfacePrx &databasePrx)
Constructs new GridFileManager.
bool removeAttrFile(const EntityAttributeBasePtr &fileAttr, unsigned int fileIndex)
std::string getFileCachePath() const
Retrieves a local path where files will be cached.
std::string addFileToAttr(const std::string &filesDBName, const std::string &localFileName, EntityAttributeBasePtr &fileAttr, const std::string &gridFSName="")
Stores a file in GridFS and puts a reference to it into entity attribute.
bool ensureFilesInCache(const EntityAttributeBasePtr &fileAttr, std::vector< std::string > &cacheFileNames, bool preserveOriginalNames=false)
Caches multiple files locally.
bool storeDirectoryToAttr(const std::string &filesDBName, const std::string &localDirectoryName, EntityAttributeBasePtr &fileAttr, std::string excludeFilter=".svn")
Stores a complete directory tree in GridFS and puts a reference to it into entity attribute Overwrite...
std::string storeFileToAttr(const std::string &filesDBName, const std::string &localFileName, EntityAttributeBasePtr &fileAttr, const std::string &gridFSName="")
Stores a file in GridFS and puts a reference to it into entity attribute.
Represents a cross-database reference to a document in MongoDB.
Definition MongoDBRef.h:46
#define ARMARX_DEBUG_S
The logging level for output that is only interesting while debugging.
Definition Logging.h:205
#define ARMARX_VERBOSE_S
Definition Logging.h:207
#define ARMARX_WARNING
The logging level for unexpected behaviour, but not a serious problem.
Definition Logging.h:193
#define ARMARX_VERBOSE
The logging level for verbose information.
Definition Logging.h:187
const VariantTypeId MongoDBRef
Definition MongoDBRef.h:32
IceInternal::Handle< Variant > VariantPtr
Definition Variant.h:41
::IceInternal::Handle<::armarx::VariantBase > VariantBasePtr
VirtualRobot headers.
IceInternal::Handle< EntityAttribute > EntityAttributePtr
Typedef of EntityAttributePtr as IceInternal::Handle<EntityAttribute> for convenience.
IceInternal::Handle< MongoDBRef > MongoDBRefPtr
Definition MongoDBRef.h:114
#define ARMARX_TRACE
Definition trace.h:77