GridFileManager.cpp
Go to the documentation of this file.
1 /*
2 * This file is part of ArmarX.
3 *
4 * ArmarX is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 *
8 * ArmarX is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 *
16 * @package MemoryX::Core
17 * @author Alexey Kozlov ( kozlov at kit dot edu)
18 * @date Sep 27, 2012
19 * @copyright http://www.gnu.org/licenses/gpl-2.0.txt
20 * GNU General Public License
21 */
22 
23 #include "GridFileManager.h"
24 
25 #include <filesystem>
26 #include <fstream>
27 #include <memory>
28 #include <random>
29 
30 #include <boost/regex.hpp>
31 
32 #include <IceUtil/UUID.h>
33 
37 
39 
40 namespace memoryx
41 {
42  namespace fs = std::filesystem;
43 
44  GridFileManager::GridFileManager(const CommonStorageInterfacePrx& databasePrx) :
45  databasePrx(databasePrx)
46  {
47  std::string armarxCachePath = armarx::ArmarXDataPath::GetCachePath();
48  if (!armarxCachePath.empty())
49  {
50  init(armarxCachePath);
51  }
52  else
53  {
54  static std::string cachePath =
55  (std::filesystem::temp_directory_path() / std::to_string(std::random_device{}()))
56  .string();
57  init(cachePath);
58  }
59  }
60 
61  GridFileManager::GridFileManager(const CommonStorageInterfacePrx& databasePrx,
62  const std::string& cachePath) :
63  databasePrx(databasePrx)
64  {
65  init(cachePath);
66  }
67 
69  {
70  }
71 
72  void
73  GridFileManager::init(std::string cachePath)
74  {
77  ARMARX_DEBUG_S << "Cache path: " << cachePath;
78 
79  if (!fs::exists(cachePath))
80  {
81  fs::create_directory(cachePath);
82  }
83 
84  fileCachePath = fs::path(cachePath) / fs::path("files");
85 
86  if (!fs::exists(fileCachePath))
87  {
88  fs::create_directory(fileCachePath);
89  }
90 
91  // make path absolute to prevent errors with relative paths
92  if (!fileCachePath.is_absolute())
93  {
95  std::filesystem::path fullPath(std::filesystem::current_path());
96  fileCachePath = fullPath / fileCachePath;
97  }
98  }
99 
100  std::string
102  {
103  return fileCachePath.string();
104  }
105 
106  bool
107  GridFileManager::ensureFileInCache(GridFileInterfacePrx& filePrx,
108  std::string& cacheFileName,
109  bool preserveOriginalName)
110  {
111  ARMARX_TRACE;
112  if (!filePrx)
113  {
114  return false;
115  }
116 
117  // files could be stored in local cache in two ways:
118  // - preserving original file name (e.g. for textures used from .iv-file)
119  // - under special names <mongo file id>_<MD5 file hash>. it's preferable since it allows
120  // to check the identity of local and remote files both simple and reliable
121  fs::path filenameFromProxy(filePrx->getFilename());
122  //ARMARX_INFO << "filenameFromProxy = " << filenameFromProxy;
123  fs::path localFile = fileCachePath / filenameFromProxy.stem();
124  //ARMARX_INFO << "localFile = " << localFile;
125  if (!preserveOriginalName)
126  {
127  localFile += fs::path(filePrx->getId() + "_" + filePrx->getMD5());
128  }
129  std::string localFileStr = localFile.string();
130  std::string extensionStr = filenameFromProxy.extension().string();
131  localFile = fs::path(localFileStr + extensionStr);
132  // check if file was already cached:
133  // 1) check file name (and so MD5 hash if not in preserveOriginalName mode)
134  bool cached = fs::exists(localFile);
135  if (cached)
136  {
137  // 2) check file size
138  cached = cached && ((std::uintmax_t)filePrx->getFileSize() == fs::file_size(localFile));
139  // 3) check file date (only needed in preserveOriginalName mode, otherwise MD5 match should suffice)
140  ARMARX_TRACE;
141  fs::file_time_type lwt = fs::last_write_time(localFile);
142  auto sctp = std::chrono::time_point_cast<std::chrono::system_clock::duration>(
143  lwt - fs::file_time_type::clock::now() + std::chrono::system_clock::now());
144  time_t time = std::chrono::system_clock::to_time_t(sctp);
145  ARMARX_TRACE;
146  cached = cached && (!preserveOriginalName || (time >= filePrx->getUploadDate() / 1000));
147  }
148 
149  // ARMARX_VERBOSE << "Local file time: " << fs::last_write_time(localFile) << " size: " << fs::file_size(localFile) << std::endl;
150  // ARMARX_VERBOSE << "Remote file time: " << filePrx->getUploadDate() << " size: " << filePrx->getFileSize() << std::endl;
151 
152  if (!cached)
153  {
154  ARMARX_TRACE;
155  // assure directory is present
156  std::filesystem::path filePath;
157  filePath = localFile;
158  filePath = filePath.parent_path();
159  create_directories(filePath);
160 
161  ARMARX_VERBOSE << "Caching file to: " << localFile << std::flush;
162  const std::string tmpLocalFile = localFile.string() + IceUtil::generateUUID() + ".part";
163  std::fstream fsOut;
164  fsOut.open(tmpLocalFile.c_str(), std::ios_base::out | std::ios_base::binary);
165 
166  memoryx::Blob buffer;
167 
168  ARMARX_TRACE;
169  while (filePrx->getNextChunk(buffer))
170  {
171  fsOut.write((char*)&buffer[0], buffer.size());
172  }
173 
174  fsOut.close();
175  fs::rename(tmpLocalFile, localFile);
176  }
177 
178  cacheFileName = localFile.string();
179  return true;
180  }
181 
182  bool
183  GridFileManager::ensureFileInCache(const EntityAttributeBasePtr& fileAttr,
184  std::string& cacheFileName,
185  bool preserveOriginalName)
186  {
187  ARMARX_TRACE;
188  bool result = false;
189  GridFileInterfacePrx filePrx = getFileProxyFromAttr(fileAttr);
190 
191  if (!filePrx)
192  {
193  return false;
194  }
195 
196  result = ensureFileInCache(filePrx, cacheFileName, preserveOriginalName);
197  ARMARX_TRACE;
198  databasePrx->releaseFileProxy(filePrx);
199  return result;
200  }
201 
202  bool
203  GridFileManager::ensureFilesInCache(const EntityAttributeBasePtr& fileAttr,
204  std::vector<std::string>& cacheFileNames,
205  bool preserveOriginalNames /* = false */)
206  {
207  ARMARX_TRACE;
208  bool result = false;
209 
210  GridFileList filePrxList = getFileProxiesFromAttr(fileAttr);
211 
212  if (filePrxList.empty())
213  {
214  return result;
215  }
216 
217  std::string cacheFileName;
218 
219  for (GridFileList::iterator it = filePrxList.begin(); it != filePrxList.end(); ++it)
220  {
221  ARMARX_TRACE;
222  result |= ensureFileInCache(*it, cacheFileName, preserveOriginalNames);
223  cacheFileNames.push_back(cacheFileName);
224  databasePrx->releaseFileProxy(*it);
225  }
226 
227  return result;
228  }
229 
230  bool
231  GridFileManager::ensureFilesInCache(const EntityAttributeBasePtr& fileAttr,
232  bool preserveOriginalNames /* = false */)
233  {
234  ARMARX_TRACE;
235  std::vector<std::string> cacheFileNames;
236  return ensureFilesInCache(fileAttr, cacheFileNames, preserveOriginalNames);
237  }
238 
239  bool
240  GridFileManager::getFileStream(GridFileInterfacePrx& filePrx, std::ifstream& fs)
241  {
242  ARMARX_TRACE;
243  std::string cacheFileName;
244 
245  if (ensureFileInCache(filePrx, cacheFileName, false))
246  {
247  ARMARX_TRACE;
248  fs.open(cacheFileName.c_str(), std::ios_base::in);
249  return true;
250  }
251 
252  return false;
253  }
254 
255  bool
256  GridFileManager::getFileStream(const EntityAttributeBasePtr& fileAttr, std::ifstream& fs)
257  {
258  ARMARX_TRACE;
259  GridFileInterfacePrx filePrx = getFileProxyFromAttr(fileAttr);
260  bool result = getFileStream(filePrx, fs);
261  databasePrx->releaseFileProxy(filePrx);
262  return result;
263  }
264 
265  std::string
266  GridFileManager::storeFileToAttr(const std::string& filesDBName,
267  const std::string& localFileName,
268  EntityAttributeBasePtr& fileAttr,
269  const std::string& gridFSName /* = "" */)
270  {
271  ARMARX_TRACE;
272  return addFileToAttr(filesDBName, localFileName, fileAttr, gridFSName);
273  }
274 
275  std::string
276  GridFileManager::addFileToAttr(const std::string& filesDBName,
277  const std::string& localFileName,
278  EntityAttributeBasePtr& fileAttr,
279  const std::string& gridFSName /* = "" */)
280  {
281  ARMARX_TRACE;
282  std::filesystem::path fname(localFileName);
283  fname = std::filesystem::absolute(fname);
284  std::string fileId;
285  try
286  {
287  ARMARX_TRACE;
288  fileId = databasePrx->storeFile(
289  filesDBName,
290  fname.string(),
291  (gridFSName.empty() ? fname.filename().string() : gridFSName));
292  }
293  catch (const FileNotFoundException& e)
294  {
295  ARMARX_TRACE;
296  // try again but now by reading file on this machine and sending it over ice
297  std::ifstream fin(fname.string().c_str(), std::ios::binary);
298  if (!fin.is_open())
299  {
300  throw FileNotFoundException("File not found: " + fname.string(), fname.string());
301  }
302  std::stringstream contentStream;
303  contentStream << fin.rdbuf();
304  fileId = databasePrx->storeTextFile(
305  filesDBName,
306  contentStream.str(),
307  (gridFSName.empty() ? fname.filename().string() : gridFSName));
308  }
309 
310  MongoDBRefPtr fileRef = new MongoDBRef(filesDBName, fileId);
311 
312  const armarx::VariantPtr fileVar = new armarx::Variant(fileRef._ptr);
313  fileAttr->addValue(fileVar);
314 
315  return fileId;
316  }
317 
318  bool
319  GridFileManager::storeDirectoryToAttr(const std::string& filesDBName,
320  const std::string& localDirectoryName,
321  EntityAttributeBasePtr& fileAttr,
322  std::string excludeFilter)
323  {
324  ARMARX_TRACE;
325  boost::regex exclude(excludeFilter.c_str());
326 
327  bool success = true;
328  fileAttr->clear();
329 
330  std::filesystem::path relativePathBase = localDirectoryName;
331  relativePathBase = relativePathBase.parent_path();
332 
333  // go through directory recursively
334  for (std::filesystem::recursive_directory_iterator end, dir(localDirectoryName.c_str());
335  dir != end;
336  ++dir)
337  {
338  // only add non directories
339  if (dir->status().type() != std::filesystem::file_type::directory)
340  {
341  if (boost::regex_match(dir->path().filename().c_str(), exclude))
342  {
343  dir.disable_recursion_pending();
344  }
345  else
346  {
347  std::string fileName = makeRelativePath(dir->path(), relativePathBase);
348  ARMARX_VERBOSE_S << "Adding file '" << dir->path() << "' with name "
349  << fileName;
350  const std::string fileId =
351  addFileToAttr(filesDBName, dir->path().c_str(), fileAttr, fileName);
352  success &= !fileId.empty();
353  }
354  }
355  }
356 
357  return success;
358  }
359 
360  bool
361  GridFileManager::storeFilesToAttr(const std::string& filesDBName,
362  const std::string& localBaseDirectoryName,
363  const std::vector<std::string>& localFiles,
364  EntityAttributeBasePtr& fileAttr)
365  {
366  ARMARX_TRACE;
367  bool success = true;
368  fileAttr->clear();
369 
370  std::filesystem::path relativePathBase = localBaseDirectoryName;
371  relativePathBase = relativePathBase.parent_path();
372 
373  for (size_t i = 0; i < localFiles.size(); i++)
374  {
375  std::filesystem::path f = localFiles[i];
376  std::string fileName = makeRelativePath(f, relativePathBase);
377  ARMARX_VERBOSE_S << "Adding file " << f << "with name " << fileName;
378  const std::string fileId = addFileToAttr(filesDBName, f.c_str(), fileAttr, fileName);
379  success &= !fileId.empty();
380  }
381 
382  return success;
383  }
384 
385  bool
386  GridFileManager::removeAttrFile(const EntityAttributeBasePtr& fileAttr, unsigned int fileIndex)
387  {
388  ARMARX_TRACE;
389  const MongoDBRefPtr fileRef = extractMongoDBRef(fileAttr->getValueAt(fileIndex));
390 
391  if (!fileRef)
392  {
393  return false;
394  }
395 
396  return databasePrx->removeFileById(fileRef->dbName, fileRef->docId);
397  }
398 
399  bool
400  GridFileManager::removeAttrFiles(const EntityAttributeBasePtr& fileAttr)
401  {
402  ARMARX_TRACE;
403  if (!fileAttr)
404  {
405  return false;
406  }
407 
408  bool result = true;
409 
410  for (size_t i = 0; i < (size_t)fileAttr->size(); ++i)
411  {
412  result &= removeAttrFile(fileAttr, i);
413  }
414 
415  return result;
416  }
417 
418  GridFileInterfacePrx
419  GridFileManager::getFileProxyFromAttr(const AttributeBasePtr& attr) const
420  {
421  ARMARX_TRACE;
422  const MongoDBRefPtr fileRef = extractMongoDBRef(attr);
423 
424  if (fileRef)
425  {
426  return databasePrx->getFileProxyById(fileRef->dbName, fileRef->docId);
427  }
428  else
429  {
430  ARMARX_WARNING << "Could not get fileref for attribute " << attr->getName();
431  return GridFileInterfacePrx();
432  }
433  }
434 
435  GridFileList
436  GridFileManager::getFileProxiesFromAttr(const AttributeBasePtr& attr) const
437  {
438  ARMARX_TRACE;
439  GridFileList result;
440  const EntityAttributePtr entityAttr = EntityAttributePtr::dynamicCast(attr);
441 
442  if (!entityAttr)
443  {
444  return result;
445  }
446 
447  for (size_t i = 0; i < (size_t)entityAttr->size(); ++i)
448  {
449  const MongoDBRefPtr fileRef = extractMongoDBRef(entityAttr->getValueAt(i));
450 
451  if (fileRef)
452  {
453  const GridFileInterfacePrx prx =
454  databasePrx->getFileProxyById(fileRef->dbName, fileRef->docId);
455  result.push_back(prx);
456  }
457  }
458 
459  return result;
460  }
461 
463  GridFileManager::extractMongoDBRef(const AttributeBasePtr& attr) const
464  {
465  ARMARX_TRACE;
466  if (attr)
467  {
468  ARMARX_TRACE;
469  const EntityAttributePtr entityAttr = EntityAttributePtr::dynamicCast(attr);
470 
471  if (entityAttr)
472  {
473  return extractMongoDBRef(entityAttr->getValue());
474  }
475  else
476  {
477  ARMARX_WARNING << "Could not cast attribute " << attr->getName();
478  }
479  }
480  else
481  {
482  ARMARX_WARNING << "NULL Attribute";
483  }
484 
485  return MongoDBRefPtr();
486  }
487 
489  GridFileManager::extractMongoDBRef(const armarx::VariantBasePtr& value) const
490  {
491  ARMARX_TRACE;
492  const armarx::VariantPtr fileVar = armarx::VariantPtr::dynamicCast(value);
493 
494  if (fileVar)
495  {
496  return fileVar->getClass<MongoDBRef>();
497  }
498  else
499  {
500  return MongoDBRefPtr();
501  }
502  }
503 
504  std::string
505  GridFileManager::makeRelativePath(const std::filesystem::path& directory,
506  const std::filesystem::path& basePath)
507  {
508  ARMARX_TRACE;
509  std::filesystem::path diffpath;
510  std::filesystem::path tmppath = directory;
511 
512  while (tmppath != basePath)
513  {
514  diffpath = tmppath.filename() / diffpath;
515  tmppath = tmppath.parent_path();
516 
517  if (tmppath.empty())
518  {
519  // no relative path found, take complete path
520  diffpath = directory;
521  break;
522  }
523  }
524 
525  return diffpath.string();
526  }
527 } // namespace memoryx
memoryx::GridFileManager::GridFileManager
GridFileManager(const CommonStorageInterfacePrx &databasePrx)
Constructs new GridFileManager.
Definition: GridFileManager.cpp:44
armarx::Variant
The Variant class is described here: Variants.
Definition: Variant.h:223
memoryx::GridFileManager::getFileStream
bool getFileStream(GridFileInterfacePrx &filePrx, std::ifstream &fs)
Caches the file locally and opens a filestream for it.
Definition: GridFileManager.cpp:240
ARMARX_VERBOSE
#define ARMARX_VERBOSE
Definition: Logging.h:187
memoryx::GridFileManager::storeFilesToAttr
bool storeFilesToAttr(const std::string &filesDBName, const std::string &localBaseDirectoryName, const std::vector< std::string > &localFiles, EntityAttributeBasePtr &fileAttr)
Stores a set of files in GridFS and puts references to them into the entity attribute.
Definition: GridFileManager.cpp:361
memoryx::GridFileManager::removeAttrFiles
bool removeAttrFiles(const EntityAttributeBasePtr &fileAttr)
Removes all GridFS files referenced by entity attribute.
Definition: GridFileManager.cpp:400
trace.h
GridFileManager.h
memoryx
VirtualRobot headers.
Definition: CommonPlacesTester.cpp:48
memoryx::GridFileManager::ensureFileInCache
bool ensureFileInCache(GridFileInterfacePrx &filePrx, std::string &cacheFileName, bool preserveOriginalName=false)
Caches the file locally and returns the filename.
Definition: GridFileManager.cpp:107
memoryx::GridFileManager::removeAttrFile
bool removeAttrFile(const EntityAttributeBasePtr &fileAttr, unsigned int fileIndex)
Definition: GridFileManager.cpp:386
IceInternal::Handle
Definition: forward_declarations.h:8
ARMARX_TRACE
#define ARMARX_TRACE
Definition: trace.h:77
cxxopts::value
std::shared_ptr< Value > value()
Definition: cxxopts.hpp:855
memoryx::GridFileManager::ensureFilesInCache
bool ensureFilesInCache(const EntityAttributeBasePtr &fileAttr, std::vector< std::string > &cacheFileNames, bool preserveOriginalNames=false)
Caches multiple files locally.
Definition: GridFileManager.cpp:203
ARMARX_DEBUG_S
#define ARMARX_DEBUG_S
Definition: Logging.h:205
armarx::ArmarXDataPath::GetCachePath
static std::string GetCachePath()
The base Cache directory of ArmarX.
Definition: ArmarXDataPath.cpp:729
EntityAttribute.h
armarx::VariantType::MongoDBRef
const VariantTypeId MongoDBRef
Definition: MongoDBRef.h:32
armarx::flush
const LogSender::manipulator flush
Definition: LogSender.h:251
memoryx::GridFileManager::storeFileToAttr
std::string storeFileToAttr(const std::string &filesDBName, const std::string &localFileName, EntityAttributeBasePtr &fileAttr, const std::string &gridFSName="")
Stores a file in GridFS and puts a reference to it into entity attribute.
Definition: GridFileManager.cpp:266
armarx::to_string
const std::string & to_string(const std::string &s)
Definition: StringHelpers.h:41
memoryx::GridFileManager::getFileCachePath
std::string getFileCachePath() const
Retrieves a local path where files will be cached.
Definition: GridFileManager.cpp:101
memoryx::GridFileManager::init
void init(std::string cachePath)
Definition: GridFileManager.cpp:73
armarx::ArmarXDataPath::ReplaceEnvVars
static bool ReplaceEnvVars(std::string &string)
ReplaceEnvVars replaces environment variables in a string with their values, if the env.
Definition: ArmarXDataPath.cpp:480
memoryx::MongoDBRefPtr
IceInternal::Handle< MongoDBRef > MongoDBRefPtr
Definition: MongoDBRef.h:114
ARMARX_VERBOSE_S
#define ARMARX_VERBOSE_S
Definition: Logging.h:207
memoryx::GridFileManager::~GridFileManager
~GridFileManager() override
Definition: GridFileManager.cpp:68
ARMARX_WARNING
#define ARMARX_WARNING
Definition: Logging.h:193
memoryx::GridFileManager::storeDirectoryToAttr
bool storeDirectoryToAttr(const std::string &filesDBName, const std::string &localDirectoryName, EntityAttributeBasePtr &fileAttr, std::string excludeFilter=".svn")
Stores a complete directory tree in GridFS and puts a reference to it into entity attribute Overwrite...
Definition: GridFileManager.cpp:319
memoryx::GridFileManager::addFileToAttr
std::string addFileToAttr(const std::string &filesDBName, const std::string &localFileName, EntityAttributeBasePtr &fileAttr, const std::string &gridFSName="")
Stores a file in GridFS and puts a reference to it into entity attribute.
Definition: GridFileManager.cpp:276
ArmarXDataPath.h
Application.h
memoryx::EntityAttributePtr
IceInternal::Handle< EntityAttribute > EntityAttributePtr
Typedef of EntityAttributePtr as IceInternal::Handle<EntityAttribute> for convenience.
Definition: EntityAttribute.h:40