GridFileManager.cpp
Go to the documentation of this file.
1 /*
2 * This file is part of ArmarX.
3 *
4 * ArmarX is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 *
8 * ArmarX is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 *
16 * @package MemoryX::Core
17 * @author Alexey Kozlov ( kozlov at kit dot edu)
18 * @date Sep 27, 2012
19 * @copyright http://www.gnu.org/licenses/gpl-2.0.txt
20 * GNU General Public License
21 */
22 
23 #include <fstream>
24 #include <memory>
25 #include <filesystem>
26 #include <random>
27 #include <boost/regex.hpp>
28 
33 #include <IceUtil/UUID.h>
34 #include "GridFileManager.h"
35 
36 namespace memoryx
37 {
38  namespace fs = std::filesystem;
39 
40  GridFileManager::GridFileManager(const CommonStorageInterfacePrx& databasePrx) :
41  databasePrx(databasePrx)
42  {
43  std::string armarxCachePath = armarx::ArmarXDataPath::GetCachePath();
44  if (!armarxCachePath.empty())
45  {
46  init(armarxCachePath);
47  }
48  else
49  {
50  static std::string cachePath = (std::filesystem::temp_directory_path() / std::to_string(std::random_device{}())).string();
51  init(cachePath);
52  }
53  }
54 
/**
 * Constructs a GridFileManager that caches files below an explicitly given
 * directory.
 *
 * @param databasePrx proxy stored in the databasePrx member
 * @param cachePath   base cache directory, handed unchanged to init()
 */
55  GridFileManager::GridFileManager(const CommonStorageInterfacePrx& databasePrx, const std::string& cachePath) :
56  databasePrx(databasePrx)
57  {
58  init(cachePath);
59  }
60 
61 
62 
64  {
65  }
66 
/**
 * Prepares the local cache: creates @p cachePath and its "files" sub-directory
 * when they do not exist yet and stores the (absolute) "files" directory in
 * the fileCachePath member.
 *
 * @param cachePath base cache directory; taken by value
 */
67  void GridFileManager::init(std::string cachePath)
68  {
// NOTE(review): this listing jumps from source line 68 to 71, so one or two
// statements are not visible here. The symbol index of this listing references
// armarx::ArmarXDataPath::ReplaceEnvVars, which presumably is called on
// cachePath at this point - confirm against the real file.
71  ARMARX_DEBUG_S << "Cache path: " << cachePath;
72 
// NOTE(review): create_directory() creates only the last path component, so a
// cachePath whose parent directory is missing will fail here; consider
// create_directories() if nested cache paths must be supported.
73  if (!fs::exists(cachePath))
74  {
75  fs::create_directory(cachePath);
76  }
77 
// all cached files live in the "files" sub-directory of the cache path
78  fileCachePath = fs::path(cachePath) / fs::path("files");
79 
80  if (!fs::exists(fileCachePath))
81  {
82  fs::create_directory(fileCachePath);
83  }
84 
85  // make path absolute to prevent errors with relative paths
86  if (!fileCachePath.is_absolute())
87  {
// NOTE(review): source line 88 is missing from this listing as well.
89  std::filesystem::path fullPath(std::filesystem::current_path());
90  fileCachePath = fullPath / fileCachePath;
91  }
92  }
93 
95  {
96  return fileCachePath.string();
97  }
98 
99  bool GridFileManager::ensureFileInCache(GridFileInterfacePrx& filePrx, std::string& cacheFileName,
100  bool preserveOriginalName)
101  {
102  ARMARX_TRACE;
103  if (!filePrx)
104  {
105  return false;
106  }
107 
108  // files could be stored in local cache in two ways:
109  // - preserving original file name (e.g. for textures used from .iv-file)
110  // - under special names <mongo file id>_<MD5 file hash>. it's preferable since it allows
111  // to check the identity of local and remote files both simple and reliable
112  fs::path filenameFromProxy(filePrx->getFilename());
113  //ARMARX_INFO << "filenameFromProxy = " << filenameFromProxy;
114  fs::path localFile = fileCachePath / filenameFromProxy.stem();
115  //ARMARX_INFO << "localFile = " << localFile;
116  if (!preserveOriginalName)
117  {
118  localFile += fs::path(filePrx->getId() + "_" + filePrx->getMD5());
119  }
120  std::string localFileStr = localFile.string();
121  std::string extensionStr = filenameFromProxy.extension().string();
122  localFile = fs::path(localFileStr + extensionStr);
123  // check if file was already cached:
124  // 1) check file name (and so MD5 hash if not in preserveOriginalName mode)
125  bool cached = fs::exists(localFile);
126  if (cached)
127  {
128  // 2) check file size
129  cached = cached && ((std::uintmax_t) filePrx->getFileSize() == fs::file_size(localFile));
130  // 3) check file date (only needed in preserveOriginalName mode, otherwise MD5 match should suffice)
131  ARMARX_TRACE;
132  fs::file_time_type lwt = fs::last_write_time(localFile);
133  auto sctp = std::chrono::time_point_cast<std::chrono::system_clock::duration>(lwt - fs::file_time_type::clock::now()
134  + std::chrono::system_clock::now());
135  time_t time = std::chrono::system_clock::to_time_t(sctp);
136  ARMARX_TRACE;
137  cached = cached && (!preserveOriginalName || (time >= filePrx->getUploadDate() / 1000));
138  }
139 
140  // ARMARX_VERBOSE << "Local file time: " << fs::last_write_time(localFile) << " size: " << fs::file_size(localFile) << std::endl;
141  // ARMARX_VERBOSE << "Remote file time: " << filePrx->getUploadDate() << " size: " << filePrx->getFileSize() << std::endl;
142 
143  if (!cached)
144  {
145  ARMARX_TRACE;
146  // assure directory is present
147  std::filesystem::path filePath;
148  filePath = localFile;
149  filePath = filePath.parent_path();
150  create_directories(filePath);
151 
152  ARMARX_VERBOSE << "Caching file to: " << localFile << std::flush;
153  const std::string tmpLocalFile = localFile.string() + IceUtil::generateUUID() + ".part";
154  std::fstream fsOut;
155  fsOut.open(tmpLocalFile.c_str(), std::ios_base::out | std::ios_base::binary);
156 
157  memoryx::Blob buffer;
158 
159  ARMARX_TRACE;
160  while (filePrx->getNextChunk(buffer))
161  {
162  fsOut.write((char*) &buffer[0], buffer.size());
163  }
164 
165  fsOut.close();
166  fs::rename(tmpLocalFile, localFile);
167  }
168 
169  cacheFileName = localFile.string();
170  return true;
171  }
172 
173  bool GridFileManager::ensureFileInCache(const EntityAttributeBasePtr& fileAttr,
174  std::string& cacheFileName, bool preserveOriginalName)
175  {
176  ARMARX_TRACE;
177  bool result = false;
178  GridFileInterfacePrx filePrx = getFileProxyFromAttr(fileAttr);
179 
180  if (!filePrx)
181  {
182  return false;
183  }
184 
185  result = ensureFileInCache(filePrx, cacheFileName, preserveOriginalName);
186  ARMARX_TRACE;
187  databasePrx->releaseFileProxy(filePrx);
188  return result;
189  }
190 
191  bool GridFileManager::ensureFilesInCache(const EntityAttributeBasePtr& fileAttr, std::vector<std::string>& cacheFileNames,
192  bool preserveOriginalNames /* = false */)
193  {
194  ARMARX_TRACE;
195  bool result = false;
196 
197  GridFileList filePrxList = getFileProxiesFromAttr(fileAttr);
198 
199  if (filePrxList.empty())
200  {
201  return result;
202  }
203 
204  std::string cacheFileName;
205 
206  for (GridFileList::iterator it = filePrxList.begin(); it != filePrxList.end(); ++it)
207  {
208  ARMARX_TRACE;
209  result |= ensureFileInCache(*it, cacheFileName, preserveOriginalNames);
210  cacheFileNames.push_back(cacheFileName);
211  databasePrx->releaseFileProxy(*it);
212  }
213 
214  return result;
215  }
216 
217  bool GridFileManager::ensureFilesInCache(const EntityAttributeBasePtr& fileAttr,
218  bool preserveOriginalNames /* = false */)
219  {
220  ARMARX_TRACE;
221  std::vector<std::string> cacheFileNames;
222  return ensureFilesInCache(fileAttr, cacheFileNames, preserveOriginalNames);
223  }
224 
225  bool GridFileManager::getFileStream(GridFileInterfacePrx& filePrx, std::ifstream& fs)
226  {
227  ARMARX_TRACE;
228  std::string cacheFileName;
229 
230  if (ensureFileInCache(filePrx, cacheFileName, false))
231  {
232  ARMARX_TRACE;
233  fs.open(cacheFileName.c_str(), std::ios_base::in);
234  return true;
235  }
236 
237  return false;
238  }
239 
240  bool GridFileManager::getFileStream(const EntityAttributeBasePtr& fileAttr, std::ifstream& fs)
241  {
242  ARMARX_TRACE;
243  GridFileInterfacePrx filePrx = getFileProxyFromAttr(fileAttr);
244  bool result = getFileStream(filePrx, fs);
245  databasePrx->releaseFileProxy(filePrx);
246  return result;
247  }
248 
249  std::string GridFileManager::storeFileToAttr(const std::string& filesDBName,
250  const std::string& localFileName, EntityAttributeBasePtr& fileAttr, const std::string& gridFSName /* = "" */)
251  {
252  ARMARX_TRACE;
253  return addFileToAttr(filesDBName, localFileName, fileAttr, gridFSName);
254  }
255 
256  std::string GridFileManager::addFileToAttr(const std::string& filesDBName,
257  const std::string& localFileName, EntityAttributeBasePtr& fileAttr, const std::string& gridFSName /* = "" */)
258  {
259  ARMARX_TRACE;
260  std::filesystem::path fname(localFileName);
261  fname = std::filesystem::absolute(fname);
262  std::string fileId;
263  try
264  {
265  ARMARX_TRACE;
266  fileId = databasePrx->storeFile(filesDBName, fname.string(), (gridFSName.empty() ? fname.filename().string() : gridFSName));
267  }
268  catch (const FileNotFoundException& e)
269  {
270  ARMARX_TRACE;
271  // try again but now by reading file on this machine and sending it over ice
272  std::ifstream fin(fname.string().c_str(), std::ios::binary);
273  if (!fin.is_open())
274  {
275  throw FileNotFoundException("File not found: " + fname.string(), fname.string());
276  }
277  std::stringstream contentStream;
278  contentStream << fin.rdbuf();
279  fileId = databasePrx->storeTextFile(filesDBName, contentStream.str(), (gridFSName.empty() ? fname.filename().string() : gridFSName));
280  }
281 
282  MongoDBRefPtr fileRef = new MongoDBRef(filesDBName, fileId);
283 
284  const armarx::VariantPtr fileVar = new armarx::Variant(fileRef._ptr);
285  fileAttr->addValue(fileVar);
286 
287  return fileId;
288  }
289 
290  bool GridFileManager::storeDirectoryToAttr(const std::string& filesDBName,
291  const std::string& localDirectoryName, EntityAttributeBasePtr& fileAttr, std::string excludeFilter)
292  {
293  ARMARX_TRACE;
294  boost::regex exclude(excludeFilter.c_str());
295 
296  bool success = true;
297  fileAttr->clear();
298 
299  std::filesystem::path relativePathBase = localDirectoryName;
300  relativePathBase = relativePathBase.parent_path();
301 
302  // go through directory recursively
303  for (std::filesystem::recursive_directory_iterator end, dir(localDirectoryName.c_str()); dir != end; ++dir)
304  {
305  // only add non directories
306  if (dir->status().type() != std::filesystem::file_type::directory)
307  {
308  if (boost::regex_match(dir->path().filename().c_str(), exclude))
309  {
310  dir.disable_recursion_pending();
311  }
312  else
313  {
314  std::string fileName = makeRelativePath(dir->path(), relativePathBase);
315  ARMARX_VERBOSE_S << "Adding file '" << dir->path() << "' with name " << fileName;
316  const std::string fileId = addFileToAttr(filesDBName, dir->path().c_str(), fileAttr, fileName);
317  success &= !fileId.empty();
318  }
319  }
320  }
321 
322  return success;
323  }
324 
325 
326  bool GridFileManager::storeFilesToAttr(const std::string& filesDBName,
327  const std::string& localBaseDirectoryName,
328  const std::vector<std::string>& localFiles,
329  EntityAttributeBasePtr& fileAttr)
330  {
331  ARMARX_TRACE;
332  bool success = true;
333  fileAttr->clear();
334 
335  std::filesystem::path relativePathBase = localBaseDirectoryName;
336  relativePathBase = relativePathBase.parent_path();
337 
338  for (size_t i = 0; i < localFiles.size(); i++)
339  {
340  std::filesystem::path f = localFiles[i];
341  std::string fileName = makeRelativePath(f, relativePathBase);
342  ARMARX_VERBOSE_S << "Adding file " << f << "with name " << fileName;
343  const std::string fileId = addFileToAttr(filesDBName, f.c_str(), fileAttr, fileName);
344  success &= !fileId.empty();
345  }
346 
347  return success;
348  }
349 
350  bool GridFileManager::removeAttrFile(const EntityAttributeBasePtr& fileAttr, unsigned int fileIndex)
351  {
352  ARMARX_TRACE;
353  const MongoDBRefPtr fileRef = extractMongoDBRef(fileAttr->getValueAt(fileIndex));
354 
355  if (!fileRef)
356  {
357  return false;
358  }
359 
360  return databasePrx->removeFileById(fileRef->dbName, fileRef->docId);
361  }
362 
363 
364  bool GridFileManager::removeAttrFiles(const EntityAttributeBasePtr& fileAttr)
365  {
366  ARMARX_TRACE;
367  if (!fileAttr)
368  {
369  return false;
370  }
371 
372  bool result = true;
373 
374  for (size_t i = 0; i < (size_t) fileAttr->size(); ++i)
375  {
376  result &= removeAttrFile(fileAttr, i);
377  }
378 
379  return result;
380  }
381 
382 
383 
384  GridFileInterfacePrx GridFileManager::getFileProxyFromAttr(const AttributeBasePtr& attr) const
385  {
386  ARMARX_TRACE;
387  const MongoDBRefPtr fileRef = extractMongoDBRef(attr);
388 
389  if (fileRef)
390  {
391  return databasePrx->getFileProxyById(fileRef->dbName, fileRef->docId);
392  }
393  else
394  {
395  ARMARX_WARNING << "Could not get fileref for attribute " << attr->getName();
396  return GridFileInterfacePrx();
397  }
398  }
399 
400  GridFileList GridFileManager::getFileProxiesFromAttr(const AttributeBasePtr& attr) const
401  {
402  ARMARX_TRACE;
403  GridFileList result;
404  const EntityAttributePtr entityAttr = EntityAttributePtr::dynamicCast(attr);
405 
406  if (!entityAttr)
407  {
408  return result;
409  }
410 
411  for (size_t i = 0; i < (size_t) entityAttr->size(); ++i)
412  {
413  const MongoDBRefPtr fileRef = extractMongoDBRef(entityAttr->getValueAt(i));
414 
415  if (fileRef)
416  {
417  const GridFileInterfacePrx prx = databasePrx->getFileProxyById(fileRef->dbName, fileRef->docId);
418  result.push_back(prx);
419  }
420  }
421 
422  return result;
423  }
424 
425  MongoDBRefPtr GridFileManager::extractMongoDBRef(const AttributeBasePtr& attr) const
426  {
427  ARMARX_TRACE;
428  if (attr)
429  {
430  ARMARX_TRACE;
431  const EntityAttributePtr entityAttr = EntityAttributePtr::dynamicCast(attr);
432 
433  if (entityAttr)
434  {
435  return extractMongoDBRef(entityAttr->getValue());
436  }
437  else
438  {
439  ARMARX_WARNING << "Could not cast attribute " << attr->getName();
440  }
441  }
442  else
443  {
444  ARMARX_WARNING << "NULL Attribute";
445  }
446 
447  return MongoDBRefPtr();
448  }
449 
450  MongoDBRefPtr GridFileManager::extractMongoDBRef(const armarx::VariantBasePtr& value) const
451  {
452  ARMARX_TRACE;
453  const armarx::VariantPtr fileVar = armarx::VariantPtr::dynamicCast(value);
454 
455  if (fileVar)
456  {
457  return fileVar->getClass<MongoDBRef>();
458  }
459  else
460  {
461  return MongoDBRefPtr();
462  }
463  }
464 
465 
466  std::string GridFileManager::makeRelativePath(const std::filesystem::path& directory, const std::filesystem::path& basePath)
467  {
468  ARMARX_TRACE;
469  std::filesystem::path diffpath;
470  std::filesystem::path tmppath = directory;
471 
472  while (tmppath != basePath)
473  {
474  diffpath = tmppath.filename() / diffpath;
475  tmppath = tmppath.parent_path();
476 
477  if (tmppath.empty())
478  {
479  // no relative path found, take complete path
480  diffpath = directory;
481  break;
482  }
483  }
484 
485  return diffpath.string();
486  }
487 }
memoryx::GridFileManager::GridFileManager
GridFileManager(const CommonStorageInterfacePrx &databasePrx)
Constructs new GridFileManager.
Definition: GridFileManager.cpp:40
armarx::Variant
The Variant class is described here: Variants.
Definition: Variant.h:224
memoryx::GridFileManager::getFileStream
bool getFileStream(GridFileInterfacePrx &filePrx, std::ifstream &fs)
Caches the file locally and opens a filestream for it.
Definition: GridFileManager.cpp:225
ARMARX_VERBOSE
#define ARMARX_VERBOSE
Definition: Logging.h:180
memoryx::GridFileManager::storeFilesToAttr
bool storeFilesToAttr(const std::string &filesDBName, const std::string &localBaseDirectoryName, const std::vector< std::string > &localFiles, EntityAttributeBasePtr &fileAttr)
Stores a set of files in GridFS and puts references to them into the entity attribute.
Definition: GridFileManager.cpp:326
memoryx::GridFileManager::removeAttrFiles
bool removeAttrFiles(const EntityAttributeBasePtr &fileAttr)
Removes all GridFS files referenced by entity attribute.
Definition: GridFileManager.cpp:364
trace.h
GridFileManager.h
memoryx
VirtualRobot headers.
Definition: CommonPlacesTester.cpp:48
memoryx::GridFileManager::ensureFileInCache
bool ensureFileInCache(GridFileInterfacePrx &filePrx, std::string &cacheFileName, bool preserveOriginalName=false)
Caches the file locally and returns the filename.
Definition: GridFileManager.cpp:99
memoryx::GridFileManager::removeAttrFile
bool removeAttrFile(const EntityAttributeBasePtr &fileAttr, unsigned int fileIndex)
Definition: GridFileManager.cpp:350
IceInternal::Handle
Definition: forward_declarations.h:8
ARMARX_TRACE
#define ARMARX_TRACE
Definition: trace.h:69
cxxopts::value
std::shared_ptr< Value > value()
Definition: cxxopts.hpp:926
memoryx::GridFileManager::ensureFilesInCache
bool ensureFilesInCache(const EntityAttributeBasePtr &fileAttr, std::vector< std::string > &cacheFileNames, bool preserveOriginalNames=false)
Caches multiple files locally.
Definition: GridFileManager.cpp:191
ARMARX_DEBUG_S
#define ARMARX_DEBUG_S
Definition: Logging.h:198
armarx::ArmarXDataPath::GetCachePath
static std::string GetCachePath()
The base Cache directory of ArmarX.
Definition: ArmarXDataPath.cpp:734
EntityAttribute.h
armarx::VariantType::MongoDBRef
const VariantTypeId MongoDBRef
Definition: MongoDBRef.h:32
armarx::flush
const LogSender::manipulator flush
Definition: LogSender.h:251
memoryx::GridFileManager::storeFileToAttr
std::string storeFileToAttr(const std::string &filesDBName, const std::string &localFileName, EntityAttributeBasePtr &fileAttr, const std::string &gridFSName="")
Stores a file in GridFS and puts a reference to it into entity attribute.
Definition: GridFileManager.cpp:249
armarx::to_string
const std::string & to_string(const std::string &s)
Definition: StringHelpers.h:40
memoryx::GridFileManager::getFileCachePath
std::string getFileCachePath() const
Retrieves a local path where files will be cached.
Definition: GridFileManager.cpp:94
memoryx::GridFileManager::init
void init(std::string cachePath)
Definition: GridFileManager.cpp:67
armarx::ArmarXDataPath::ReplaceEnvVars
static bool ReplaceEnvVars(std::string &string)
ReplaceEnvVars replaces environment variables in a string with their values, if the env.
Definition: ArmarXDataPath.cpp:483
memoryx::MongoDBRefPtr
IceInternal::Handle< MongoDBRef > MongoDBRefPtr
Definition: MongoDBRef.h:103
ARMARX_VERBOSE_S
#define ARMARX_VERBOSE_S
Definition: Logging.h:200
memoryx::GridFileManager::~GridFileManager
~GridFileManager() override
Definition: GridFileManager.cpp:63
ARMARX_WARNING
#define ARMARX_WARNING
Definition: Logging.h:186
memoryx::GridFileManager::storeDirectoryToAttr
bool storeDirectoryToAttr(const std::string &filesDBName, const std::string &localDirectoryName, EntityAttributeBasePtr &fileAttr, std::string excludeFilter=".svn")
Stores a complete directory tree in GridFS and puts a reference to it into entity attribute Overwrite...
Definition: GridFileManager.cpp:290
memoryx::GridFileManager::addFileToAttr
std::string addFileToAttr(const std::string &filesDBName, const std::string &localFileName, EntityAttributeBasePtr &fileAttr, const std::string &gridFSName="")
Stores a file in GridFS and puts a reference to it into entity attribute.
Definition: GridFileManager.cpp:256
ArmarXDataPath.h
Application.h
memoryx::EntityAttributePtr
IceInternal::Handle< EntityAttribute > EntityAttributePtr
Typedef of EntityAttributePtr as IceInternal::Handle<EntityAttribute> for convenience.
Definition: EntityAttribute.h:39