wget.h 5.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202
  1. #ifndef GUL_WGET_H
  2. #define GUL_WGET_H
// C++17 includes the <filesystem> library, but unfortunately
// gcc7 does not ship a finalized version of it; there it lives
// in the <experimental/filesystem> header instead.
// This section checks which of the two headers exists,
// includes that one, and defines the gul::fs namespace alias
// so the rest of the file can refer to fs:: either way.
  10. #if __has_include(<filesystem>)
  11. #include <filesystem>
  12. namespace gul
  13. {
  14. namespace fs = std::filesystem;
  15. }
  16. #elif __has_include(<experimental/filesystem>)
  17. #include <experimental/filesystem>
  18. namespace gul
  19. {
  20. namespace fs = std::experimental::filesystem;
  21. }
  22. #else
  23. #error There is no <filesystem> or <experimental/filesystem>
  24. #endif
#include <cstddef>
#include <cstdlib>
#include <functional>
#include <map>
#include <stdexcept>
#include <string>
#include <string_view>

#include "uri.h"
  27. namespace gul
  28. {
  29. template<typename callable_t>
  30. void split_path(std::string const &s, std::string const &delimiter, callable_t &&C )
  31. {
  32. size_t pos_start = 0, pos_end, delim_len = delimiter.length();
  33. while ((pos_end = s.find (delimiter, pos_start)) != std::string::npos)
  34. {
  35. std::string_view S( &s[pos_start], pos_end-pos_start);
  36. pos_start = pos_end + delim_len;
  37. C(S);
  38. }
  39. std::string_view S( &s[pos_start], s.size()-pos_start);
  40. C(S);
  41. }
  42. /**
  43. * @brief The curl_get class
  44. *
  45. * curl_get class is used to download data using curl.
  46. * This performs a system call to /usr/bin/curl or curl.exe (on windows)
  47. * to download the file
  48. */
  49. struct HTTP
  50. {
  51. static inline std::string CACHE_PATH = (fs::temp_directory_path() / fs::path("gul_wget")).string();
  52. static inline std::string CURL_PATH =
  53. #if defined(WIN32) || defined(_WIN32) || defined(__WIN32) && !defined(__CYGWIN__)
  54. "C:\\Windows\\System32\\curl.exe";
  55. #else
  56. "/usr/bin/curl";
  57. #endif
  58. static inline std::string CURL_ADDITIONAL_FLAGS = "";
  59. fs::path get(gul::uri const & Pd)
  60. {
  61. auto P = cache_file(Pd);
  62. fs::create_directories( P.parent_path() );
  63. std::string cmd = CURL_PATH + " "
  64. + CURL_ADDITIONAL_FLAGS +
  65. + " -s -o " + P.string() + " "
  66. + Pd.toString();
  67. std::system(cmd.c_str());
  68. return P;
  69. }
  70. static fs::path cache_file(gul::uri const & _uri)
  71. {
  72. std::hash<std::string> H;
  73. std::string urlPath = _uri.toString();
  74. auto h = H(urlPath);
  75. auto fn = fs::path(_uri.path).filename();
  76. if(fn.empty())
  77. {
  78. fn = _uri.host;
  79. }
  80. return fs::path(CACHE_PATH) / _uri.host / std::to_string(h) / fn;
  81. }
  82. };
  83. /**
  84. * @brief The RC class
  85. *
  86. * Local path resources. set the RESOURCE list
  87. */
  88. struct RC
  89. {
  90. static inline std::string RESOURCE_LIST = "";
  91. #if defined(WIN32) || defined(_WIN32) || defined(__WIN32) && !defined(__CYGWIN__)
  92. static inline std::string PATH_DELIMITER = ";";
  93. #else
  94. static inline std::string PATH_DELIMITER = ":";
  95. #endif
  96. fs::path get(gul::uri const & Pd)
  97. {
  98. fs::path ret;
  99. split_path(RESOURCE_LIST, PATH_DELIMITER, [&Pd, &ret](auto & p)
  100. {
  101. if( ret.empty() && fs::exists( fs::path(p) / Pd.path) )
  102. {
  103. ret = fs::path(p) / Pd.path;
  104. }
  105. });
  106. return ret;
  107. }
  108. };
  109. /**
  110. * @brief The URIGetter class
  111. *
  112. * The URIGetter class is used to access data from a URI.
  113. * For example, a http:// or https:// scheme will download the file form the
  114. * http url and provide a local cached path so that it can be read from.
  115. */
  116. struct URIGetter
  117. {
  118. using function_type = std::function<fs::path(gul::uri const&)>;
  119. std::map<std::string, function_type > uri_getter =
  120. {
  121. {
  122. "file",
  123. function_type([](gul::uri const & _uri)
  124. {
  125. (void)_uri;
  126. return _uri.path;
  127. })
  128. },
  129. {
  130. "http",
  131. function_type([](gul::uri const & _uri)
  132. {
  133. return HTTP().get(_uri);
  134. })
  135. },
  136. {
  137. "https",
  138. function_type([](gul::uri const & _uri)
  139. {
  140. return HTTP().get(_uri);
  141. })
  142. },
  143. {
  144. "rc",
  145. function_type([](gul::uri const & _uri)
  146. {
  147. return RC().get(_uri);
  148. })
  149. }
  150. };
  151. /**
  152. * @brief get
  153. * @param _uri
  154. * @param ignoreCache
  155. * @return
  156. *
  157. * Download a file from _uri. If ignoreCache==true, then the previously
  158. * downloaded file will be overwritten.
  159. *
  160. * Returns the local path to the file after it has downloaded.
  161. *
  162. * if _uri schema is file://, then it will retun the uri's path
  163. *
  164. * if _uri's schela is http or https, then it will use curl to download the file to
  165. * a cached location and return the filepath to the cache.
  166. *
  167. * This is a blocking call.
  168. */
  169. fs::path get(gul::uri const & _uri) const
  170. {
  171. return uri_getter.at(_uri.scheme)(_uri);
  172. }
  173. };
  174. }
  175. #endif