c++ - Pass-by-reference hinders gcc from tail call elimination -


see blendingtable::create , blendingtable::print. both have same form of tail recursion, while create optimized loop, print not , cause stack overflow.

go down see fix, got hint 1 of gcc devs on bug report of problem.

#include <cstdlib> #include <iostream> #include <memory> #include <array> #include <limits>  class system { public:     template<typename t, typename... ts>     static void print(const t& t, const ts&... ts) {         std::cout << t << std::flush;         print(ts...);     }      static void print() {}      template<typename... ts>     static void printline(const ts&... ts) {         print(ts..., '\n');     } };  template<typename t, int dimension = 1> class array { private:     std::unique_ptr<t[]> pointer;     std::array<int, dimension> sizes;     int realsize;  public:     array() {}      template<typename... ns>     array(ns... ns):     realsize(1) {         checkarguments(ns...);         create(1, ns...);     }  private:     template<typename... ns>     static void checkarguments(ns...) {         static_assert(sizeof...(ns) == dimension, "dimension mismatch");     }      template<typename... ns>     void create(int d, int n, ns... ns) {         realsize *= n;         sizes[d - 1] = n;         create(d + 1, ns...);     }      void create(int) {         pointer = std::unique_ptr<t[]>(new t[realsize]);     }      int computesubsize(int d) const {         if (d == dimension) {             return 1;         }         return sizes[d] * computesubsize(d + 1);     }      template<typename... ns>     int getindex(int d, int n, ns... ns) const {         return n * computesubsize(d) + getindex(d + 1, ns...);     }      int getindex(int) const {         return 0;     }  public:     template<typename... ns>     t& operator()(ns... ns) const {         checkarguments(ns...);         return pointer[getindex(1, ns...)];     }      int getsize(int d = 1) const {         return sizes[d - 1];     } };  class blendingtable : public array<unsigned char, 3> { private:     enum {         size = 0x100,         ff = size - 1,     };  public:     blendingtable():     array<unsigned char, 3>(size, size, size) {         static_assert(std::numeric_limits<unsigned char>::max() == ff, "unsupported byte format");         create(ff, ff, ff);     }  private:     void create(int dst, int src, int a) {         (*this)(dst, src, a) = (src * + dst * (ff - a)) / ff;         if (a > 0) {             create(dst, src, - 1);         } else if (src > 0) {             create(dst, src - 1, ff);         } else if (dst > 0) {             create(dst - 1, ff, ff);         } else {             return;         }     }      void print(int dst, int src, int a) const {         system::print(static_cast<int>((*this)(ff - dst, ff - src, ff - a)), ' ');         if (a > 0) {             print(dst, src, - 1);         } else if (src > 0) {             print(dst, src - 1, ff);         } else if (dst > 0) {             print(dst - 1, ff, ff);         } else {             system::printline();             return;         }     }  public:     void print() const {         print(ff, ff, ff);     } };  int main() {     blendingtable().print();     return exit_success; } 

changing class definition of system from

class system { public:     template<typename t, typename... ts>     static void print(const t& t, const ts&... ts) {         std::cout << t << std::flush;         print(ts...);     }      static void print() {}      template<typename... ts>     static void printline(const ts&... ts) {         print(ts..., '\n');     } }; 

to

class system { public:     template<typename t, typename... ts>     static void print(t t, ts... ts) {         std::cout << t << std::flush;         print(ts...);     }      static void print() {}      template<typename... ts>     static void printline(ts... ts) {         print(ts..., '\n');     } }; 

magically allows gcc eliminate tail calls.

why 'whether or not passing function arguments reference' make such big difference in gcc's behaviour? semantically both same me in case.

as noted @jxh cast static_cast<int>() creates temporary reference passed print function. without such cast tail recursion optimized correctly.

the issue similar old case why isn't g++ tail call optimizing while gcc is? , workaround may similar https://stackoverflow.com/a/31793391/4023446.

it still possible use system arguments passed reference if call system::print moved separate private helper function systemprint:

class blendingtable : public array<unsigned char, 3> {  //...  private:     void systemprint(int dst, int src, int a) const     {         system::print(static_cast<int>((*this)(ff - dst, ff - src, ff - a)), ' ');     }      void print(int dst, int src, int a) const {         systemprint(dst, src, a);         if (a > 0) {             print(dst, src, - 1);         } else if (src > 0) {             print(dst, src - 1, ff);         } else if (dst > 0) {             print(dst - 1, ff, ff);         } else {             system::printline();             return;         }     }  // ...  } 

now tail call optimization works (g++ (ubuntu/linaro 4.7.2-2ubuntu1) 4.7.2 optimization option -o2) , print not cause stack overflow.

update

i verified other compilers:

  • the original code without change optimized clang++ apple llvm version 5.1 (clang-503.0.40) (based on llvm 3.4svn) -o1 optimization
  • g++ (ubuntu 4.8.4-2ubuntu1~14.04) 4.8.4 fails perform tco without cast or wrapping function systemprint workaround; here workaround system::print arguments values works.

so, issue specific compiler versions.


Comments

Popular posts from this blog

toolbar - How to add link to user registration inside toobar in admin joomla 3 custom component -

linux - disk space limitation when creating war file -