* builtin conversion between tabs and spaces [not found] ` <d4bc1a2a0810141842q1e50c85au7d813f2e5e37a84c@mail.gmail.com> @ 2008-10-15 1:44 ` Stefan Karpinski 2008-10-15 1:47 ` Stefan Karpinski ` (2 more replies) 0 siblings, 3 replies; 11+ messages in thread From: Stefan Karpinski @ 2008-10-15 1:44 UTC (permalink / raw) To: Git Mailing List I find myself really wanting to enforce standards in the use of spaces versus tabs. I deal with some unruly programmers who refuse to set their editors to use soft tabs, but I *hate* tabs in the repo. And of course others feel equally strongly about keeping only tabs in the repo (e.g. the git repo). This led me to wonder if it wouldn't make sense to have this conversion ability built into git. The following patch implements this functionality. It still needs work—it's not meant to be final, just to give an idea—but I just wanted to see if people on the git list thought this sort of thing would be worthwhile at all. If people think it's worth having in git, then how should it be configured? I feel like a project should be able to define the expected tab size for binary file types. Moreover, the project should be able to define the default cannonicalization with resepect to whitespace for different files types. Then, if they so desire, each git user should be able to override the output format on a per-repository basis. Does this make any sense? Comments? --- diff --git a/convert.c b/convert.c index 1816e97..280f45b 100644 --- a/convert.c +++ b/convert.c @@ -18,7 +18,7 @@ struct text_stat { /* NUL, CR, LF and CRLF counts */ - unsigned nul, cr, lf, crlf; + unsigned nul, cr, lf, crlf, tab; /* These are just approximations! */ unsigned printable, nonprintable; @@ -48,7 +48,10 @@ static void gather_stats(const char *buf, unsigned long size, struct text_stat * else if (c < 32) { switch (c) { /* BS, HT, ESC and FF */ - case '\b': case '\t': case '\033': case '\014': + case '\t': + stats->tab++; + /* fall through */ + case '\b': case '\033': case '\014': stats->printable++; break; case 0: @@ -235,6 +238,105 @@ static int crlf_to_worktree(const char *path, const char *src, size_t len, return 1; } +static int tabs_to_spaces(const char *path, const char *src, size_t len, + struct strbuf *buf, int untabify) +{ + char *to_free = NULL; + struct text_stat stats; + static const unsigned tab_size = 4; + char *spaces; + + if (!untabify) + return 0; + + /* instead of calling twice, should cache these stats across calls */ + gather_stats(src, len, &stats); + + if (!stats.tab) + return 0; + + /* are we "faking" in place editing ? */ + if (src == buf->buf) + to_free = strbuf_detach(buf, NULL); + + /* this growth may be excessive: not all tabs get tab_size spaces */ + strbuf_grow(buf, len + tab_size * stats.tab); + spaces = (char *) xmalloc(tab_size); + memset(spaces, ' ', tab_size); + for (;;) { + const char *line = src; + const char *nl = memchr(src, '\n', len); + char *tab; + if (!nl) + nl = src + len; + while (src < nl && (tab = memchr(src, '\t', nl - src))) { + strbuf_add(buf, src, tab - src); + strbuf_add(buf, spaces, tab_size - ((tab - line) % tab_size)); + src = tab + 1; + } + if (src < nl) + strbuf_add(buf, src, nl - src); + if (nl < src + len) + strbuf_addch(buf, '\n'); + else + break; + src = nl + 1; + len -= src - line; + } + + free(to_free); + free(spaces); + return 1; +} + +static int spaces_to_tabs(const char *path, const char *src, size_t len, + struct strbuf *buf, int tabify) +{ + static const unsigned tab_size = 4; + + if (!tabify) + return 0; + + /* only grow if not in place */ + if (strbuf_avail(buf) + buf->len < len) + strbuf_grow(buf, len - buf->len); + + for (;;) { + int tabs = 0, spaces = 0; + const char *line = src; + const char *nl = memchr(src, '\n', len); + if (!nl) + nl = src + len; + for (;; src++) { + if (*src == ' ') { + spaces++; + if (spaces == tab_size) { + tabs++; + spaces = 0; + } + } else if (*src == '\t') { + tabs++; + spaces = 0; + } else break; + } + if (line < src) { + memset(buf->buf + buf->len, '\t', tabs); + memset(buf->buf + buf->len + tabs, ' ', spaces); + strbuf_setlen(buf, buf->len + tabs + spaces); + } + if (src < nl) + strbuf_add(buf, src, nl - src); + if (nl < src + len) + strbuf_addch(buf, '\n'); + else + break; + src = nl + 1; + len -= src - line; + } + + return 1; +} + struct filter_params { const char *src; unsigned long size; @@ -370,22 +472,29 @@ static int read_convert_config(const char *var, const char *value, void *cb) return 0; } -static void setup_convert_check(struct git_attr_check *check) +struct convert_checks { + struct git_attr_check crlf, tabs, ident, filter; +}; + +static void setup_convert_check(struct convert_checks *checks) { static struct git_attr *attr_crlf; + static struct git_attr *attr_tabs; static struct git_attr *attr_ident; static struct git_attr *attr_filter; if (!attr_crlf) { attr_crlf = git_attr("crlf", 4); + attr_tabs = git_attr("tabs", 4); attr_ident = git_attr("ident", 5); attr_filter = git_attr("filter", 6); user_convert_tail = &user_convert; git_config(read_convert_config, NULL); } - check[0].attr = attr_crlf; - check[1].attr = attr_ident; - check[2].attr = attr_filter; + checks->crlf.attr = attr_crlf; + checks->tabs.attr = attr_tabs; + checks->ident.attr = attr_ident; + checks->filter.attr = attr_filter; } static int count_ident(const char *cp, unsigned long size) @@ -566,20 +675,22 @@ static int git_path_check_ident(const char *path, struct git_attr_check *check) return !!ATTR_TRUE(value); } +#define CHECK_ARRAY_SIZE (sizeof(struct convert_checks)/sizeof(struct git_attr_check)) + int convert_to_git(const char *path, const char *src, size_t len, struct strbuf *dst, enum safe_crlf checksafe) { - struct git_attr_check check[3]; + struct convert_checks checks; int crlf = CRLF_GUESS; int ident = 0, ret = 0; const char *filter = NULL; - setup_convert_check(check); - if (!git_checkattr(path, ARRAY_SIZE(check), check)) { + setup_convert_check(&checks); + if (!git_checkattr(path, CHECK_ARRAY_SIZE, (struct git_attr_check *) &checks)) { struct convert_driver *drv; - crlf = git_path_check_crlf(path, check + 0); - ident = git_path_check_ident(path, check + 1); - drv = git_path_check_convert(path, check + 2); + crlf = git_path_check_crlf(path, &(checks.crlf)); + ident = git_path_check_ident(path, &(checks.ident)); + drv = git_path_check_convert(path, &(checks.filter)); if (drv && drv->clean) filter = drv->clean; } @@ -589,6 +700,11 @@ int convert_to_git(const char *path, const char *src, size_t len, src = dst->buf; len = dst->len; } + ret |= tabs_to_spaces(path, src, len, dst, 1); // get real variable + if (ret) { + src = dst->buf; + len = dst->len; + } ret |= crlf_to_git(path, src, len, dst, crlf, checksafe); if (ret) { src = dst->buf; @@ -599,17 +715,17 @@ int convert_to_git(const char *path, const char *src, size_t len, int convert_to_working_tree(const char *path, const char *src, size_t len, struct strbuf *dst) { - struct git_attr_check check[3]; + struct convert_checks checks; int crlf = CRLF_GUESS; int ident = 0, ret = 0; const char *filter = NULL; - setup_convert_check(check); - if (!git_checkattr(path, ARRAY_SIZE(check), check)) { + setup_convert_check(&checks); + if (!git_checkattr(path, CHECK_ARRAY_SIZE, (struct git_attr_check *) &checks)) { struct convert_driver *drv; - crlf = git_path_check_crlf(path, check + 0); - ident = git_path_check_ident(path, check + 1); - drv = git_path_check_convert(path, check + 2); + crlf = git_path_check_crlf(path, &(checks.crlf)); + ident = git_path_check_ident(path, &(checks.ident)); + drv = git_path_check_convert(path, &(checks.filter)); if (drv && drv->smudge) filter = drv->smudge; } @@ -624,5 +740,10 @@ int convert_to_working_tree(const char *path, const char *src, size_t len, struc src = dst->buf; len = dst->len; } + ret |= tabs_to_spaces(path, src, len, dst, 1); // get real variable + if (ret) { + src = dst->buf; + len = dst->len; + } return ret | apply_filter(path, src, len, dst, filter); } ^ permalink raw reply related [flat|nested] 11+ messages in thread
* Re: builtin conversion between tabs and spaces 2008-10-15 1:44 ` builtin conversion between tabs and spaces Stefan Karpinski @ 2008-10-15 1:47 ` Stefan Karpinski 2008-10-15 6:25 ` Alex Riesen 2008-10-15 6:26 ` Johannes Sixt 2 siblings, 0 replies; 11+ messages in thread From: Stefan Karpinski @ 2008-10-15 1:47 UTC (permalink / raw) To: Git Mailing List Appologies for gmail mangling. I will use git send-email for real patches. On Tue, Oct 14, 2008 at 6:44 PM, Stefan Karpinski <stefan.karpinski@gmail•com> wrote: > I find myself really wanting to enforce standards in the use of spaces > versus tabs. I deal with some unruly programmers who refuse to set > their editors to use soft tabs, but I *hate* tabs in the repo. And of > course others feel equally strongly about keeping only tabs in the > repo (e.g. the git repo). > > This led me to wonder if it wouldn't make sense to have this > conversion ability built into git. The following patch implements this > functionality. It still needs work—it's not meant to be final, just to > give an idea—but I just wanted to see if people on the git list > thought this sort of thing would be worthwhile at all. > > If people think it's worth having in git, then how should it be > configured? I feel like a project should be able to define the > expected tab size for binary file types. Moreover, the project should > be able to define the default cannonicalization with resepect to > whitespace for different files types. Then, if they so desire, each > git user should be able to override the output format on a > per-repository basis. > > Does this make any sense? Comments? > > --- > diff --git a/convert.c b/convert.c > index 1816e97..280f45b 100644 > --- a/convert.c > +++ b/convert.c > @@ -18,7 +18,7 @@ > > struct text_stat { > /* NUL, CR, LF and CRLF counts */ > - unsigned nul, cr, lf, crlf; > + unsigned nul, cr, lf, crlf, tab; > > /* These are just approximations! */ > unsigned printable, nonprintable; > @@ -48,7 +48,10 @@ > static void gather_stats(const char *buf, unsigned long size, struct > text_stat * > else if (c < 32) { > switch (c) { > /* BS, HT, ESC and FF */ > - case '\b': case '\t': case '\033': case '\014': > + case '\t': > + stats->tab++; > + /* fall through */ > + case '\b': case '\033': case '\014': > stats->printable++; > break; > case 0: > @@ -235,6 +238,105 @@ > static int crlf_to_worktree(const char *path, const char *src, size_t len, > return 1; > } > > +static int tabs_to_spaces(const char *path, const char *src, size_t len, > + > struct strbuf *buf, int untabify) > +{ > + char *to_free = NULL; > + struct text_stat stats; > + static const unsigned tab_size = 4; > + char *spaces; > + > + if (!untabify) > + return 0; > + > + /* instead of calling twice, should cache these stats across calls */ > + gather_stats(src, len, &stats); > + > + if (!stats.tab) > + return 0; > + > + /* are we "faking" in place editing ? */ > + if (src == buf->buf) > + to_free = strbuf_detach(buf, NULL); > + > + /* this growth may be excessive: not all tabs get tab_size spaces */ > + strbuf_grow(buf, len + tab_size * stats.tab); > + spaces = (char *) xmalloc(tab_size); > + memset(spaces, ' ', tab_size); > + for (;;) { > + const char *line = src; > + const char *nl = memchr(src, '\n', len); > + char *tab; > + if (!nl) > + nl = src + len; > + while (src < nl && (tab = memchr(src, '\t', nl - src))) { > + strbuf_add(buf, src, tab - src); > + strbuf_add(buf, spaces, tab_size - ((tab - line) % tab_size)); > + src = tab + 1; > + } > + if (src < nl) > + strbuf_add(buf, src, nl - src); > + if (nl < src + len) > + strbuf_addch(buf, '\n'); > + else > + break; > + src = nl + 1; > + len -= src - line; > + } > + > + free(to_free); > + free(spaces); > + return 1; > +} > + > +static int spaces_to_tabs(const char *path, const char *src, size_t len, > + > struct strbuf *buf, int tabify) > +{ > + static const unsigned tab_size = 4; > + > + if (!tabify) > + return 0; > + > + /* only grow if not in place */ > + if (strbuf_avail(buf) + buf->len < len) > + strbuf_grow(buf, len - buf->len); > + > + for (;;) { > + int tabs = 0, spaces = 0; > + const char *line = src; > + const char *nl = memchr(src, '\n', len); > + if (!nl) > + nl = src + len; > + for (;; src++) { > + if (*src == ' ') { > + spaces++; > + if (spaces == tab_size) { > + tabs++; > + spaces = 0; > + } > + } else if (*src == '\t') { > + tabs++; > + spaces = 0; > + } else break; > + } > + if (line < src) { > + memset(buf->buf + buf->len, '\t', tabs); > + memset(buf->buf + buf->len + tabs, ' ', spaces); > + strbuf_setlen(buf, buf->len + tabs + spaces); > + } > + if (src < nl) > + strbuf_add(buf, src, nl - src); > + if (nl < src + len) > + strbuf_addch(buf, '\n'); > + else > + break; > + src = nl + 1; > + len -= src - line; > + } > + > + return 1; > +} > + > struct filter_params { > const char *src; > unsigned long size; > @@ -370,22 +472,29 @@ > static int read_convert_config(const char *var, const char *value, void *cb) > return 0; > } > > -static void setup_convert_check(struct git_attr_check *check) > +struct convert_checks { > + struct git_attr_check crlf, tabs, ident, filter; > +}; > + > +static void setup_convert_check(struct convert_checks *checks) > { > static struct git_attr *attr_crlf; > + static struct git_attr *attr_tabs; > static struct git_attr *attr_ident; > static struct git_attr *attr_filter; > > if (!attr_crlf) { > attr_crlf = git_attr("crlf", 4); > + attr_tabs = git_attr("tabs", 4); > attr_ident = git_attr("ident", 5); > attr_filter = git_attr("filter", 6); > user_convert_tail = &user_convert; > git_config(read_convert_config, NULL); > } > - check[0].attr = attr_crlf; > - check[1].attr = attr_ident; > - check[2].attr = attr_filter; > + checks->crlf.attr = attr_crlf; > + checks->tabs.attr = attr_tabs; > + checks->ident.attr = attr_ident; > + checks->filter.attr = attr_filter; > } > > static int count_ident(const char *cp, unsigned long size) > @@ -566,20 +675,22 @@ > static int git_path_check_ident(const char *path, struct git_attr_check *check) > return !!ATTR_TRUE(value); > } > > +#define CHECK_ARRAY_SIZE (sizeof(struct convert_checks)/sizeof(struct > git_attr_check)) > + > int convert_to_git(const char *path, const char *src, size_t len, > struct strbuf *dst, enum safe_crlf checksafe) > { > - struct git_attr_check check[3]; > + struct convert_checks checks; > int crlf = CRLF_GUESS; > int ident = 0, ret = 0; > const char *filter = NULL; > > - setup_convert_check(check); > - if (!git_checkattr(path, ARRAY_SIZE(check), check)) { > + setup_convert_check(&checks); > + if (!git_checkattr(path, CHECK_ARRAY_SIZE, (struct git_attr_check *) > &checks)) { > struct convert_driver *drv; > - crlf = git_path_check_crlf(path, check + 0); > - ident = git_path_check_ident(path, check + 1); > - drv = git_path_check_convert(path, check + 2); > + crlf = git_path_check_crlf(path, &(checks.crlf)); > + ident = git_path_check_ident(path, &(checks.ident)); > + drv = git_path_check_convert(path, &(checks.filter)); > if (drv && drv->clean) > filter = drv->clean; > } > @@ -589,6 +700,11 @@ > int convert_to_git(const char *path, const char *src, size_t len, > src = dst->buf; > len = dst->len; > } > + ret |= tabs_to_spaces(path, src, len, dst, 1); // get real variable > + if (ret) { > + src = dst->buf; > + len = dst->len; > + } > ret |= crlf_to_git(path, src, len, dst, crlf, checksafe); > if (ret) { > src = dst->buf; > @@ -599,17 +715,17 @@ > int convert_to_git(const char *path, const char *src, size_t len, > > int convert_to_working_tree(const char *path, const char *src, size_t > len, struct strbuf *dst) > { > - struct git_attr_check check[3]; > + struct convert_checks checks; > int crlf = CRLF_GUESS; > int ident = 0, ret = 0; > const char *filter = NULL; > > - setup_convert_check(check); > - if (!git_checkattr(path, ARRAY_SIZE(check), check)) { > + setup_convert_check(&checks); > + if (!git_checkattr(path, CHECK_ARRAY_SIZE, (struct git_attr_check *) > &checks)) { > struct convert_driver *drv; > - crlf = git_path_check_crlf(path, check + 0); > - ident = git_path_check_ident(path, check + 1); > - drv = git_path_check_convert(path, check + 2); > + crlf = git_path_check_crlf(path, &(checks.crlf)); > + ident = git_path_check_ident(path, &(checks.ident)); > + drv = git_path_check_convert(path, &(checks.filter)); > if (drv && drv->smudge) > filter = drv->smudge; > } > @@ -624,5 +740,10 @@ > int convert_to_working_tree(const char *path, const char *src, size_t > len, struc > src = dst->buf; > len = dst->len; > } > + ret |= tabs_to_spaces(path, src, len, dst, 1); // get real variable > + if (ret) { > + src = dst->buf; > + len = dst->len; > + } > return ret | apply_filter(path, src, len, dst, filter); > } > ^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: builtin conversion between tabs and spaces 2008-10-15 1:44 ` builtin conversion between tabs and spaces Stefan Karpinski 2008-10-15 1:47 ` Stefan Karpinski @ 2008-10-15 6:25 ` Alex Riesen 2008-10-15 20:52 ` Stefan Karpinski 2008-10-15 6:26 ` Johannes Sixt 2 siblings, 1 reply; 11+ messages in thread From: Alex Riesen @ 2008-10-15 6:25 UTC (permalink / raw) To: Stefan Karpinski; +Cc: Git Mailing List Stefan Karpinski, Wed, Oct 15, 2008 03:44:10 +0200: > I find myself really wanting to enforce standards in the use of spaces > versus tabs. I deal with some unruly programmers who refuse to set > their editors to use soft tabs, but I *hate* tabs in the repo. And of > course others feel equally strongly about keeping only tabs in the > repo (e.g. the git repo). > > This led me to wonder if it wouldn't make sense to have this > conversion ability built into git. The following patch implements this > functionality. It still needs work—it's not meant to be final, just to > give an idea—but I just wanted to see if people on the git list > thought this sort of thing would be worthwhile at all. Is your conversion two-way? IOW, is it possible to convert the converted file and get the original? (Because all the existing conversions are reversible) ^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: builtin conversion between tabs and spaces 2008-10-15 6:25 ` Alex Riesen @ 2008-10-15 20:52 ` Stefan Karpinski 2008-10-15 21:02 ` Jonathan del Strother 0 siblings, 1 reply; 11+ messages in thread From: Stefan Karpinski @ 2008-10-15 20:52 UTC (permalink / raw) To: Alex Riesen; +Cc: Git Mailing List > Is your conversion two-way? IOW, is it possible to convert the > converted file and get the original? (Because all the existing > conversions are reversible) Yes and no. The CRLF conversion isn't always invertable—it is so long as your use of CRLF/LF is consistent. The tab/space conversion is similar: if you consistently use spaces, then tabs_to_spaces will always give you back your original version; if you consistently use tabs, then spaces_to_tabs will give you back your original version. If you use some crazy mix of the two, you cannot reconstruct your original without remembering where there were tabs versus spaces, which information either filter destroys. But that's the same as the CRLF conversion. You could enable a warning when worktree file has an inconsistent mixture of tabs and spaces, like there is for inconsistent CRLF files. On Tue, Oct 14, 2008 at 11:25 PM, Alex Riesen <raa.lkml@gmail•com> wrote: > Stefan Karpinski, Wed, Oct 15, 2008 03:44:10 +0200: >> I find myself really wanting to enforce standards in the use of spaces >> versus tabs. I deal with some unruly programmers who refuse to set >> their editors to use soft tabs, but I *hate* tabs in the repo. And of >> course others feel equally strongly about keeping only tabs in the >> repo (e.g. the git repo). >> >> This led me to wonder if it wouldn't make sense to have this >> conversion ability built into git. The following patch implements this >> functionality. It still needs work—it's not meant to be final, just to >> give an idea—but I just wanted to see if people on the git list >> thought this sort of thing would be worthwhile at all. > > Is your conversion two-way? IOW, is it possible to convert the > converted file and get the original? (Because all the existing > conversions are reversible) > ^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: builtin conversion between tabs and spaces 2008-10-15 20:52 ` Stefan Karpinski @ 2008-10-15 21:02 ` Jonathan del Strother 2008-10-15 21:18 ` Stefan Karpinski 0 siblings, 1 reply; 11+ messages in thread From: Jonathan del Strother @ 2008-10-15 21:02 UTC (permalink / raw) To: Stefan Karpinski; +Cc: Alex Riesen, Git Mailing List On Wed, Oct 15, 2008 at 9:52 PM, Stefan Karpinski <stefan.karpinski@gmail•com> wrote: > if you consistently use spaces, then tabs_to_spaces will > always give you back your original version; if you consistently use > tabs, then spaces_to_tabs will give you back your original version. If > you use some crazy mix of the two, you cannot reconstruct your > original without remembering where there were tabs versus spaces, Just IMO, a crazy mix of tabs and spaces is the only _sane_ thing to do. Using tabs for the initial indentation, plus spaces for alignment of function arguments / comments / whatever, is the only way of getting a layout that will both look right regardless of the tab size, and allow a viewer to alter the indentation size. ^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: builtin conversion between tabs and spaces 2008-10-15 21:02 ` Jonathan del Strother @ 2008-10-15 21:18 ` Stefan Karpinski 2008-10-15 23:02 ` Stefan Karpinski 0 siblings, 1 reply; 11+ messages in thread From: Stefan Karpinski @ 2008-10-15 21:18 UTC (permalink / raw) To: Jonathan del Strother; +Cc: Alex Riesen, Git Mailing List, Johannes Sixt >> if you consistently use spaces, then tabs_to_spaces will >> always give you back your original version; if you consistently use >> tabs, then spaces_to_tabs will give you back your original version. If >> you use some crazy mix of the two, you cannot reconstruct your >> original without remembering where there were tabs versus spaces, > > Just IMO, a crazy mix of tabs and spaces is the only _sane_ thing to > do. Using tabs for the initial indentation, plus spaces for alignment > of function arguments / comments / whatever, is the only way of > getting a layout that will both look right regardless of the tab size, > and allow a viewer to alter the indentation size. That's not what I would call a "crazy" mix of tabs and spaces, but rather a *sane* mix of tabs and spaces. That can consistently be reproduced, and is in fact what the spaces_to_tabs function included above produces. The sane consistent formats as I see it are: 1) use spaces for everything 2) use tabs for indentation, spaces for everything else 3) use tabs for indentation and alignment If you know the tab size, you can reproduce any of these from the others, except that #3 is a little tricky since there's places where the tab/space issue can be ambiguous. I actually think that keeping the repo version with tab-based indentation is a very sane thing to do. However, I'd still like to be able to edit the files using soft tabs, largely because any program that doesn't know what my tab size should be applies its own interpretation and makes the code look terrible (think terminal output for diff, cat, less, etc.) On the other hand, a *crazy* mix of tabs and spaces is where some indentation is done with spaces while other indentation is done with tabs. Even crazier is a single line where the indentation is a mixture of tabs and spaces. I think that just about everyone can agree that this is not only crazy, but evil and is the kind of thing one really wants to avoid in a code base. Unfortunately, when developers disagree on their standard settings, it's very, very hard to avoid precisely this kind of mess. My idea is to enable git to prevent this sort of insanity if configured to do so. ^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: builtin conversion between tabs and spaces 2008-10-15 21:18 ` Stefan Karpinski @ 2008-10-15 23:02 ` Stefan Karpinski 2008-10-15 23:34 ` A Large Angry SCM 0 siblings, 1 reply; 11+ messages in thread From: Stefan Karpinski @ 2008-10-15 23:02 UTC (permalink / raw) To: Jonathan del Strother; +Cc: Alex Riesen, Git Mailing List, Johannes Sixt Any further comments? I'm more than willing to implement this, but I won't bother if there's no chance of getting it accepted as a patch. Does no one else feel like at least having the option to enforce whitespace consistency in git is a good thing? If not, I guess I'll just muddle along without this feature instead of implementing it. On Wed, Oct 15, 2008 at 2:18 PM, Stefan Karpinski <stefan.karpinski@gmail•com> wrote: > That's not what I would call a "crazy" mix of tabs and spaces, but > rather a *sane* mix of tabs and spaces. That can consistently be > reproduced, and is in fact what the spaces_to_tabs function included > above produces. The sane consistent formats as I see it are: > > 1) use spaces for everything > 2) use tabs for indentation, spaces for everything else > 3) use tabs for indentation and alignment > > If you know the tab size, you can reproduce any of these from the > others, except that #3 is a little tricky since there's places where > the tab/space issue can be ambiguous. I actually think that keeping > the repo version with tab-based indentation is a very sane thing to > do. However, I'd still like to be able to edit the files using soft > tabs, largely because any program that doesn't know what my tab size > should be applies its own interpretation and makes the code look > terrible (think terminal output for diff, cat, less, etc.) > > On the other hand, a *crazy* mix of tabs and spaces is where some > indentation is done with spaces while other indentation is done with > tabs. Even crazier is a single line where the indentation is a mixture > of tabs and spaces. I think that just about everyone can agree that > this is not only crazy, but evil and is the kind of thing one really > wants to avoid in a code base. Unfortunately, when developers disagree > on their standard settings, it's very, very hard to avoid precisely > this kind of mess. My idea is to enable git to prevent this sort of > insanity if configured to do so. ^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: builtin conversion between tabs and spaces 2008-10-15 23:02 ` Stefan Karpinski @ 2008-10-15 23:34 ` A Large Angry SCM 2008-10-16 2:00 ` Stefan Karpinski 0 siblings, 1 reply; 11+ messages in thread From: A Large Angry SCM @ 2008-10-15 23:34 UTC (permalink / raw) To: Stefan Karpinski Cc: Jonathan del Strother, Alex Riesen, Git Mailing List, Johannes Sixt Stefan Karpinski wrote: > Any further comments? I'm more than willing to implement this, but I > won't bother if there's no chance of getting it accepted as a patch. > Does no one else feel like at least having the option to enforce > whitespace consistency in git is a good thing? If not, I guess I'll > just muddle along without this feature instead of implementing it. I'm against including this in except as a sample smudge/clean script. Git is a content tracker; not the enforcement mechanism for individual project policies. ^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: builtin conversion between tabs and spaces 2008-10-15 23:34 ` A Large Angry SCM @ 2008-10-16 2:00 ` Stefan Karpinski 0 siblings, 0 replies; 11+ messages in thread From: Stefan Karpinski @ 2008-10-16 2:00 UTC (permalink / raw) To: gitzilla Cc: Jonathan del Strother, Alex Riesen, Git Mailing List, Johannes Sixt >> Any further comments? I'm more than willing to implement this, but I >> won't bother if there's no chance of getting it accepted as a patch. >> Does no one else feel like at least having the option to enforce >> whitespace consistency in git is a good thing? If not, I guess I'll >> just muddle along without this feature instead of implementing it. > > I'm against including this in except as a sample smudge/clean script. Git is > a content tracker; not the enforcement mechanism for individual project > policies. Alright. Fair enough. ^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: builtin conversion between tabs and spaces 2008-10-15 1:44 ` builtin conversion between tabs and spaces Stefan Karpinski 2008-10-15 1:47 ` Stefan Karpinski 2008-10-15 6:25 ` Alex Riesen @ 2008-10-15 6:26 ` Johannes Sixt 2008-10-15 20:55 ` Stefan Karpinski 2 siblings, 1 reply; 11+ messages in thread From: Johannes Sixt @ 2008-10-15 6:26 UTC (permalink / raw) To: Stefan Karpinski; +Cc: Git Mailing List Stefan Karpinski schrieb: > This led me to wonder if it wouldn't make sense to have this > conversion ability built into git. This wouldn't help your case a lot. It is still at the discretion of each individual repository owner to enable the conversion. (You didn't mean to make this conversion mandatory, did you?) BTW, you don't need to change git code to achieve this. It's sufficient to install a suitable "clean" filter: echo "*.c filter=c-code" > .git/info/attributes git config filter.c-code.clean tabs2spaces where tabs2spaces is your utility that does the conversion. -- Hannes ^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: builtin conversion between tabs and spaces 2008-10-15 6:26 ` Johannes Sixt @ 2008-10-15 20:55 ` Stefan Karpinski 0 siblings, 0 replies; 11+ messages in thread From: Stefan Karpinski @ 2008-10-15 20:55 UTC (permalink / raw) To: Johannes Sixt; +Cc: Git Mailing List >> This led me to wonder if it wouldn't make sense to have this >> conversion ability built into git. > > This wouldn't help your case a lot. It is still at the discretion of each > individual repository owner to enable the conversion. (You didn't mean to > make this conversion mandatory, did you?) This would all be completely discretionary, of course. I just thought that the issue was complicated enough and the problem of mixed spaces and tabs in source code ubiquitous enough that it might be worth "solving" correctly in git itself. > BTW, you don't need to change git code to achieve this. It's sufficient to > install a suitable "clean" filter: > > echo "*.c filter=c-code" > .git/info/attributes > git config filter.c-code.clean tabs2spaces > > where tabs2spaces is your utility that does the conversion. That was the first thing I did—in ruby first, then in C. The ruby version is *way* too slow to use on any number of files—and the typical use case is that most of your files are source code and so will have this applied to them. On the other hand, the ruby script (or perl or whatever) is portable, so I can send it to my Windows-based developers and have them use it too. But the speed issue is pretty crucial, so I rewrote the thing as an external C program, which is obviously much faster. But it's still noticably slower than native git checkout though. Worse still, now the filter script isn't portable, so I have to cross-compile it and give every developer the version that works on their platform and make sure that they have their path setup correctly, etc. Having the conversion built into git would 1) have native git checkout speed (I've tried it and it's not noticable slower than normal git checkouts); 2) be naturally portable to anywhere git works. Now, if I thought this was a thing that would only benefit me, I would never bring it up. However, it seems like something that a lot of projects might want to use to ensure standard spacing in their source files. Moreover, since developers feel strongly about using tabs or spaces, it would be beneficial for individuals too. I think there's enough complex issues that solving it once-and-for-all might be worth a shot. ^ permalink raw reply [flat|nested] 11+ messages in thread
end of thread, other threads:[~2008-10-16 2:02 UTC | newest]
Thread overview: 11+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
[not found] <d4bc1a2a0810141839r547a770j3a8e56312afa6a53@mail.gmail.com>
[not found] ` <d4bc1a2a0810141842q1e50c85au7d813f2e5e37a84c@mail.gmail.com>
2008-10-15 1:44 ` builtin conversion between tabs and spaces Stefan Karpinski
2008-10-15 1:47 ` Stefan Karpinski
2008-10-15 6:25 ` Alex Riesen
2008-10-15 20:52 ` Stefan Karpinski
2008-10-15 21:02 ` Jonathan del Strother
2008-10-15 21:18 ` Stefan Karpinski
2008-10-15 23:02 ` Stefan Karpinski
2008-10-15 23:34 ` A Large Angry SCM
2008-10-16 2:00 ` Stefan Karpinski
2008-10-15 6:26 ` Johannes Sixt
2008-10-15 20:55 ` Stefan Karpinski
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox