find-maint.texi 45 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209
  1. \input texinfo @c -*-texinfo-*-
  2. @c %**start of header
  3. @setfilename find-maint.info
  4. @include versionmaint.texi
  5. @settitle Maintaining GNU Findutils @value{VERSION}
  6. @c For double-sided printing, uncomment:
  7. @c @setchapternewpage odd
  8. @c %**end of header
  9. @iftex
  10. @finalout
  11. @end iftex
  12. @dircategory GNU organization
  13. @direntry
  14. * Maintaining Findutils: (find-maint). Maintaining GNU findutils
  15. @end direntry
  16. @copying
  17. This manual explains how GNU findutils is maintained, how changes should
  18. be made and tested, and what resources exist to help developers.
  19. This document corresponds to version @value{VERSION} of the GNU findutils.
  20. Copyright @copyright{} 2007--2021 Free Software Foundation, Inc.
  21. @quotation
  22. Permission is granted to copy, distribute and/or modify this document
  23. under the terms of the GNU Free Documentation License, Version 1.3 or
  24. any later version published by the Free Software Foundation; with no
  25. Invariant Sections, no Front-Cover Texts, and no Back-Cover Texts.
  26. A copy of the license is included in the section entitled
  27. ``GNU Free Documentation License''.
  28. @end quotation
  29. @end copying
  30. @titlepage
  31. @title Maintaining GNU Findutils
  32. @subtitle version @value{VERSION}, @value{UPDATED}
  33. @author by James Youngman
  34. @page
  35. @vskip 0pt plus 1filll
  36. @insertcopying
  37. @end titlepage
  38. @contents
  39. @ifnottex
  40. @node Top, Introduction, (dir), (dir)
  41. @top Maintaining GNU Findutils
  42. @insertcopying
  43. @end ifnottex
  44. @menu
  45. * Introduction::
  46. * Maintaining GNU Programs::
  47. * Design Issues::
  48. * Coding Conventions::
  49. * Tools::
  50. * Using the GNU Portability Library::
  51. * Documentation::
  52. * Testing::
  53. * Bugs::
  54. * Distributions::
  55. * Internationalisation::
  56. * Security::
  57. * Making Releases::
  58. * GNU Free Documentation License::
  59. @end menu
  60. @node Introduction
  61. @chapter Introduction
  62. This document explains how to contribute to and maintain GNU
  63. Findutils. It concentrates on developer-specific issues. For
  64. information about how to use the software please refer to
  65. @ref{Introduction, ,Introduction,find,The Findutils manual}.
  66. This manual aims to be useful without necessarily being verbose. It's
  67. also a recent document, so there will be many areas in which
  68. improvements can be made. If you find that the document misses out
  69. important information or any part of the document is so terse as to
  70. be unuseful, please ask for help on the @email{bug-findutils@@gnu.org}
  71. mailing list. We'll try to improve this document too.
  72. @node Maintaining GNU Programs
  73. @chapter Maintaining GNU Programs
  74. GNU Findutils is part of the GNU Project and so there are a number of
  75. documents which set out standards for the maintenance of GNU
  76. software.
  77. @table @file
  78. @item standards.texi
  79. GNU Project Coding Standards. All changes to findutils should comply
  80. with these standards. In some areas we go somewhat beyond the
  81. requirements of the standards, but these cases are explained in this
  82. manual.
  83. @item maintain.texi
  84. Information for Maintainers of GNU Software. This document provides
  85. guidance for GNU maintainers. Everybody with commit access should
  86. read this document. Everybody else is welcome to do so too, of
  87. course.
  88. @end table
  89. @node Design Issues
  90. @chapter Design Issues
  91. The findutils package is installed on many many systems, usually as a
  92. fundamental component. The programs in the package are often used in
  93. order to successfully boot or fix the system.
  94. This fact means that for findutils we bear in mind considerations that
  95. may not apply so much to other packages. For example, the fact
  96. that findutils is often a base component motivates us to
  97. @itemize
  98. @item Limit dependencies on libraries
  99. @item Avoid dependencies on other large packages (for example, interpreters)
  100. @item Be conservative when making changes to the 'stable' release branch
  101. @end itemize
  102. All those considerations come before functionality. Functional
  103. enhancements are still made to findutils, but these are almost
  104. exclusively introduced in the 'development' release branch, to allow
  105. extensive testing and proving.
  106. Sometimes it is useful to have a priority list to provide guidance
  107. when making design trade-offs. For findutils, that priority list is:
  108. @enumerate
  109. @item Correctness
  110. @item Standards compliance
  111. @item Security
  112. @item Backward compatibility
  113. @item Performance
  114. @item Functionality
  115. @end enumerate
  116. For example, we support the @code{-exec} action because POSIX
  117. compliance requires this, even though there are security problems with
  118. it and we would otherwise prefer people to use @code{-execdir}. There
  119. are also cases where some performance is sacrificed in the name of
  120. security. For example, the sanity checks that @code{find} performs
  121. while traversing a directory tree may slow it down. We adopt
  122. functional changes, and functional changes are allowed to make
  123. @code{find} slower, but only if there is no detectable impact on users
  124. who don't use the feature.
  125. Backward-incompatible changes do get made in order to comply with
  126. standards (for example the behaviour of @code{-perm -...} changed in
  127. order to comply with POSIX). However, they don't get made in order to
  128. provide better ease of use; for example the semantics of @code{-size
  129. -2G} are almost always unexpected by users, but we retain the current
  130. behaviour because of backward compatibility and for its similarity to
  131. the block-rounding behaviour of @code{-size -30}. We might introduce
  132. a change which does not have the unfortunate rounding behaviour, but
  133. we would choose another syntax (for example @code{-size '<2G'}) for
  134. this.
  135. In a general sense, we try to do test-driven development of the
  136. findutils code; that is, we try to implement test cases for new
  137. features and bug fixes before modifying the code to make the test
  138. pass. Some features of the code are tested well, but the test
  139. coverage for other features is less good. If you are about to modify
  140. the code for a predicate and aren't sure about the test coverage, use
  141. @code{grep} on the test directories and measure the coverage with
  142. @code{lcov} or another test coverage tool.
  143. You should be able to use the @code{coverage} Makefile target (it's
  144. defined in @file{maint.mk}) to generate a test coverage report for
  145. findutils. Due to limitations in @code{lcov}, this only works if
  146. your build directory is the same as the source directory (that is,
  147. you're not using a VPATH build configuration).
  148. Lastly, we try not to depend on having a ``working system''. The
  149. findutils suite is used for diagnosis of problems, and this applies
  150. especially to @code{find}. We should ensure that @code{find} still
  151. works on relatively broken systems, for example systems with damaged
  152. @file{/etc/passwd} or @file{/etc/fstab} files. Another interesting
  153. example is the case where a system is a client of one or more
  154. unresponsive NFS servers. On such a system, if you try to stat all
  155. mount points, your program will hang indefinitely, waiting for the
  156. remote NFS server to respond.
  157. Another interesting but unusual case is broken NFS servers and corrupt
  158. filesystems; sometimes they return `impossible' file modes. It's
  159. important that find does not entirely fail when encountering such a
  160. file.
  161. @node Coding Conventions
  162. @chapter Coding Conventions
  163. Coding style documents which set out to establish a uniform look and
  164. feel to source code have worthy goals, for example greater ease of
  165. maintenance and readability. However, I do not believe that in
  166. general coding style guide authors can envisage every situation, and
  167. it is always possible that it might on occasion be necessary to break
  168. the letter of the style guide in order to honour its spirit, or to
  169. better achieve the style guide's goals.
  170. I've certainly seen many style guides outside the free software world
  171. which make bald statements such as ``functions shall have exactly one
  172. return statement''. The desire to ensure consistency and obviousness
  173. of control flow is laudable, but it is all too common for such bald
  174. requirements to be followed unthinkingly. Certainly I've seen such
  175. coding standards result in unmaintainable code with terrible
  176. infelicities such as functions containing @code{if} statements nested
  177. nine levels deep. I suppose such coding standards don't survive in
  178. free software projects because they tend to drive away potential
  179. contributors or tend to generate heated discussions on mailing lists.
  180. Equally, a nine-level-deep function in a free software program would
  181. quickly get refactored, assuming it is obvious what the function is
  182. supposed to do...
  183. Be that as it may, the approach I will take for this document is to
  184. explain some idioms and practices in use in the findutils source code,
  185. and leave it up to the reader's engineering judgement to decide which
  186. considerations apply to the code they are working on, and whether or
  187. not there is sufficient reason to ignore the guidance in current
  188. circumstances.
  189. @menu
  190. * Make the Compiler Find the Bugs::
  191. * Factor Out Repeated Code::
  192. * Debugging is For Users Too::
  193. * Don't Trust the File System Contents::
  194. * The File System Is Being Modified::
  195. @end menu
  196. @node Make the Compiler Find the Bugs
  197. @section Make the Compiler Find the Bugs
  198. Finding bugs is tedious. If I have a filesystem containing two
  199. million files, and a find command line should print one million of
  200. them, but in fact it misses out 1%, you can tell the program is
  201. printing the wrong result only if you know the right answer for that
  202. filesystem at that time. If you don't know this, you may just not
  203. find out about that bug. For this reason it is important to have a
  204. comprehensive test suite.
  205. The test suite is of course not the only way to find the bugs. The
  206. findutils source code makes liberal use of the assert macro. While on
  207. the one hand these might be a performance drain, the performance
  208. impact of most of these is negligible compared to the time taken to
  209. fetch even one sector from a disk drive.
  210. Assertions should not be used to check the results of operations which
  211. may be affected by the program's external environment. For example,
  212. never assert that a file could be opened successfully. Errors
  213. relating to problems with the program's execution environment should
  214. be diagnosed with a user-oriented error message. An assertion failure
  215. should always denote a bug in the program.
  216. Avoid using @code{assert} to mark not-fully-implemented features of
  217. your code as such. Finish the implementation, disable the code, or
  218. leave the unfinished version on a local branch.
  219. Several programs in the findutils suite perform self-checks. See for
  220. example the function @code{pred_sanity_check} in @file{find/pred.c}.
  221. This is generally desirable.
  222. There are also a number of small ways in which we can help the
  223. compiler to find the bugs for us.
  224. @subsection Constants in Equality Testing
  225. It's a common error to write @code{=} when @code{==} is meant.
  226. Sometimes this happens in new code and is simply due to finger
  227. trouble. Sometimes it is the result of the inadvertent deletion of a
  228. character. In any case, there is a subset of cases where we can
  229. persuade the compiler to generate an error message when we make this
  230. mistake; this is where the equality test is with a constant.
  231. This is an example of a vulnerable piece of code.
  232. @example
  233. if (x == 2)
  234. ...
  235. @end example
  236. A simple typo converts the above into
  237. @example
  238. if (x = 2)
  239. ...
  240. @end example
  241. We've introduced a bug; the condition is always true, and the value of
  242. @code{x} has been changed. However, a simple change to our practice
  243. would have made us immune to this problem:
  244. @example
  245. if (2 == x)
  246. ...
  247. @end example
  248. Usually, the Emacs keystroke @kbd{M-t} can be used to swap the operands.
  249. @subsection Spelling of ASCII NUL
  250. Strings in C are just sequences of characters terminated by a NUL.
  251. The ASCII NUL character has the numerical value zero. It is normally
  252. represented in C code as @samp{\0}. Here is a typical piece of C
  253. code:
  254. @example
  255. *p = '\0';
  256. @end example
  257. Consider what happens if there is an unfortunate typo:
  258. @example
  259. *p = '0';
  260. @end example
  261. We have changed the meaning of our program and the compiler cannot
  262. diagnose this as an error. Our string is no longer terminated. Bad
  263. things will probably happen. It would be better if the compiler could
  264. help us diagnose this problem.
  265. In C, the type of @code{'\0'} is in fact int, not char. This provides
  266. us with a simple way to avoid this error. The constant @code{0} has
  267. the same value and type as the constant @code{'\0'}. However, it is
  268. not as vulnerable to typos. For this reason I normally prefer to
  269. use this code:
  270. @example
  271. *p = 0;
  272. @end example
  273. @node Factor Out Repeated Code
  274. @section Factor Out Repeated Code
  275. Repeated code imposes a greater maintenance burden and increases the
  276. exposure to bugs. For example, if you discover that something you
  277. want to implement has some similarity with an existing piece of code,
  278. don't cut and paste it. Instead, factor the code out. The risk of
  279. cutting and pasting the code, particularly if you do this several
  280. times, is that you end up with several copies of the same code.
  281. If the original code had a bug, you now have N places where this needs
  282. to be fixed. It's all too easy to miss some out when trying to fix the
  283. bug. Equally, it's quite possible that when pasting the code into
  284. some function, the pasted code was not quite adapted correctly to its
  285. new environment. To pick a contrived example, perhaps it modifies a
  286. global variable which it (that [original] code) shouldn't be touching
  287. in its new home. Worse, perhaps it makes some unstated assumption about
  288. the nature of the input arguments which is in fact not true for the
  289. context of the now duplicated code.
  290. A good example of the use of refactoring in findutils is the
  291. @code{collect_arg} function in @file{find/parser.c}. A less clear-cut
  292. but larger example is the factoring out of code which would otherwise
  293. have been duplicated between @file{find/oldfind.c} and
  294. @file{find/ftsfind.c}.
  295. The findutils test suite is comprehensive enough that refactoring code
  296. should not generally be a daunting prospect from a testing point of
  297. view. Nevertheless there are some areas which are only
  298. lightly-tested:
  299. @enumerate
  300. @item Tests on the ages of files
  301. @item Code which deals with the values returned by operating system calls (for example handling of ENOENT)
  302. @item Code dealing with OS limits (for example, limits on path length
  303. or exec arguments)
  304. @item Code relating to features not all systems have (for example
  305. Solaris Doors)
  306. @end enumerate
  307. Please exercise caution when working in those areas.
  308. @node Debugging is For Users Too
  309. @section Debugging is For Users Too
  310. Debug and diagnostic code is often used to verify that a program is
  311. working in the way its author thinks it should be. But users are
  312. often uncertain about what a program is doing, too. Exposing them to a
  313. little more diagnostic information can help. Much of the diagnostic
  314. code in @code{find}, for example, is controlled by the @samp{-D} flag,
  315. as opposed to C preprocessor directives.
  316. Making diagnostic messages available to users also means that the
  317. phrasing of the diagnostic messages becomes important, too.
  318. @node Don't Trust the File System Contents
  319. @section Don't Trust the File System Contents
  320. People use @code{find} to search in directories created by other
  321. people. Sometimes they do this to check for suspicious activity (for
  322. example to look for new setuid binaries). This means that it would be
  323. bad if @code{find} were vulnerable to, say, a security problem
  324. exploitable by constructing a specially-crafted filename. The same
  325. consideration would apply to @code{locate} and @code{updatedb}.
  326. Henry Spencer said this well in his fifth commandment:
  327. @quotation
  328. Thou shalt check the array bounds of all strings (indeed, all arrays),
  329. for surely where thou typest @samp{foo} someone someday shall type
  330. @samp{supercalifragilisticexpialidocious}.
  331. @end quotation
  332. Symbolic links can often be a problem. If @code{find} calls
  333. @code{lstat} on something and discovers that it is a directory, it's
  334. normal for @code{find} to recurse into it. Even if the @code{chdir}
  335. system call is used immediately, there is still a window of
  336. opportunity between the @code{lstat} and the @code{chdir} in which a
  337. malicious person could rename the directory and substitute a symbolic
  338. link to some other directory.
  339. @node The File System Is Being Modified
  340. @section The File System Is Being Modified
  341. The filesystem gets modified while you are traversing it. For
  342. example, it's normal for files to get deleted while @code{find} is
  343. traversing a directory. Issuing an error message seems helpful when a
  344. file is deleted from the one directory you are interested in, but if
  345. @code{find} is searching 15000 directories, such a message becomes
  346. less helpful.
  347. Bear in mind also that it is possible for the directory @code{find} is
  348. searching to be concurrently moved elsewhere in the file system,
  349. and that the directory in which @code{find} was started could be
  350. deleted.
  351. Henry Spencer's sixth commandment is also apposite here:
  352. @quotation
  353. If a function be advertised to return an error code in the event of
  354. difficulties, thou shalt check for that code, yea, even though the
  355. checks triple the size of thy code and produce aches in thy typing
  356. fingers, for if thou thinkest ``it cannot happen to me'', the gods
  357. shall surely punish thee for thy arrogance.
  358. @end quotation
  359. There are a lot of files out there. They come in all dates and
  360. sizes. There is a condition out there in the real world to exercise
  361. every bit of the code base. So we try to test that code base before
  362. someone falls over a bug.
  363. @node Tools
  364. @chapter Tools
  365. Most of the tools required to build findutils are mentioned in the
  366. file @file{README-hacking}. We also use some other tools:
  367. @table @asis
  368. @item System call traces
  369. Much of the execution time of find is spent waiting for filesystem
  370. operations. A system call trace (for example, that provided by
  371. @code{strace}) shows what system calls are being made. Using this
  372. information we can work to remove unnecessary file system operations.
  373. @item Valgrind
  374. Valgrind is a tool which dynamically verifies the memory accesses a
  375. program makes to ensure that they are valid (for example, that the
  376. behaviour of the program does not in any way depend on the contents of
  377. uninitialized memory).
  378. @item DejaGnu
  379. DejaGnu is the test framework used to run the findutils test suite
  380. (the @code{runtest} program is part of DejaGnu). It would be ideal if
  381. everybody building @code{findutils} also ran the test suite, but many
  382. people don't have DejaGnu installed. When changes are made to
  383. findutils, DejaGnu is invoked a lot. @xref{Testing}, for more
  384. information.
  385. @end table
  386. @node Using the GNU Portability Library
  387. @chapter Using the GNU Portability Library
  388. The Gnulib library (@url{https://www.gnu.org/software/gnulib/}) makes a
  389. variety of systems look more like a GNU/Linux system and also applies
  390. a bunch of automatic bug fixes and workarounds. Some of these also
  391. apply to GNU/Linux systems too. For example, the Gnulib regex
  392. implementation is used when we determine that we are building on a
  393. GNU libc system with a bug in the regex implementation.
  394. @section How and Why we Import the Gnulib Code
  395. Gnulib does not have a release process which results in a source
  396. tarball you can download. Instead, the code is simply made available
  397. by GIT, so we import gnulib via the submodule feature. The bootstrap
  398. script performs the necessary steps.
  399. Findutils does not use all the Gnulib code. The modules we need are
  400. listed in the file @file{bootstrap.conf}.
  401. The upshot of all this is that we can use the findutils git repository
  402. to track which version of Gnulib every findutils release uses.
  403. A small number of files are installed by automake and will therefore
  404. vary according to which version of automake was used to generate a
  405. release. This includes for example boiler-plate GNU files such as
  406. @file{ABOUT-NLS}, @file{INSTALL} and @file{COPYING}.
  407. @section How We Fix Gnulib Bugs
  408. Gnulib is used by quite a number of GNU projects, and this means that
  409. it gets plenty of testing. Therefore there are relatively few bugs in
  410. the Gnulib code, but it does happen from time to time.
  411. However, since there is no waiting around for a Gnulib source release
  412. tarball, Gnulib bugs are generally fixed quickly. Here is an outline
  413. of the way we would contribute a fix to Gnulib (assuming you know it
  414. is not already fixed in the current Gnulib git tree):
  415. @table @asis
  416. @item Check you already completed a copyright assignment for Gnulib
  417. @item Begin with a vanilla git tree
  418. Download the Findutils source code from git (or use the tree you have
  419. already)
  420. @item Run the bootstrap script
  421. @item Run configure
  422. @item Build findutils
  423. Build findutils and run the test suite, which should pass. In our
  424. example we assume you have just noticed a bug in Gnulib, not that
  425. recent Gnulib changes broke the findutils regression tests.
  426. @item Write a test case
  427. If in fact Gnulib did break the findutils regression tests, you can probably
  428. skip this step, since you already have a test case demonstrating the problem.
  429. Otherwise, write a findutils test case for the bug and/or a Gnulib test case.
  430. @item Fix the Gnulib bug
  431. Make sure your editor follows symbolic links so that your changes to
  432. @file{gnulib/...} actually affect the files in the git working
  433. directory you checked out earlier. Observe that your test now passes.
  434. @item Prepare a Gnulib patch
  435. In the gnulib subdirectory, use @code{git format-patch} to prepare the
  436. patch. Follow the normal usage for checkin comments (take a look at
  437. the output of @code{git log}). Check that the patch conforms with the
  438. GNU coding standards, and email it to the Gnulib mailing list.
  439. @item Wait for the patch to be applied
  440. Once your bug fix has been applied, you can update your gnulib
  441. directory from git, and then check in the change to the submodule as
  442. normal (you can check @code{git help submodule} for details).
  443. @end table
  444. There is an alternative to the method above; it is possible to store
  445. local diffs to be patched into gnulib beneath the
  446. @file{gnulib-local} directory. Normally however, there is no need for this,
  447. since gnulib updates are very prompt.
  448. @section How to update Gnulib to latest
  449. With a non-dirty working tree, the command @code{make update-gnulib-to-latest}
  450. (or the shorter alias @code{make gnulib-sync}) allows you to update the
  451. gnulib submodule. In detail, that is:
  452. @enumerate
  453. @item Fetching the latest upstream gnulib reference.
  454. @item Copying the files which should stay in sync like
  455. @file{bootstrap} from gnulib into the findutils working tree.
  456. @item And finally showing the @code{git status} for the gnulib submodule
  457. and the above copied files.
  458. @end enumerate
  459. After that, the maintainer checks that all is correct and that findutils builds
  460. and runs correctly, and finally commits the new gnulib version, e.g. via
  461. @code{git gui}.
  462. The @code{gnulib-sync} target can be run at any time after a @code{configure}
  463. run; it refuses to run only if the working tree is dirty.
  464. @node Documentation
  465. @chapter Documentation
  466. The findutils git tree includes several different types of
  467. documentation.
  468. @section git change log
  469. The git change log for the source tree contains check-in messages
  470. which describe each check-in. These have a standard format:
  471. @smallexample
  472. Summary of the change.
  473. (ChangeLog-style detail)
  474. @end smallexample
  475. Here, the format of the detail part follows the standard GNU ChangeLog
  476. style, but without whitespace in the left margin and without
  477. author/date headers. Take a look at the output of @code{git log} to
  478. see some examples. The README-hacking file also contains an example
  479. with an explanation.
  480. @section User Documentation
  481. User-oriented documentation is provided as manual pages and in
  482. Texinfo. See
  483. @ref{Introduction,,Introduction,find,The Findutils manual}.
  484. Please make sure both sets of documentation are updated if you make a
  485. change to the code. The GNU coding standards do not normally call for
  486. maintaining manual pages on the grounds of effort duplication.
  487. However, the manual page format is more convenient for quick
  488. reference, and so it's worth maintaining both types of documentation.
  489. However, the manual pages are normally rather more terse than the
  490. Texinfo documentation. The manual pages are suitable for reference
  491. use, but the Texinfo manual should also include introductory and
  492. tutorial material.
  493. We make the user documentation available on the web, on the GNU
  494. project web site. These web pages are source-controlled via CVS
  495. (still!). If you are a member of the @samp{findutils} project on
  496. Savannah you should be able to check the web pages out like this
  497. (@samp{$USER} is a placeholder for your Savannah username):
  498. @smallexample
  499. cvs -d :ext:$USER@@cvs.savannah.gnu.org:/web/findutils checkout findutils/manual
  500. @end smallexample
  501. You can automatically update the documentation in this repository
  502. using the script @samp{build-aux/update-online-manual.sh} in the
  503. findutils Git repository.
  504. @section Build Guidance
  505. @table @file
  506. @item ABOUT-NLS
  507. Describes the Free Translation Project, the translation status of
  508. various GNU projects, and how to participate by translating an
  509. application.
  510. @item AUTHORS
  511. Lists the authors of findutils.
  512. @item COPYING
  513. The copyright license covering findutils; currently, the GNU GPL,
  514. version 3.
  515. @item INSTALL
  516. Generic installation instructions for installing GNU programs.
  517. @item README
  518. Information about how to compile findutils in particular.
  519. @item README-hacking
  520. Describes how to build findutils from the code in git.
  521. @item THANKS
  522. Thanks to people who contributed to findutils. Generally, if
  523. someone's contribution was significant enough to need a copyright
  524. assignment, their name should go in here.
  525. @item TODO
  526. Mainly obsolete. Please add bugs to the Savannah bug tracker instead
  527. of adding entries to this file.
  528. @end table
  529. @section Release Information
  530. @table @file
  531. @item NEWS
  532. Enumerates the user-visible changes in each release. Typical changes
  533. are fixed bugs, functionality changes and documentation changes.
  534. Include the date when a release is made.
  535. @item ChangeLog
  536. This file enumerates all changes to the findutils source code (with
  537. the possible exception of @file{.cvsignore} and @file{.gitignore}
  538. changes). The level of detail used for this file should be sufficient
  539. to answer the questions ``what changed?'' and ``why was it changed?''.
  540. The file is generated from the git commit messages during @code{make dist}.
  541. If a change fixes a bug, always give the bug reference number in the
  542. @file{NEWS} file and of course also in the checkin message.
  543. In general, it should be possible to enumerate all
  544. material changes to a function by searching for its name in
  545. @file{ChangeLog}. Mention when each release is made.
  546. @end table
  547. @node Testing
  548. @chapter Testing
  549. This chapter will explain the general procedures for adding tests to
  550. the test suite, and the functions defined in the findutils-specific
  551. DejaGnu configuration. Where appropriate, references will be made to
  552. the DejaGnu documentation.
  553. @node Bugs
  554. @chapter Bugs
  555. Bugs are logged in the Savannah bug tracker
  556. @url{https://savannah.gnu.org/bugs/?group=findutils}. The tracker
  557. offers several fields but their use is largely obvious. The
  558. life-cycle of a bug is like this:
  559. @table @asis
  560. @item Open
  561. Someone, usually a maintainer, a distribution maintainer or a user,
  562. creates a bug by filling in the form. They fill in field values as
  563. they see fit. This will generate an email to
  564. @email{bug-findutils@@gnu.org}.
  565. @item Triage
  566. The bug hangs around with @samp{Status=None} until someone begins to
  567. work on it. At that point they set the @samp{Assigned to} field and will
  568. sometimes set the status to @samp{In Progress}, especially if the bug
  569. will take a while to fix.
  570. @item Non-bugs
  571. Quite a lot of reports are not actually bugs; for these the usual
  572. procedure is to explain why the problem is not a bug, set the status
  573. to @samp{Invalid} and close the bug. Make sure you set the
  574. @samp{Assigned to} field to yourself before closing the bug.
  575. @item Fixing
  576. When you commit a bug fix into git (or in the case of a contributed
  577. patch, commit the change), mark the bug as @samp{Fixed}. Make sure
  578. you include a new test case where this is relevant. If you can figure
  579. out which releases are affected, please also set the @samp{Release}
  580. field to the earliest release which is affected by the bug.
  581. Indicate which source branch the fix is included in (for example,
  582. 4.2.x or 4.3.x). Don't close the bug yet.
  583. @item Release
  584. When a release is made which includes the bug fix, make sure the bug
  585. is listed in the NEWS file. Once the release is made, fill in the
  586. @samp{Fixed Release} field and close the bug.
  587. @end table
  588. @node Distributions
  589. @chapter Distributions
  590. Almost all GNU/Linux distributions include findutils, but only some of
  591. them have a package maintainer who is a member of the mailing list.
  592. Distributions don't often feed back patches to the
  593. @email{bug-findutils@@gnu.org} list, but on the other hand many of
  594. their patches relate only to standards for file locations and so
  595. forth, and are therefore distribution-specific. On an irregular basis
  596. I check the current patches being used by one or two distributions,
  597. but the total number of GNU/Linux distributions is large enough that
  598. we could not hope to cover them all.
  599. Often, bugs are raised against a distribution's bug tracker instead of
  600. GNU's. Periodically (about every six months) I take a look at some
  601. of the more accessible bug trackers to indicate which bugs have been
  602. fixed upstream.
  603. Many distributions include both findutils and the slocate package,
  604. which provides a replacement @code{locate}.
  605. @node Internationalisation
  606. @chapter Internationalisation
  607. Translation is essentially automated from the maintainer's point of
  608. view. The TP mails the maintainer when a new PO file is available,
  609. and we just download it and check it in. The @file{bootstrap} script
  610. copies @file{.po} files into the working tree. For more information,
  611. please see
  612. @url{https://translationproject.org/domain/findutils.html}.
  613. @node Security
  614. @chapter Security
  615. See @ref{Security Considerations, ,Security Considerations,find,The
  616. Findutils manual}, for a full description of the findutils approach to
  617. security considerations and discussion of particular tools.
  618. If someone reports a security bug publicly, we should fix this as
  619. rapidly as possible. If necessary, this can mean issuing a fixed
  620. release containing just the one bug fix. We try to avoid issuing
  621. releases which include both significant security fixes and functional
  622. changes.
  623. Where someone reports a security problem privately, we generally try
  624. to construct and test a patch without pushing the intermediate code to
  625. the public repository.
  626. Once everything has been tested, this allows us to make a release and
  627. push the patch. The advantage of doing things this way is that we
  628. avoid situations where people watching for git commits can figure out
  629. and exploit a security problem before a fixed release is available.
  630. It's important that security problems be fixed promptly, but don't
  631. rush so much that things go wrong. Make sure the new release really
  632. fixes the problem. It's usually best not to include functional
  633. changes in your security-fix release.
  634. If the security problem is serious, send an alert to
  635. @email{vendor-sec@@lst.de}. The members of the list include most
  636. GNU/Linux distributions. The point of doing this is to allow them to
  637. prepare to release your security fix to their customers, once the fix
  638. becomes available. Here is an example alert:-
  639. @smallexample
  640. GNU findutils heap buffer overrun (potential privilege escalation)
  641. I. BACKGROUND
  642. =============
  643. GNU findutils is a set of programs which search for files on Unix-like
  644. systems. It is maintained by the GNU Project of the Free Software
  645. Foundation. For more information, see
  646. @url{https://www.gnu.org/software/findutils}.
  647. II. DESCRIPTION
  648. ===============
  649. When GNU locate reads filenames from an old-format locate database,
  650. they are read into a fixed-length buffer allocated on the heap.
  651. Filenames longer than the 1026-byte buffer can cause a buffer overrun.
  652. The overrunning data can be chosen by any person able to control the
  653. names of files created on the local system. This will normally
  654. include all local users, but in many cases also remote users (for
  655. example in the case of FTP servers allowing uploads).
  656. III. ANALYSIS
  657. =============
  658. Findutils supports three different formats of locate database, its
  659. native format "LOCATE02", the slocate variant of LOCATE02, and a
  660. traditional ("old") format that locate uses on other Unix systems.
  661. When locate reads filenames from a LOCATE02 database (the default
  662. format), the buffer into which data is read is automatically extended
  663. to accommodate the length of the filenames.
  664. This automatic buffer extension does not happen for old-format
  665. databases. Instead a 1026-byte buffer is used. When a longer
  666. pathname appears in the locate database, the end of this buffer is
  667. overrun. The buffer is allocated on the heap (not the stack).
  668. If the locate database is in the default LOCATE02 format, the locate
  669. program does perform automatic buffer extension, and the program is
  670. not vulnerable to this problem. The software used to build the
  671. old-format locate database is not itself vulnerable to the same
  672. attack.
  673. Most installations of GNU findutils do not use the old database
  674. format, and so will not be vulnerable.
  675. IV. DETECTION
  676. =============
  677. Software
  678. --------
  679. All existing releases of findutils are affected.
  680. Installations
  681. -------------
  682. To discover the longest path name on a given system, you can use the
  683. following command (requires GNU findutils and GNU coreutils):
  684. @verbatim
  685. find / -print0 | tr -c '\0' 'x' | tr '\0' '\n' | wc -L
  686. @end verbatim
  687. V. EXAMPLE
  688. ==========
  689. This section includes a shell script which determines which of a list
  690. of locate binaries is vulnerable to the problem. The shell script has
  691. been tested only on glibc based systems having a mktemp binary.
  692. NOTE: This script deliberately overruns the buffer in order to
  693. determine if a binary is affected. Therefore running it on your
  694. system may have undesirable effects. We recommend that you read the
  695. script before running it.
  696. @verbatim
  697. #! /bin/sh
  698. set +m
  699. if vanilla_db="$(mktemp nicedb.XXXXXX)" ; then
  700. if updatedb --prunepaths="" --old-format --localpaths="/tmp" \
  701. --output="${vanilla_db}" ; then
  702. true
  703. else
  704. rm -f "${vanilla_db}"
  705. vanilla_db=""
  706. echo "Failed to create old-format locate database; skipping the sanity checks" >&2
  707. fi
  708. fi
  709. make_overrun_db() {
  710. # Start with a valid database
  711. cat "${vanilla_db}"
  712. # Make the final entry really long
  713. dd if=/dev/zero bs=1 count=1500 2>/dev/null | tr '\000' 'x'
  714. }
  715. ulimit -c 0
  716. usage() { echo "usage: $0 binary [binary...]" >&2; exit $1; }
  717. [ $# -eq 0 ] && usage 1
  718. bad=""
  719. good=""
  720. ugly=""
  721. if dbfile="$(mktemp nasty.XXXXXX)"
  722. then
  723. make_overrun_db > "$dbfile"
  724. for locate ; do
  725. ver="$locate = $("$locate" --version | head -1)"
  726. if [ -z "$vanilla_db" ] || "$locate" -d "$vanilla_db" "" >/dev/null ; then
  727. "$locate" -d "$dbfile" "" >/dev/null
  728. if [ $? -gt 128 ] ; then
  729. bad="$bad
  730. vulnerable: $ver"
  731. else
  732. good="$good
  733. good: $ver"
  734. fi
  735. else
  736. # the regular locate failed
  737. ugly="$ugly
  738. buggy, may or may not be vulnerable: $ver"
  739. fi
  740. done
  741. rm -f "${dbfile}" "${vanilla_db}"
  742. # good: unaffected. bad: affected (vulnerable).
  743. # ugly: doesn't even work for a normal old-format database.
  744. echo "$good"
  745. echo "$bad"
  746. echo "$ugly"
  747. else
  748. exit 1
  749. fi
  750. @end verbatim
  751. VI. VENDOR RESPONSE
  752. ===================
  753. The GNU project discovered the problem while 'locate' was being worked
  754. on; this is the first public announcement of the problem.
  755. The GNU findutils maintainer has issued a patch as part of this
  756. announcement. The patch appears below.
  757. A source release of findutils-4.2.31 will be issued on 2007-05-30.
  758. That release will of course include the patch. The patch will be
  759. committed to the public CVS repository at the same time. Public
  760. announcements of the release, including a description of the bug, will
  761. be made at the same time as the release.
  762. A release of findutils-4.3.x will follow and will also include the
  763. patch.
  764. VII. PATCH
  765. ==========
  766. This patch should apply to findutils-4.2.23 and later.
  767. Findutils-4.2.23 was released almost two years ago.
  768. @verbatim
  769. Index: locate/locate.c
  770. ===================================================================
  771. RCS file: /cvsroot/findutils/findutils/locate/locate.c,v
  772. retrieving revision 1.58.2.2
  773. diff -u -p -r1.58.2.2 locate.c
  774. --- locate/locate.c 22 Apr 2007 16:57:42 -0000 1.58.2.2
  775. +++ locate/locate.c 28 May 2007 10:18:16 -0000
  776. @@ -124,9 +124,9 @@ extern int errno;
  777. #include "locatedb.h"
  778. #include <getline.h>
  779. -#include "../gnulib/lib/xalloc.h"
  780. -#include "../gnulib/lib/error.h"
  781. -#include "../gnulib/lib/human.h"
  782. +#include "xalloc.h"
  783. +#include "error.h"
  784. +#include "human.h"
  785. #include "dirname.h"
  786. #include "closeout.h"
  787. #include "nextelem.h"
  788. @@ -468,10 +468,36 @@ visit_justprint_unquoted(struct process_
  789. return VISIT_CONTINUE;
  790. }
  791. +static void
  792. +toolong (struct process_data *procdata)
  793. +{
  794. + error (EXIT_FAILURE, 0,
  795. + _("locate database %s contains a "
  796. + "filename longer than locate can handle"),
  797. + procdata->dbfile);
  798. +}
  799. +
  800. +static void
  801. +extend (struct process_data *procdata, size_t siz1, size_t siz2)
  802. +{
  803. + /* Figure out if the addition operation is safe before performing it. */
  804. + if (SIZE_MAX - siz1 < siz2)
  805. + {
  806. + toolong (procdata);
  807. + }
  808. + else if (procdata->pathsize < (siz1+siz2))
  809. + {
  810. + procdata->pathsize = siz1+siz2;
  811. + procdata->original_filename = x2nrealloc (procdata->original_filename,
  812. + &procdata->pathsize,
  813. + 1);
  814. + }
  815. +}
  816. +
  817. static int
  818. visit_old_format(struct process_data *procdata, void *context)
  819. {
  820. - register char *s;
  821. + register size_t i;
  822. (void) context;
  823. /* Get the offset in the path where this path info starts. */
  824. @@ -479,20 +505,35 @@ visit_old_format(struct process_data *pr
  825. procdata->count += getw (procdata->fp) - LOCATEDB_OLD_OFFSET;
  826. else
  827. procdata->count += procdata->c - LOCATEDB_OLD_OFFSET;
  828. + assert(procdata->count > 0);
  829. - /* Overlay the old path with the remainder of the new. */
  830. - for (s = procdata->original_filename + procdata->count;
  831. + /* Overlay the old path with the remainder of the new. Read
  832. + * more data until we get to the next filename.
  833. + */
  834. + for (i=procdata->count;
  835. (procdata->c = getc (procdata->fp)) > LOCATEDB_OLD_ESCAPE;)
  836. - if (procdata->c < 0200)
  837. - *s++ = procdata->c; /* An ordinary character. */
  838. - else
  839. - {
  840. - /* Bigram markers have the high bit set. */
  841. - procdata->c &= 0177;
  842. - *s++ = procdata->bigram1[procdata->c];
  843. - *s++ = procdata->bigram2[procdata->c];
  844. - }
  845. - *s-- = '\0';
  846. + {
  847. + if (procdata->c < 0200)
  848. + {
  849. + /* An ordinary character. */
  850. + extend (procdata, i, 1u);
  851. + procdata->original_filename[i++] = procdata->c;
  852. + }
  853. + else
  854. + {
  855. + /* Bigram markers have the high bit set. */
  856. + extend (procdata, i, 2u);
  857. + procdata->c &= 0177;
  858. + procdata->original_filename[i++] = procdata->bigram1[procdata->c];
  859. + procdata->original_filename[i++] = procdata->bigram2[procdata->c];
  860. + }
  861. + }
  862. +
  863. + /* Consider the case where we executed the loop body zero times; we
  864. + * still need space for the terminating null byte.
  865. + */
  866. + extend (procdata, i, 1u);
  867. + procdata->original_filename[i] = 0;
  868. procdata->munged_filename = procdata->original_filename;
  869. @end verbatim
  870. VIII. THANKS
  871. ============
  872. Thanks to Rob Holland <rob@@inversepath.com> and Tavis Ormandy.
  873. IX. CVE INFORMATION
  874. ===================
  875. No CVE candidate number has yet been assigned for this vulnerability.
  876. If someone provides one, I will include it in the public announcement
  877. and change logs.
  878. @end smallexample
  879. The original announcement above was sent out with a cleartext PGP
  880. signature, of course, but that has been omitted from the example.
  881. Once a fixed release is available, announce the new release using the
  882. normal channels. Any CVE number assigned for the problem should be
  883. included in the @file{ChangeLog} and @file{NEWS} entries. See
  884. @url{https://cve.mitre.org/} for an explanation of CVE numbers.
  885. @node Making Releases
  886. @chapter Making Releases
  887. This chapter will explain how to make a findutils release. For the
  888. time being here is a terse description of the main steps:
  889. @set RELEASE X.Y.Z
  890. @set RELTAG v@value{RELEASE}
  891. @enumerate
  892. @item Commit changes; make sure your working directory has no
  893. uncommitted changes.
  894. @item Update translation files; re-run bootstrap to download the
  895. newest @samp{.po} files.
  896. @item Make sure compiler warnings would block the release; re-run
  897. @samp{configure} with the options
  898. @code{--enable-compiler-warnings --enable-compiler-warnings-are-errors}.
  899. @item Test; make sure that all changes you have made have tests, and
  900. that the tests pass.
  901. Verify this with @code{env RUN_EXPENSIVE_TESTS=yes make distcheck}.
  902. @c The RUN_EXPENSIVE_TESTS environment variable is checked in init.cfg.
  903. @item Bugs; make sure all Savannah bug entries fixed in this release
  904. are marked as fixed in Savannah. Optionally close them too to save
  905. duplicate work (otherwise, close them after the release is uploaded).
  906. @item Add new release in Savannah field values; see the @code{Bugs >
  907. Edit Field Values} menu item. Add a field value for the release you
  908. are about to make so that users can report bugs in it.
  909. @item Update version; make sure that the NEWS file
  910. is updated with the new release number (and checked in).
  911. @c There is no longer any need to update configure.ac, since it no
  912. @c longer contains version information.
  913. @item Tag the release; findutils releases are tagged like this for
  914. example: v4.5.5. You can create a tag with a command like this:
  915. @c we use @example here because @value will not work within @code or @samp.
  916. @example
  917. git tag -s -m "Findutils release @value{RELEASE}" @value{RELTAG}
  918. @end example
  919. @noindent
  920. @item Build the release tarball; do this with @code{make distcheck}.
  921. Copy the tarball somewhere safe.
  922. @item Merge; if the release (and signed tag) were made on a
  923. local branch, merge the branch to your local master.
  924. @item Push; push your master to origin/master.
  925. @item Push the new release tag; assuming that the name of your remote is
  926. @samp{origin}, this is:
  927. @example
  928. git push origin tag @value{RELTAG}
  929. @end example
  930. @item Prepare the upload and upload it.
  931. You can do this with
  932. @c we use @example here because @value will not work within @code or @samp.
  933. @example
  934. build-aux/gnupload --to ftp.gnu.org:findutils findutils-@value{RELEASE}.tar.xz
  935. @end example
  936. @noindent
  937. Use @code{alpha.gnu.org:findutils} for an alpha or beta release.
  938. @xref{Automated FTP Uploads, ,Automated FTP
  939. Uploads, maintain, Information for Maintainers of GNU Software},
  940. for detailed upload instructions.
  941. @item Check the FTP upload worked; you can look for an email from the
  942. robot or check the contents of the actual FTP site.
  943. @item Make a release announcement; include an extract from the NEWS
  944. file which explains what's changed. Announcements for test releases
  945. should just go to @email{bug-findutils@@gnu.org}. Announcements for
  946. stable releases should go to @email{info-gnu@@gnu.org} as well.
  947. @item Post-release administrativa: add a new dummy release header in NEWS:
  948. @code{* Major changes in release ?.?.?, YYYY-MM-DD}
  949. and update the @code{old_NEWS_hash} in @file{cfg.mk} with
  950. @code{make update-NEWS-hash}.
  951. Commit both changes.
  952. @c make update-NEWS-hash supports make news-check but we normally
  953. @c don't do that (and I'm not sure that the current NEWS file would
  954. @c pass the check anyway).
  955. @item Close bugs; any bugs recorded on Savannah which were fixed in this
  956. release should now be marked as closed if they were not already.
  957. Update the @samp{Fixed Release} field of these bugs appropriately and
  958. make sure the @samp{Assigned to} field is populated.
  959. @end enumerate
  960. @node GNU Free Documentation License
  961. @appendix GNU Free Documentation License
  962. @include fdl.texi
  963. @bye
  964. @comment texi related words used by Emacs' spell checker ispell.el
  965. @comment LocalWords: texinfo setfilename settitle setchapternewpage
  966. @comment LocalWords: iftex finalout ifinfo DIR titlepage vskip pt
  967. @comment LocalWords: filll dir samp dfn noindent xref pxref
  968. @comment LocalWords: var deffn texi deffnx itemx emph asis
  969. @comment LocalWords: findex smallexample subsubsection cindex
  970. @comment LocalWords: dircategory direntry itemize
  971. @comment other words used by Emacs' spell checker ispell.el
  972. @comment LocalWords: README fred updatedb xargs Plett Rendell akefile
  973. @comment LocalWords: args grep Filesystems fo foo fOo wildcards iname
  974. @comment LocalWords: ipath regex iregex expr fubar regexps
  975. @comment LocalWords: metacharacters macs sr sc inode lname ilname
  976. @comment LocalWords: sysdep noleaf ls inum xdev filesystems usr atime
  977. @comment LocalWords: ctime mtime amin cmin mmin al daystart Sladkey rm
  978. @comment LocalWords: anewer cnewer bckw rf xtype uname gname uid gid
  979. @comment LocalWords: nouser nogroup chown chgrp perm ch maxdepth
  980. @comment LocalWords: mindepth cpio src CD AFS statted stat fstype ufs
  981. @comment LocalWords: nfs tmp mfs printf fprint dils rw djm Nov lwall
  982. @comment LocalWords: POSIXLY fls fprintf strftime locale's EDT GMT AP
  983. @comment LocalWords: EST diff perl backquotes sprintf Falstad Oct cron
  984. @comment LocalWords: eg vmunix mkdir afs allexec allwrite ARG bigram
  985. @comment LocalWords: bigrams cd chmod comp crc CVS dbfile eof
  986. @comment LocalWords: fileserver filesystem fn frcode Ghazi Hnewc iXX
  987. @comment LocalWords: joeuser Kaveh localpaths localuser LOGNAME
  988. @comment LocalWords: Meyering mv netpaths netuser nonblank nonblanks
  989. @comment LocalWords: ois ok Pinard printindex proc procs prunefs
  990. @comment LocalWords: prunepaths pwd RFS rmadillo rmdir rsh sbins str
  991. @comment LocalWords: su Timar ubins ug unstripped vf VM Weitzel
  992. @comment LocalWords: wildcard zlogout basename execdir wholename iwholename
  993. @comment LocalWords: timestamp timestamps Solaris FreeBSD OpenBSD POSIX