tar.c 32 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042
  1. /* tar.c - create/extract archives
  2. *
  3. * Copyright 2014 Ashwini Kumar <ak.ashwini81@gmail.com>
  4. *
  5. * For the command, see
  6. * http://pubs.opengroup.org/onlinepubs/007908799/xcu/tar.html
  7. * For the modern file format, see
  8. * http://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html#tag_20_92_13_06
  9. * https://en.wikipedia.org/wiki/Tar_(computing)#File_format
  10. * https://www.gnu.org/software/tar/manual/html_node/Tar-Internals.html
  11. *
  12. * For writing to external program
  13. * http://www.gnu.org/software/tar/manual/html_node/Writing-to-an-External-Program.html
  14. *
  15. * Toybox will never implement the "pax" command as a matter of policy.
  16. *
  17. * Why --exclude pattern but no --include? tar cvzf a.tgz dir --include '*.txt'
  18. *
  19. USE_TAR(NEWTOY(tar, "&(strip-components)#(selinux)(restrict)(full-time)(no-recursion)(numeric-owner)(no-same-permissions)(overwrite)(exclude)*(mode):(mtime):(group):(owner):(to-command):o(no-same-owner)p(same-permissions)k(keep-old)c(create)|h(dereference)x(extract)|t(list)|v(verbose)I(use-compress-program):J(xz)j(bzip2)z(gzip)S(sparse)O(to-stdout)P(absolute-names)m(touch)X(exclude-from)*T(files-from)*C(directory):f(file):a[!txc][!jzJa]", TOYFLAG_USR|TOYFLAG_BIN))
  20. config TAR
  21. bool "tar"
  22. default y
  23. help
  24. usage: tar [-cxt] [-fvohmjkOS] [-XTCf NAME] [--selinux] [FILE...]
  25. Create, extract, or list files in a .tar (or compressed t?z) file.
  26. Options:
  27. c Create x Extract t Test (list)
  28. f tar FILE (default -) C Change to DIR first v Verbose display
  29. o Ignore owner h Follow symlinks m Ignore mtime
  30. J xz compression j bzip2 compression z gzip compression
  31. O Extract to stdout X exclude names in FILE T include names in FILE
  32. --exclude FILENAME to exclude --full-time Show seconds with -tv
  33. --mode MODE Adjust permissions --owner NAME[:UID] Set file ownership
  34. --mtime TIME Override timestamps --group NAME[:GID] Set file group
  35. --sparse Record sparse files --selinux Save/restore labels
  36. --restrict All under one dir --no-recursion Skip dir contents
  37. --numeric-owner Use numeric uid/gid, not user/group names
  38. --strip-components NUM Ignore first NUM directory components when extracting
  39. -I PROG Filter through PROG to compress or PROG -d to decompress
  40. */
  41. #define FOR_tar
  42. #include "toys.h"
  43. GLOBALS(
  44. char *f, *C;
  45. struct arg_list *T, *X;
  46. char *I, *to_command, *owner, *group, *mtime, *mode;
  47. struct arg_list *exclude;
  48. long strip_components;
  49. struct double_list *incl, *excl, *seen;
  50. struct string_list *dirs;
  51. char *cwd;
  52. int fd, ouid, ggid, hlc, warn, adev, aino, sparselen, pid;
  53. long long *sparse;
  54. time_t mtt;
  55. // hardlinks seen so far (hlc many)
  56. struct {
  57. char *arg;
  58. ino_t ino;
  59. dev_t dev;
  60. } *hlx;
  61. // Parsed information about a tar header.
  62. struct tar_header {
  63. char *name, *link_target, *uname, *gname;
  64. long long size, ssize;
  65. uid_t uid;
  66. gid_t gid;
  67. mode_t mode;
  68. time_t mtime;
  69. dev_t device;
  70. } hdr;
  71. )
  // Raw layout of one tar header block: the fields below add up to exactly
  // 512 bytes, and the code addresses some of them by byte offset.
  struct tar_hdr {
    char name[100];
    char mode[8];
    char uid[8];
    char gid[8];
    char size[12];
    char mtime[12];
    char chksum[8];
    char type;
    char link[100];
    char magic[8];
    char uname[32];
    char gname[32];
    char major[8];
    char minor[8];
    char prefix[155];
    char padd[12];
  };
  // Tar uses ASCII octal when it fits, base-256 otherwise.
  // Returns 1 when val can be written as len-1 octal digits (3 bits each,
  // leaving one byte of the field for the terminator), 0 when it cannot.
  static int ascii_fits(unsigned long long val, int len)
  {
    int digit_bits = 3*(len-1);

    return (val>>digit_bits) == 0;
  }
  // convert from int to octal (or base-256)
  // Fills the len byte field at str. When the value fits it is written as
  // len-1 zero padded octal digits followed by a NUL. Otherwise the low bytes
  // of val are stored most significant first across the whole field and the
  // first byte is then overwritten with 128 as the binary marker.
  static void itoo(char *str, int len, unsigned long long val)
  {
    int pos;

    if (ascii_fits(val, len)) {
      sprintf(str, "%0*llo", len-1, val);

      return;
    }

    for (pos = len; pos--; val >>= 8) str[pos] = val;
    *str = 128;
  }
  // Field-sized wrapper: x must be an array so sizeof() yields the field width.
  #define ITOO(x, y) itoo(x, sizeof(x), y)
  // convert octal (or base-256) to int
  //
  // str points at a header field of len bytes. A first byte with the high bit
  // set selects the binary encoding: the remaining len-1 bytes are read most
  // significant first. Otherwise the field is optional leading spaces followed
  // by octal digits, ended by a space, a NUL, or the end of the field.
  // Calls error_exit("bad header") on a value that does not fit in 64 bits or
  // on an unexpected character after the digits.
  static unsigned long long otoi(char *str, unsigned len)
  {
    unsigned long long val = 0;

    // When tar value too big or octal, use binary encoding with high bit set
    if (128&*str) while (--len) {
      // Shifting left by 8 loses data exactly when the top byte is in use.
      if (val>>56) error_exit("bad header");
      // Read the byte as unsigned so values >= 0x80 are not sign extended
      // on builds where plain char is signed.
      val = (val<<8)+(unsigned char)*++str;
    } else {
      // Leading spaces count against the field width, so an all-space field
      // cannot walk into the next header field.
      while (len && *str == ' ') str++, len--;
      while (len && *str>='0' && *str<='7') val = val*8+*str++-'0', len--;
      if (len && *str && *str != ' ') error_exit("bad header");
    }

    return val;
  }
  // Field-sized wrapper: x must be an array so sizeof() yields the field width.
  #define OTOI(x) otoi(x, sizeof(x))
  108. static void write_prefix_block(char *data, int len, char type)
  109. {
  110. struct tar_hdr tmp;
  111. memset(&tmp, 0, sizeof(tmp));
  112. sprintf(tmp.name, "././@%s", type=='x' ? "PaxHeaders" : "LongLink");
  113. ITOO(tmp.uid, 0);
  114. ITOO(tmp.gid, 0);
  115. ITOO(tmp.size, len);
  116. ITOO(tmp.mtime, 0);
  117. tmp.type = type;
  118. strcpy(tmp.magic, "ustar ");
  119. // Historical nonsense to match other implementations. Never used.
  120. ITOO(tmp.mode, 0644);
  121. strcpy(tmp.uname, "root");
  122. strcpy(tmp.gname, "root");
  123. // Calculate checksum. Since 512*255 = 0377000 in octal, this can never
  124. // use more than 6 digits. The last byte is ' ' for historical reasons.
  125. itoo(tmp.chksum, sizeof(tmp.chksum)-1, tar_cksum(&tmp));
  126. tmp.chksum[7] = ' ';
  127. // write header and name, padded with NUL to block size
  128. xwrite(TT.fd, &tmp, 512);
  129. xwrite(TT.fd, data, len);
  130. if (len%512) xwrite(TT.fd, toybuf, 512-(len%512));
  131. }
  // Emit an extension record of the given type for data, but only when the
  // string is longer than check (the size of the header field it must fit).
  // The record payload includes the terminating NUL.
  static void maybe_prefix_block(char *data, int check, int type)
  {
    int len = strlen(data);

    if (len <= check) return;
    write_prefix_block(data, len+1, type);
  }
  137. static struct double_list *filter(struct double_list *lst, char *name)
  138. {
  139. struct double_list *end = lst;
  140. if (lst)
  141. // constant is FNM_LEADING_DIR
  142. do if (!fnmatch(lst->data, name, 1<<3)) return lst;
  143. while (end != (lst = lst->next));
  144. return 0;
  145. }
  146. static void skippy(long long len)
  147. {
  148. if (lskip(TT.fd, len)) perror_exit("EOF");
  149. }
  150. // allocate and read data from TT.fd
  151. static void alloread(void *buf, int len)
  152. {
  153. // actually void **, but automatic typecasting doesn't work with void ** :(
  154. char **b = buf;
  155. free(*b);
  156. *b = xmalloc(len+1);
  157. xreadall(TT.fd, *b, len);
  158. (*b)[len] = 0;
  159. }
  160. // callback from dirtree to create archive
  161. static int add_to_tar(struct dirtree *node)
  162. {
  163. struct stat *st = &(node->st);
  164. struct tar_hdr hdr;
  165. struct passwd *pw = pw;
  166. struct group *gr = gr;
  167. int i, fd = -1, norecurse = FLAG(no_recursion);
  168. char *name, *lnk, *hname;
  169. if (!dirtree_notdotdot(node)) return 0;
  170. if (TT.adev == st->st_dev && TT.aino == st->st_ino) {
  171. error_msg("'%s' file is the archive; not dumped", node->name);
  172. return 0;
  173. }
  174. i = 1;
  175. name = hname = dirtree_path(node, &i);
  176. // exclusion defaults to --no-anchored and --wildcards-match-slash
  177. for (lnk = name; *lnk;) {
  178. if (filter(TT.excl, lnk)) {
  179. norecurse++;
  180. goto done;
  181. }
  182. while (*lnk && *lnk!='/') lnk++;
  183. while (*lnk=='/') lnk++;
  184. }
  185. // Consume the 1 extra byte alocated in dirtree_path()
  186. if (S_ISDIR(st->st_mode) && name[i-1] != '/') strcat(name, "/");
  187. // remove leading / and any .. entries from saved name
  188. if (!FLAG(P)) while (*hname == '/') hname++;
  189. for (lnk = hname;;) {
  190. if (!(lnk = strstr(lnk, ".."))) break;
  191. if (lnk == hname || lnk[-1] == '/') {
  192. if (!lnk[2]) goto done;
  193. if (lnk[2]=='/') {
  194. lnk = hname = lnk+3;
  195. continue;
  196. }
  197. }
  198. lnk += 2;
  199. }
  200. if (!*hname) goto done;
  201. if (TT.warn && hname != name) {
  202. dprintf(2, "removing leading '%.*s' from member names\n",
  203. (int)(hname-name), name);
  204. TT.warn = 0;
  205. }
  206. if (TT.owner) st->st_uid = TT.ouid;
  207. if (TT.group) st->st_gid = TT.ggid;
  208. if (TT.mode) st->st_mode = string_to_mode(TT.mode, st->st_mode);
  209. if (TT.mtime) st->st_mtime = TT.mtt;
  210. memset(&hdr, 0, sizeof(hdr));
  211. strncpy(hdr.name, hname, sizeof(hdr.name));
  212. ITOO(hdr.mode, st->st_mode &07777);
  213. ITOO(hdr.uid, st->st_uid);
  214. ITOO(hdr.gid, st->st_gid);
  215. ITOO(hdr.size, 0); //set size later
  216. ITOO(hdr.mtime, st->st_mtime);
  217. strcpy(hdr.magic, "ustar ");
  218. // Hard link or symlink? i=0 neither, i=1 hardlink, i=2 symlink
  219. // Are there hardlinks to a non-directory entry?
  220. if (st->st_nlink>1 && !S_ISDIR(st->st_mode)) {
  221. // Have we seen this dev&ino before?
  222. for (i = 0; i<TT.hlc; i++) {
  223. if (st->st_ino == TT.hlx[i].ino && st->st_dev == TT.hlx[i].dev)
  224. break;
  225. }
  226. if (i != TT.hlc) {
  227. lnk = TT.hlx[i].arg;
  228. i = 1;
  229. } else {
  230. // first time we've seen it. Store as normal file, but remember it.
  231. if (!(TT.hlc&255))
  232. TT.hlx = xrealloc(TT.hlx, sizeof(*TT.hlx)*(TT.hlc+256));
  233. TT.hlx[TT.hlc].arg = xstrdup(hname);
  234. TT.hlx[TT.hlc].ino = st->st_ino;
  235. TT.hlx[TT.hlc].dev = st->st_dev;
  236. TT.hlc++;
  237. i = 0;
  238. }
  239. } else i = 0;
  240. // Handle file types
  241. if (i || S_ISLNK(st->st_mode)) {
  242. hdr.type = '1'+!i;
  243. if (!i && !(lnk = xreadlink(name))) {
  244. perror_msg("readlink");
  245. goto done;
  246. }
  247. maybe_prefix_block(lnk, sizeof(hdr.link), 'K');
  248. strncpy(hdr.link, lnk, sizeof(hdr.link));
  249. if (!i) free(lnk);
  250. } else if (S_ISREG(st->st_mode)) {
  251. hdr.type = '0';
  252. ITOO(hdr.size, st->st_size);
  253. } else if (S_ISDIR(st->st_mode)) hdr.type = '5';
  254. else if (S_ISFIFO(st->st_mode)) hdr.type = '6';
  255. else if (S_ISBLK(st->st_mode) || S_ISCHR(st->st_mode)) {
  256. hdr.type = (S_ISCHR(st->st_mode))?'3':'4';
  257. ITOO(hdr.major, dev_major(st->st_rdev));
  258. ITOO(hdr.minor, dev_minor(st->st_rdev));
  259. } else {
  260. error_msg("unknown file type '%o'", st->st_mode & S_IFMT);
  261. goto done;
  262. }
  263. // write out 'x' prefix header for --selinux data
  264. if (FLAG(selinux)) {
  265. int start = 0, sz = 0, temp, len = 0;
  266. char *buf = 0, *sec = "security.selinux";
  267. for (;;) {
  268. // First time get length, second time read data into prepared buffer
  269. len = (S_ISLNK(st->st_mode) ? xattr_lget : xattr_get)
  270. (name, sec, buf+start, sz);
  271. // Handle data or error
  272. if (len>999999 || (sz && len>sz)) len = -1, errno = E2BIG;
  273. if (buf || len<1) {
  274. if (len>0) {
  275. strcpy(buf+start+sz, "\n");
  276. write_prefix_block(buf, start+sz+2, 'x');
  277. } else if (errno==ENODATA || errno==ENOTSUP) len = 0;
  278. if (len) perror_msg("getfilecon %s", name);
  279. free(buf);
  280. break;
  281. }
  282. // Allocate buffer. Length includes prefix: calculate twice (wrap 99->100)
  283. temp = snprintf(0, 0, "%d", sz = (start = 22)+len+1);
  284. start += temp + (temp != snprintf(0, 0, "%d", temp+sz));
  285. buf = xmprintf("%u RHT.%s=%.*s", start+len+1, sec, sz = len, "");
  286. }
  287. }
  288. maybe_prefix_block(hname, sizeof(hdr.name), 'L');
  289. if (!FLAG(numeric_owner)) {
  290. if ((TT.owner || (pw = bufgetpwuid(st->st_uid))) &&
  291. ascii_fits(st->st_uid, sizeof(hdr.uid)))
  292. strncpy(hdr.uname, TT.owner ? : pw->pw_name, sizeof(hdr.uname));
  293. if ((TT.group || (gr = bufgetgrgid(st->st_gid))) &&
  294. ascii_fits(st->st_gid, sizeof(hdr.gid)))
  295. strncpy(hdr.gname, TT.group ? : gr->gr_name, sizeof(hdr.gname));
  296. }
  297. TT.sparselen = 0;
  298. if (hdr.type == '0') {
  299. // Before we write the header, make sure we can read the file
  300. if ((fd = open(name, O_RDONLY)) < 0) {
  301. perror_msg("can't open '%s'", name);
  302. return 0;
  303. }
  304. if (FLAG(S)) {
  305. long long lo, ld = 0, len = 0;
  306. // Enumerate the extents
  307. while ((lo = lseek(fd, ld, SEEK_HOLE)) != -1) {
  308. if (!(TT.sparselen&511))
  309. TT.sparse = xrealloc(TT.sparse, (TT.sparselen+514)*sizeof(long long));
  310. if (ld != lo) {
  311. TT.sparse[TT.sparselen++] = ld;
  312. len += TT.sparse[TT.sparselen++] = lo-ld;
  313. }
  314. if (lo == st->st_size || (ld = lseek(fd, lo, SEEK_DATA)) < lo) break;
  315. }
  316. // If there were extents, change type to S record
  317. if (TT.sparselen>2) {
  318. TT.sparse[TT.sparselen++] = st->st_size;
  319. TT.sparse[TT.sparselen++] = 0;
  320. hdr.type = 'S';
  321. lnk = (char *)&hdr;
  322. for (i = 0; i<TT.sparselen && i<8; i++)
  323. itoo(lnk+386+12*i, 12, TT.sparse[i]);
  324. // Record if there's overflow records, change length to sparse length,
  325. // record apparent length
  326. if (TT.sparselen>8) lnk[482] = 1;
  327. itoo(lnk+483, 12, st->st_size);
  328. ITOO(hdr.size, len);
  329. } else TT.sparselen = 0;
  330. lseek(fd, 0, SEEK_SET);
  331. }
  332. }
  333. itoo(hdr.chksum, sizeof(hdr.chksum)-1, tar_cksum(&hdr));
  334. hdr.chksum[7] = ' ';
  335. if (FLAG(v)) dprintf(1+(TT.fd==1), "%s\n", hname);
  336. // Write header and data to archive
  337. xwrite(TT.fd, &hdr, 512);
  338. if (TT.sparselen>8) {
  339. char buf[512];
  340. // write extent overflow blocks
  341. for (i=8;;i++) {
  342. int j = (i-8)%42;
  343. if (!j || i==TT.sparselen) {
  344. if (i!=8) {
  345. if (i!=TT.sparselen) buf[504] = 1;
  346. xwrite(TT.fd, buf, 512);
  347. }
  348. if (i==TT.sparselen) break;
  349. memset(buf, 0, sizeof(buf));
  350. }
  351. itoo(buf+12*j, 12, TT.sparse[i]);
  352. }
  353. }
  354. TT.sparselen >>= 1;
  355. if (hdr.type == '0' || hdr.type == 'S') {
  356. if (hdr.type == '0') xsendfile_pad(fd, TT.fd, st->st_size);
  357. else for (i = 0; i<TT.sparselen; i++) {
  358. if (TT.sparse[i*2] != lseek(fd, TT.sparse[i*2], SEEK_SET))
  359. perror_msg("%s: seek %lld", name, TT.sparse[i*2]);
  360. xsendfile_pad(fd, TT.fd, TT.sparse[i*2+1]);
  361. }
  362. if (st->st_size%512) writeall(TT.fd, toybuf, (512-(st->st_size%512)));
  363. close(fd);
  364. }
  365. done:
  366. free(name);
  367. return (DIRTREE_RECURSE|(FLAG(h)?DIRTREE_SYMFOLLOW:0))*!norecurse;
  368. }
  369. static void wsettime(char *s, long long sec)
  370. {
  371. struct timespec times[2] = {{sec, 0},{sec, 0}};
  372. if (utimensat(AT_FDCWD, s, times, AT_SYMLINK_NOFOLLOW))
  373. perror_msg("settime %lld %s", sec, s);
  374. }
  375. // Do pending directory utimes(), NULL to flush all.
  376. static int dirflush(char *name, int isdir)
  377. {
  378. char *s = 0, *ss;
  379. // Barf if name not in TT.cwd
  380. if (name) {
  381. if (!(ss = s = xabspath(name, isdir ? ABS_LAST : 0))) {
  382. error_msg("'%s' bad symlink", name);
  383. return 1;
  384. }
  385. if (TT.cwd[1] && (!strstart(&ss, TT.cwd) || (*ss && *ss!='/'))) {
  386. error_msg("'%s' %s not under '%s'", name, s, TT.cwd);
  387. free(s);
  388. return 1;
  389. }
  390. // --restrict means first entry extracted is what everything must be under
  391. if (FLAG(restrict)) {
  392. free(TT.cwd);
  393. TT.cwd = strdup(s);
  394. toys.optflags ^= FLAG_restrict;
  395. }
  396. // use resolved name so trailing / is stripped
  397. if (isdir) unlink(s);
  398. }
  399. // Set deferred utimes() for directories this file isn't under.
  400. // (Files must be depth-first ordered in tarball for this to matter.)
  401. while (TT.dirs) {
  402. // If next file is under (or equal to) this dir, keep waiting
  403. if (name && strstart(&ss, ss = s) && (!*ss || *ss=='/')) break;
  404. wsettime(TT.dirs->str+sizeof(long long), *(long long *)TT.dirs->str);
  405. free(llist_pop(&TT.dirs));
  406. }
  407. free(s);
  408. // name was under TT.cwd
  409. return 0;
  410. }
  411. // write data to file
  412. static void sendfile_sparse(int fd)
  413. {
  414. long long len, used = 0, sent;
  415. int i = 0, j;
  416. do {
  417. if (TT.sparselen) {
  418. // Seek past holes or fill output with zeroes.
  419. if (-1 == lseek(fd, len = TT.sparse[i*2], SEEK_SET)) {
  420. sent = 0;
  421. while (len) {
  422. // first/last 512 bytes used, rest left zeroes
  423. j = (len>3072) ? 3072 : len;
  424. if (j != writeall(fd, toybuf+512, j)) goto error;
  425. len -= j;
  426. }
  427. } else {
  428. sent = len;
  429. if (!(len = TT.sparse[i*2+1]) && ftruncate(fd, sent+len))
  430. perror_msg("ftruncate");
  431. }
  432. if (len+used>TT.hdr.size) error_exit("sparse overflow");
  433. } else len = TT.hdr.size;
  434. len -= sendfile_len(TT.fd, fd, len, &sent);
  435. used += sent;
  436. if (len) {
  437. error:
  438. if (fd!=1) perror_msg(0);
  439. skippy(TT.hdr.size-used);
  440. break;
  441. }
  442. } while (++i<TT.sparselen);
  443. close(fd);
  444. }
  445. static void extract_to_disk(void)
  446. {
  447. char *name = TT.hdr.name;
  448. int ala = TT.hdr.mode, strip;
  449. for (strip = 0; strip < TT.strip_components; strip++) {
  450. char *s = strchr(name, '/');
  451. if (s && s[1]) name = s+1;
  452. else if (S_ISDIR(ala)) return;
  453. else break;
  454. }
  455. if (dirflush(name, S_ISDIR(ala))) {
  456. if (S_ISREG(ala) && !TT.hdr.link_target) skippy(TT.hdr.size);
  457. return;
  458. }
  459. // create path before file if necessary
  460. if (strrchr(name, '/') && mkpath(name) && errno!=EEXIST)
  461. return perror_msg(":%s: can't mkdir", name);
  462. // remove old file, if exists
  463. if (!FLAG(k) && !S_ISDIR(ala) && rmdir(name) && errno!=ENOENT && unlink(name))
  464. return perror_msg("can't remove: %s", name);
  465. if (S_ISREG(ala)) {
  466. // hardlink?
  467. if (TT.hdr.link_target) {
  468. if (link(TT.hdr.link_target, name))
  469. return perror_msg("can't link '%s' -> '%s'", name, TT.hdr.link_target);
  470. // write contents
  471. } else {
  472. int fd = WARN_ONLY|O_WRONLY|O_CREAT|(FLAG(overwrite) ? O_TRUNC : O_EXCL);
  473. if ((fd = xcreate(name, fd, ala&07777)) != -1) sendfile_sparse(fd);
  474. else return skippy(TT.hdr.size);
  475. }
  476. } else if (S_ISDIR(ala)) {
  477. if ((mkdir(name, 0700) == -1) && errno != EEXIST)
  478. return perror_msg("%s: can't create", name);
  479. } else if (S_ISLNK(ala)) {
  480. if (symlink(TT.hdr.link_target, name))
  481. return perror_msg("can't link '%s' -> '%s'", name, TT.hdr.link_target);
  482. } else if (mknod(name, ala, TT.hdr.device))
  483. return perror_msg("can't create '%s'", name);
  484. // Set ownership
  485. if (!FLAG(o) && !geteuid()) {
  486. int u = TT.hdr.uid, g = TT.hdr.gid;
  487. if (TT.owner) TT.hdr.uid = TT.ouid;
  488. else if (!FLAG(numeric_owner) && *TT.hdr.uname) {
  489. struct passwd *pw = bufgetpwnamuid(TT.hdr.uname, 0);
  490. if (pw && (TT.owner || !FLAG(numeric_owner))) TT.hdr.uid = pw->pw_uid;
  491. }
  492. if (TT.group) TT.hdr.gid = TT.ggid;
  493. else if (!FLAG(numeric_owner) && *TT.hdr.uname) {
  494. struct group *gr = bufgetgrnamgid(TT.hdr.gname, 0);
  495. if (gr) TT.hdr.gid = gr->gr_gid;
  496. }
  497. if (lchown(name, u, g)) perror_msg("chown %d:%d '%s'", u, g, name);;
  498. }
  499. if (!S_ISLNK(ala)) chmod(name, FLAG(p) ? ala : ala&0777);
  500. // Apply mtime.
  501. if (!FLAG(m)) {
  502. if (S_ISDIR(ala)) {
  503. struct string_list *sl;
  504. // Writing files into a directory changes directory timestamps, so
  505. // defer mtime updates until contents written.
  506. sl = xmalloc(sizeof(struct string_list)+sizeof(long long)+strlen(name)+1);
  507. *(long long *)sl->str = TT.hdr.mtime;
  508. strcpy(sl->str+sizeof(long long), name);
  509. sl->next = TT.dirs;
  510. TT.dirs = sl;
  511. } else wsettime(name, TT.hdr.mtime);
  512. }
  513. }
  514. static void unpack_tar(char *first)
  515. {
  516. struct double_list *walk, *delete;
  517. struct tar_hdr tar;
  518. int i, sefd = -1, and = 0;
  519. unsigned maj, min;
  520. char *s;
  521. for (;;) {
  522. if (first) {
  523. memcpy(&tar, first, i = 512);
  524. first = 0;
  525. } else {
  526. // align to next block and read it
  527. if (TT.hdr.size%512) skippy(512-TT.hdr.size%512);
  528. i = readall(TT.fd, &tar, 512);
  529. }
  530. if (i && i!=512) error_exit("short header");
  531. // Two consecutive empty headers ends tar even if there's more data
  532. if (!i || !*tar.name) {
  533. if (!i || and++) return;
  534. TT.hdr.size = 0;
  535. continue;
  536. }
  537. // ensure null temination even of pathological packets
  538. tar.padd[0] = and = 0;
  539. // Is this a valid TAR header?
  540. if (!is_tar_header(&tar)) error_exit("bad header");
  541. TT.hdr.size = OTOI(tar.size);
  542. // If this header isn't writing something to the filesystem
  543. if ((tar.type<'0' || tar.type>'7') && tar.type!='S'
  544. && (*tar.magic && tar.type))
  545. {
  546. // Skip to next record if unknown type or payload > 1 megabyte
  547. if (!strchr("KLx", tar.type) || TT.hdr.size>1<<20) skippy(TT.hdr.size);
  548. // Read link or long name
  549. else if (tar.type != 'x')
  550. alloread(tar.type=='K'?&TT.hdr.link_target:&TT.hdr.name, TT.hdr.size);
  551. // Loop through 'x' payload records in "LEN NAME=VALUE\n" format
  552. else {
  553. char *p, *pp, *buf = 0;
  554. unsigned i, len, n;
  555. alloread(&buf, TT.hdr.size);
  556. for (p = buf; (p-buf)<TT.hdr.size; p += len) {
  557. i = TT.hdr.size-(p-buf);
  558. if (1!=sscanf(p, "%u %n", &len, &n) || len<n+4 || len>i || n>i) {
  559. error_msg("bad header");
  560. break;
  561. }
  562. p[len-1] = 0;
  563. pp = p+n;
  564. // Ignore "RHT." prefix, if any.
  565. strstart(&pp, "RHT.");
  566. if ((FLAG(selinux) && !(FLAG(t)|FLAG(O)))
  567. && strstart(&pp, "security.selinux="))
  568. {
  569. i = strlen(pp);
  570. sefd = xopen("/proc/self/attr/fscreate", O_WRONLY|WARN_ONLY);
  571. if (sefd==-1 || i!=write(sefd, pp, i))
  572. perror_msg("setfscreatecon %s", pp);
  573. } else if (strstart(&pp, "path=")) {
  574. free(TT.hdr.name);
  575. TT.hdr.name = xstrdup(pp);
  576. break;
  577. }
  578. }
  579. free(buf);
  580. }
  581. continue;
  582. }
  583. // Handle sparse file type
  584. TT.sparselen = 0;
  585. if (tar.type == 'S') {
  586. char sparse[512];
  587. int max = 8;
  588. // Load 4 pairs of offset/len from S block, plus 21 pairs from each
  589. // continuation block, list says where to seek/write sparse file contents
  590. s = 386+(char *)&tar;
  591. *sparse = i = 0;
  592. for (;;) {
  593. if (!(TT.sparselen&511))
  594. TT.sparse = xrealloc(TT.sparse, (TT.sparselen+512)*sizeof(long long));
  595. // If out of data in block check continue flag, stop or load next block
  596. if (++i>max || !*s) {
  597. if (!(*sparse ? sparse[504] : ((char *)&tar)[482])) break;
  598. xreadall(TT.fd, s = sparse, 512);
  599. max = 41;
  600. i = 0;
  601. }
  602. // Load next entry
  603. TT.sparse[TT.sparselen++] = otoi(s, 12);
  604. s += 12;
  605. }
  606. // Odd number of entries (from corrupted tar) would be dropped here
  607. TT.sparselen /= 2;
  608. if (TT.sparselen)
  609. TT.hdr.ssize = TT.sparse[2*TT.sparselen-1]+TT.sparse[2*TT.sparselen-2];
  610. } else TT.hdr.ssize = TT.hdr.size;
  611. // At this point, we have something to output. Convert metadata.
  612. TT.hdr.mode = OTOI(tar.mode)&0xfff;
  613. if (tar.type == 'S' || !tar.type) TT.hdr.mode |= 0x8000;
  614. else TT.hdr.mode |= (char []){8,8,10,2,6,4,1,8}[tar.type-'0']<<12;
  615. TT.hdr.uid = OTOI(tar.uid);
  616. TT.hdr.gid = OTOI(tar.gid);
  617. TT.hdr.mtime = OTOI(tar.mtime);
  618. maj = OTOI(tar.major);
  619. min = OTOI(tar.minor);
  620. TT.hdr.device = dev_makedev(maj, min);
  621. TT.hdr.uname = xstrndup(TT.owner ? : tar.uname, sizeof(tar.uname));
  622. TT.hdr.gname = xstrndup(TT.group ? : tar.gname, sizeof(tar.gname));
  623. if (TT.owner) TT.hdr.uid = TT.ouid;
  624. else if (!FLAG(numeric_owner)) {
  625. struct passwd *pw = bufgetpwnamuid(TT.hdr.uname, 0);
  626. if (pw && (TT.owner || !FLAG(numeric_owner))) TT.hdr.uid = pw->pw_uid;
  627. }
  628. if (TT.group) TT.hdr.gid = TT.ggid;
  629. else if (!FLAG(numeric_owner)) {
  630. struct group *gr = bufgetgrnamgid(TT.hdr.gname, 0);
  631. if (gr) TT.hdr.gid = gr->gr_gid;
  632. }
  633. if (!TT.hdr.link_target && *tar.link)
  634. TT.hdr.link_target = xstrndup(tar.link, sizeof(tar.link));
  635. if (!TT.hdr.name) {
  636. // Glue prefix and name fields together with / if necessary
  637. i = (tar.type=='S') ? 0 : strnlen(tar.prefix, sizeof(tar.prefix));
  638. TT.hdr.name = xmprintf("%.*s%s%.*s", i, tar.prefix,
  639. (i && tar.prefix[i-1] != '/') ? "/" : "",
  640. (int)sizeof(tar.name), tar.name);
  641. }
  642. // Old broken tar recorded dir as "file with trailing slash"
  643. if (S_ISREG(TT.hdr.mode) && (s = strend(TT.hdr.name, "/"))) {
  644. *s = 0;
  645. TT.hdr.mode = (TT.hdr.mode & ~S_IFMT) | S_IFDIR;
  646. }
  647. // Non-regular files don't have contents stored in archive.
  648. if ((TT.hdr.link_target && *TT.hdr.link_target)
  649. || (tar.type && !S_ISREG(TT.hdr.mode)))
  650. TT.hdr.size = 0;
  651. // Files are seen even if excluded, so check them here.
  652. // TT.seen points to first seen entry in TT.incl, or NULL if none yet.
  653. if ((delete = filter(TT.incl, TT.hdr.name)) && TT.incl != TT.seen) {
  654. if (!TT.seen) TT.seen = delete;
  655. // Move seen entry to end of list.
  656. if (TT.incl == delete) TT.incl = TT.incl->next;
  657. else for (walk = TT.incl; walk != TT.seen; walk = walk->next) {
  658. if (walk == delete) {
  659. dlist_pop(&walk);
  660. dlist_add_nomalloc(&TT.incl, delete);
  661. }
  662. }
  663. }
  664. // Skip excluded files
  665. if (filter(TT.excl, TT.hdr.name) || (TT.incl && !delete))
  666. skippy(TT.hdr.size);
  667. else if (FLAG(t)) {
  668. if (FLAG(v)) {
  669. struct tm *lc = localtime(TT.mtime ? &TT.mtt : &TT.hdr.mtime);
  670. char perm[12], gname[12];
  671. mode_to_string(TT.hdr.mode, perm);
  672. printf("%s", perm);
  673. sprintf(perm, "%u", TT.hdr.uid);
  674. sprintf(gname, "%u", TT.hdr.gid);
  675. printf(" %s/%s ", *TT.hdr.uname ? TT.hdr.uname : perm,
  676. *TT.hdr.gname ? TT.hdr.gname : gname);
  677. if (tar.type=='3' || tar.type=='4') printf("%u,%u", maj, min);
  678. else printf("%9lld", TT.hdr.ssize);
  679. sprintf(perm, ":%02d", lc->tm_sec);
  680. printf(" %d-%02d-%02d %02d:%02d%s ", 1900+lc->tm_year, 1+lc->tm_mon,
  681. lc->tm_mday, lc->tm_hour, lc->tm_min, FLAG(full_time) ? perm : "");
  682. }
  683. printf("%s", TT.hdr.name);
  684. if (TT.hdr.link_target) printf(" -> %s", TT.hdr.link_target);
  685. xputc('\n');
  686. skippy(TT.hdr.size);
  687. } else {
  688. if (FLAG(v)) printf("%s\n", TT.hdr.name);
  689. if (FLAG(O)) sendfile_sparse(1);
  690. else if (FLAG(to_command)) {
  691. if (S_ISREG(TT.hdr.mode)) {
  692. int fd, pid;
  693. xsetenv("TAR_FILETYPE", "f");
  694. xsetenv(xmprintf("TAR_MODE=%o", TT.hdr.mode), 0);
  695. xsetenv(xmprintf("TAR_SIZE=%lld", TT.hdr.ssize), 0);
  696. xsetenv("TAR_FILENAME", TT.hdr.name);
  697. xsetenv("TAR_UNAME", TT.hdr.uname);
  698. xsetenv("TAR_GNAME", TT.hdr.gname);
  699. xsetenv(xmprintf("TAR_MTIME=%llo", (long long)TT.hdr.mtime), 0);
  700. xsetenv(xmprintf("TAR_UID=%o", TT.hdr.uid), 0);
  701. xsetenv(xmprintf("TAR_GID=%o", TT.hdr.gid), 0);
  702. pid = xpopen((char *[]){"sh", "-c", TT.to_command, NULL}, &fd, 0);
  703. // todo: short write exits tar here, other skips data.
  704. sendfile_sparse(fd);
  705. fd = xpclose_both(pid, 0);
  706. if (fd) error_msg("%d: Child returned %d", pid, fd);
  707. }
  708. } else extract_to_disk();
  709. }
  710. if (sefd != -1) {
  711. // zero length write resets fscreate context to default
  712. (void)write(sefd, 0, 0);
  713. close(sefd);
  714. sefd = -1;
  715. }
  716. free(TT.hdr.name);
  717. free(TT.hdr.link_target);
  718. free(TT.hdr.uname);
  719. free(TT.hdr.gname);
  720. TT.hdr.name = TT.hdr.link_target = 0;
  721. }
  722. }
  // Add copy of filename (minus trailing \n and /) to dlist **
  // At most one trailing newline is removed, then every trailing slash.
  static void trim2list(void *list, char *pline)
  {
    char *copy = xstrdup(pline), *end = copy+strlen(copy);

    // The list keeps the pointer, so trimming afterwards edits the stored name
    dlist_add(list, copy);
    if (end != copy && end[-1] == '\n') end--;
    while (end != copy && end[-1] == '/') end--;
    *end = 0;
  }
  733. // do_lines callback, selects TT.incl or TT.excl based on call order
  734. static void do_XT(char **pline, long len)
  735. {
  736. if (pline) trim2list(TT.X ? &TT.excl : &TT.incl, *pline);
  737. }
  738. void tar_main(void)
  739. {
  740. char *s, **args = toys.optargs,
  741. *archiver = FLAG(I) ? TT.I : (FLAG(z) ? "gzip" : (FLAG(J) ? "xz":"bzip2"));
  742. int len = 0;
  743. // Needed when extracting to command
  744. signal(SIGPIPE, SIG_IGN);
  745. // Get possible early errors out of the way
  746. if (!geteuid()) toys.optflags |= FLAG_p;
  747. if (TT.owner) {
  748. if (!(s = strchr(TT.owner, ':'))) TT.ouid = xgetuid(TT.owner);
  749. else {
  750. TT.owner = xstrndup(TT.owner, s++-TT.owner);
  751. TT.ouid = atolx_range(s, 0, INT_MAX);
  752. }
  753. }
  754. if (TT.group) {
  755. if (!(s = strchr(TT.group, ':'))) TT.ggid = xgetgid(TT.group);
  756. else {
  757. TT.group = xstrndup(TT.group, s++-TT.group);
  758. TT.ggid = atolx_range(s, 0, INT_MAX);
  759. }
  760. }
  761. if (TT.mtime) xparsedate(TT.mtime, &TT.mtt, (void *)&s, 1);
  762. // Collect file list.
  763. for (; TT.exclude; TT.exclude = TT.exclude->next)
  764. trim2list(&TT.excl, TT.exclude->arg);
  765. for (;TT.X; TT.X = TT.X->next) do_lines(xopenro(TT.X->arg), '\n', do_XT);
  766. for (args = toys.optargs; *args; args++) trim2list(&TT.incl, *args);
  767. for (;TT.T; TT.T = TT.T->next) do_lines(xopenro(TT.T->arg), '\n', do_XT);
  768. // If include file list empty, don't create empty archive
  769. if (FLAG(c)) {
  770. if (!TT.incl) error_exit("empty archive");
  771. TT.fd = 1;
  772. }
  773. // nommu reentry for nonseekable input skips this, parent did it for us
  774. if (toys.stacktop) {
  775. if (TT.f && strcmp(TT.f, "-"))
  776. TT.fd = xcreate(TT.f, TT.fd*(O_WRONLY|O_CREAT|O_TRUNC), 0666);
  777. // Get destination directory
  778. if (TT.C) xchdir(TT.C);
  779. }
  780. // Get destination directory
  781. TT.cwd = xabspath(s = xgetcwd(), ABS_PATH);
  782. free(s);
  783. // Remember archive inode so we don't overwrite it or add it to itself
  784. {
  785. struct stat st;
  786. if (!fstat(TT.fd, &st)) {
  787. TT.aino = st.st_ino;
  788. TT.adev = st.st_dev;
  789. }
  790. }
  791. // Are we reading?
  792. if (FLAG(x)||FLAG(t)) {
  793. char *hdr = 0;
  794. // autodetect compression type when not specified
  795. if (!(FLAG(j)||FLAG(z)||FLAG(I)||FLAG(J))) {
  796. len = xread(TT.fd, hdr = toybuf+sizeof(toybuf)-512, 512);
  797. if (len!=512 || !is_tar_header(hdr)) {
  798. // detect gzip and bzip signatures
  799. if (SWAP_BE16(*(short *)hdr)==0x1f8b) toys.optflags |= FLAG_z;
  800. else if (!memcmp(hdr, "BZh", 3)) toys.optflags |= FLAG_j;
  801. else if (peek_be(hdr, 7) == 0xfd377a585a0000UL) toys.optflags |= FLAG_J;
  802. else error_exit("Not tar");
  803. // if we can seek back we don't need to loop and copy data
  804. if (!lseek(TT.fd, -len, SEEK_CUR)) hdr = 0;
  805. }
  806. }
  807. if (FLAG(j)||FLAG(z)||FLAG(I)||FLAG(J)) {
  808. int pipefd[2] = {hdr ? -1 : TT.fd, -1}, i, pid;
  809. struct string_list *zcat = FLAG(I) ? 0 : find_in_path(getenv("PATH"),
  810. FLAG(j) ? "bzcat" : FLAG(J) ? "xzcat" : "zcat");
  811. // Toybox provides more decompressors than compressors, so try them first
  812. TT.pid = xpopen_both(zcat ? (char *[]){zcat->str, 0} :
  813. (char *[]){archiver, "-d", 0}, pipefd);
  814. if (CFG_TOYBOX_FREE) llist_traverse(zcat, free);
  815. if (!hdr) {
  816. // If we could seek, child gzip inherited fd and we read its output
  817. close(TT.fd);
  818. TT.fd = pipefd[1];
  819. } else {
  820. // If we autodetected type but then couldn't lseek to put the data back
  821. // we have to loop reading data from TT.fd and pass it to gzip ourselves
  822. // (starting with the block of data we read to autodetect).
  823. // dirty trick: move gzip input pipe to stdin so child closes spare copy
  824. dup2(pipefd[0], 0);
  825. if (pipefd[0]) close(pipefd[0]);
  826. // Fork a copy of ourselves to handle extraction (reads from zip output
  827. // pipe, writes to stdout).
  828. pipefd[0] = pipefd[1];
  829. pipefd[1] = 1;
  830. pid = xpopen_both(0, pipefd);
  831. close(pipefd[1]);
  832. // loop writing collated data to zip proc
  833. xwrite(0, hdr, len);
  834. for (;;) {
  835. if ((i = read(TT.fd, toybuf, sizeof(toybuf)))<1) {
  836. close(0);
  837. xwaitpid(pid);
  838. return;
  839. }
  840. xwrite(0, toybuf, i);
  841. }
  842. }
  843. }
  844. unpack_tar(hdr);
  845. dirflush(0, 0);
  846. // Shut up archiver about inability to write all trailing NULs to pipe buf
  847. if (TT.pid>0) kill(TT.pid, 9);
  848. // Each time a TT.incl entry is seen it's moved to the end of the list,
  849. // with TT.seen pointing to first seen list entry. Anything between
  850. // TT.incl and TT.seen wasn't encountered in archive..
  851. if (TT.seen != TT.incl) {
  852. if (!TT.seen) TT.seen = TT.incl;
  853. while (TT.incl != TT.seen) {
  854. error_msg("'%s' not in archive", TT.incl->data);
  855. TT.incl = TT.incl->next;
  856. }
  857. }
  858. // are we writing? (Don't have to test flag here, one of 3 must be set)
  859. } else {
  860. struct double_list *dl = TT.incl;
  861. // autodetect compression type based on -f name. (Use > to avoid.)
  862. if (TT.f && !FLAG(j) && !FLAG(z) && !FLAG(I) && !FLAG(J)) {
  863. char *tbz[] = {".tbz", ".tbz2", ".tar.bz", ".tar.bz2"};
  864. if (strend(TT.f, ".tgz") || strend(TT.f, ".tar.gz"))
  865. toys.optflags |= FLAG_z;
  866. if (strend(TT.f, ".txz") || strend(TT.f, ".tar.xz"))
  867. toys.optflags |= FLAG_J;
  868. else for (len = 0; len<ARRAY_LEN(tbz); len++)
  869. if (strend(TT.f, tbz[len])) toys.optflags |= FLAG_j;
  870. }
  871. if (FLAG(j)||FLAG(z)||FLAG(I)||FLAG(J)) {
  872. int pipefd[2] = {-1, TT.fd};
  873. xpopen_both((char *[]){archiver, 0}, pipefd);
  874. close(TT.fd);
  875. TT.fd = pipefd[0];
  876. }
  877. do {
  878. TT.warn = 1;
  879. dirtree_flagread(dl->data, FLAG(h) ? DIRTREE_SYMFOLLOW : 0, add_to_tar);
  880. } while (TT.incl != (dl = dl->next));
  881. writeall(TT.fd, toybuf, 1024);
  882. }
  883. if (toys.exitval) error_msg("had errors");
  884. if (CFG_TOYBOX_FREE) {
  885. llist_traverse(TT.excl, llist_free_double);
  886. llist_traverse(TT.incl, llist_free_double);
  887. while(TT.hlc) free(TT.hlx[--TT.hlc].arg);
  888. free(TT.hlx);
  889. free(TT.cwd);
  890. close(TT.fd);
  891. }
  892. }