を記載しておきます。検証した環境はWindows 10 Pro, Nim 0.17.2です。
import os, streams, strutils, sequtils, times

type
  TarError* = object of Exception  ## raised for bad paths and unsupported archive entries

const
  COPY_BUFFER_SIZE = 1024 * 1024 * 4  ## bytes moved per read/write; must be larger than 512

proc align(data: string, length: int): string =
  ## Returns `data` trimmed or NUL-padded so that it is exactly `length` bytes.
  if data.len >= length:
    ## trimming
    return data[0 ..< length]
  else:
    ## padding
    return data & repeat('\0', length - data.len)

proc roundup(len: int): int {.inline.} =
  ## Rounds `len` up to the next multiple of the 512-byte tar block size.
  return (len + 511) and not 511

proc fill(exdata: var string, indata: string, pos: int) {.inline.} =
  ## Overwrites the bytes of `exdata` starting at `pos` with `indata`.
  exdata[pos ..< (pos + indata.len)] = indata

proc cstr(field: string): string =
  ## Returns `field` up to (not including) its first NUL byte.
  let stop = field.find('\0')
  if stop < 0:
    return field
  return field[0 ..< stop]

proc parseOctField(field: string): int =
  ## Parses an octal header field that may be padded with leading spaces and
  ## terminated by NUL or space. A field without digits yields 0.
  ## Raises TarError for the GNU base-256 encoding (first byte >= 0x80).
  if field.len > 0 and ord(field[0]) >= 0x80:
    raise newException(TarError, "not supported number encoding")
  var seenDigit = false
  for c in field:
    if c in {'0' .. '7'}:
      result = result * 8 + (ord(c) - ord('0'))
      seenDigit = true
    elif c == ' ' and not seenDigit:
      continue  # leading padding
    else:
      break     # terminator reached

proc initialHeader(typeflag: char): string =
  ## Builds a 512-byte header template for the given type flag.
  ## Name, size, mtime and checksum are filled in later by the caller.
  # fill by Null so as need not to care null termination
  var header = repeat('\0', 512)
  ## mode (char[8], asciiz)
  case typeflag
  of 'L': header.fill("0000000", 100)      # link
  of '5': header.fill("0000777", 100)      # directory
  else: header.fill("0000666", 100)        # file
  header.fill("0000000", 108)              ## uid (char[8], asciiz)
  header.fill("0000000", 116)              ## gid (char[8], asciiz)
  header.fill("00000000000", 124)          ## initial size (char[12], asciiz)
  header.fill("00000000000", 136)          ## initial mtime (char[12], asciiz)
  # The checksum is computed with its own field counted as 8 spaces.
  header.fill(repeat(' ', 8), 148)         ## initial checksum (char[8])
  header[156] = typeflag
  # "ustar" + 2 spaces; the following NUL comes from the pre-filled buffer.
  header.fill("ustar\x20\x20", 257)        ## GNU tar magic(char[6]) and version (char[2])
  header.fill("0000000", 329)              ## dev major (char[8], asciiz)
  header.fill("0000000", 337)              ## dev minor (char[8], asciiz)
  return header

proc getChecksum(header: string): string =
  ## Returns the 8-byte checksum field (6 octal digits, NUL, space) for
  ## `header`: the sum of all bytes taken as unsigned values. The caller must
  ## have the checksum field filled with spaces.
  var total = 0
  for item in header:
    total += item.int  # NUL bytes contribute nothing
  return total.toOct(6) & "\0 "

proc makeHeader(srcPath, basePath: string): tuple[body: string, isfile: bool] =
  ## Builds the header block(s) for `srcPath` (relative to `basePath`).
  ## `body` holds an optional GNU long-name record followed by the member
  ## header; `isfile` tells the caller whether content blocks must follow.
  ## Raises TarError for entries that are neither file nor directory and for
  ## files too large for the 11-digit octal size field.
  var header, preheader = ""
  # make paths
  let absPath = (basePath / srcPath).replace("\\", "/")
  let relPath = (basePath.extractFilename / srcPath).replace("\\", "/")
  if relPath.len > 100:
    # long link header
    preheader = initialHeader('L')
    preheader.fill("././@LongLink", 0)
    # the stored name includes its terminating NUL, so both the size field
    # and the block padding must be based on len + 1
    let nameLen = relPath.len + 1
    preheader.fill(nameLen.toOct(11), 124)
    preheader.fill(getChecksum(preheader), 148)
    # append full path as content block
    preheader = preheader & align(relPath, roundup(nameLen))
  var fi = absPath.getFileInfo()
  case fi.kind
  of pcDir:
    header = initialHeader('5')            ## init as directory
  of pcFile:
    if fi.size > 0o77777777777'i64:
      raise newException(TarError, "file is too large (8GB or more)")
    header = initialHeader('0')            ## init as file
    header.fill(fi.size.toOct(11), 124)
  else:
    raise newException(TarError, "invalid file/dir info")
  header.fill(relPath.align(100), 0)       ## name (char[100])
  # `toSeconds` is the Nim 0.17 API, `toUnix` its replacement from 0.18 on
  when compiles(fi.lastWriteTime.toUnix):
    let mtime = fi.lastWriteTime.toUnix.BiggestInt
  else:
    let mtime = fi.lastWriteTime.toSeconds.toBiggestInt
  header.fill(mtime.toOct(11), 136)        ## mtime (char[12], asciiz)
  header.fill(getChecksum(header), 148)    ## checksum
  return (preheader & header, fi.kind == pcFile)

proc writeContent(fout: File, srcPath, basePath: string) =
  ## Streams the file `basePath / srcPath` into `fout` in chunks of
  ## COPY_BUFFER_SIZE and pads the data with NULs to a 512-byte boundary.
  ## Raises TarError when the source file cannot be opened.
  let absPath = basePath / srcPath
  var fin = newFileStream(absPath)
  if fin.isNil:
    raise newException(TarError, "cannot open source file: " & absPath)
  defer: fin.close()
  var written: BiggestInt = 0
  while true:
    let chunk = fin.readStr(COPY_BUFFER_SIZE)
    if chunk.len == 0:
      break
    fout.write(chunk)
    written += chunk.len.BiggestInt
  let padding = int((512 - written mod 512) mod 512)
  if padding > 0:
    fout.write(repeat('\0', padding))

proc scanDir(args: varargs[string]): seq[string] =
  ## Scans directories and files recursively and returns their paths relative
  ## to the base directory. Call with one argument (the directory to pack);
  ## the second argument carries the base directory through the recursion.
  let dirPath = args[0]
  var baseDir = dirPath
  if args.len == 1:
    result = @[""]  # the root directory itself
  else:
    baseDir = args[1]
    result = @[dirPath[baseDir.len + 1 ..< dirPath.len]]
  for item in walkDir(dirPath):
    case item.kind
    of pcDir:
      result.add(scanDir(item.path, baseDir))
    of pcFile:
      result.add(item.path[baseDir.len + 1 ..< item.path.len])
    else:
      discard  # ignore symlinks

proc pack*(srcPath, dstPath: string, rmIncomplete = true) =
  ## Make tar file (dstPath) from specified directory (srcPath)
  ## - designed as to support GNU tar like archiver (not exactly compatible)
  ## + limitations:
  ##   - max. file size, single member file must be smaller than 8GB
  ##   - does NOT support metadata such as user/group id, permissions..
  ##   - does NOT support symbolic links, just ignoring if included
  ##   - source path must be directory, not file path
  ##   - source directory name will be used as root directory of tar contents
  ## Errors are reported with `echo`, not raised. With `rmIncomplete` a
  ## partially written archive is removed; a destination file this call never
  ## opened is left untouched.
  var started = false  # true once dstPath has been opened (truncated) by us
  try:
    if not srcPath.dirExists():
      raise newException(TarError, "source directory is not found")
    # an empty parent means "current directory" (e.g. dstPath = "out.tar")
    let dstDir = dstPath.parentDir
    if dstDir.len > 0 and not dstDir.dirExists():
      raise newException(TarError, "destination directory is not found")
    var tf = open(dstPath, fmWrite)
    started = true
    defer: tf.close()
    var targetDir = srcPath
    if targetDir.endsWith(AltSep) or targetDir.endsWith(DirSep):
      targetDir = targetDir[0 ..< (targetDir.len - 1)]
    # process per file in the directory
    for item in scanDir(targetDir):
      # write header
      let header = makeHeader(item, targetDir)
      tf.write(header.body)
      if header.isfile:
        # write content
        tf.writeContent(item, targetDir)
    # terminate file with 2 blocks end-of-archive
    let eoa = repeat('\0', 1024)
    tf.write(eoa)
  except:
    echo(repr(getCurrentException()))
    if started and rmIncomplete and dstPath.fileExists():
      echo "removing incomplete tar file..."
      dstPath.removeFile()

proc writeFile(srcPath, dstPath: string, position, length: int) =
  ## Copies `length` bytes starting at offset `position` of the archive
  ## `srcPath` into a new file `dstPath`, COPY_BUFFER_SIZE bytes at a time.
  ## Raises TarError when the archive cannot be opened or ends early.
  var fin = newFileStream(srcPath)
  if fin.isNil:
    raise newException(TarError, "cannot open archive: " & srcPath)
  defer: fin.close()
  var fout = open(dstPath, fmWrite)
  defer: fout.close()
  fin.setPosition(position)
  var remain = length
  while remain > 0:
    let chunk = fin.readStr(min(remain, COPY_BUFFER_SIZE))
    if chunk.len == 0:
      raise newException(TarError, "unexpected end of archive")
    fout.write(chunk)
    remain -= chunk.len

proc unpack*(srcPath, dstPath: string) =
  ## Extract tar file (srcPath) to specified directory (dstPath)
  ## - designed as to support POSIX ustar and GNU tar like files
  ## + limitations:
  ##   - does NOT apply metadata, such as user/group id, last modified date..
  ##   - does NOT support symbolic links, just continues silently
  ##   - does NOT support Pax format
  ## Member names that are empty, absolute, or contain a ".." component are
  ## rejected. Errors are reported with `echo`, not raised.
  try:
    if not srcPath.fileExists():
      raise newException(TarError, "source file is not found")
    if not dstPath.dirExists():
      raise newException(TarError, "destination directory is not found")
    var tfs = newFileStream(srcPath)
    if tfs.isNil:
      raise newException(TarError, "source file cannot be opened")
    defer: tfs.close()
    while not tfs.atEnd():
      var header = tfs.readStr(512)
      if header.len < 512:
        break  # truncated trailing data
      let magic = header[257 ..< (257 + 6)]
      if magic != "ustar\0" and magic != "ustar ":
        ## must be end-of-archive or unsupported format
        break
      var
        filename = cstr(header[0 ..< 100])            # name is char[100]
        fileSize = parseOctField(header[124 ..< 136])  # size is char[12]
        typeflag = header[156]
      if typeflag == 'L':
        ## GNU tar; the data blocks hold the long name of the next member
        let raw = tfs.readStr(roundup(fileSize))
        filename = cstr(raw[0 ..< min(fileSize, raw.len)])
        header = tfs.readStr(512)
        if header.len < 512:
          raise newException(TarError, "unexpected end of archive")
        fileSize = parseOctField(header[124 ..< 136])
        typeflag = header[156]
      elif magic == "ustar\0":
        ## POSIX ustar; use prefix for long file name
        filename = cstr(header[345 ..< (345 + 155)]) / filename
      if filename.len == 0 or filename.isAbsolute() or
          filename[0] in {'/', '\\'}:
        ## absolute path
        raise newException(TarError, "not supported file name")
      for part in filename.split({'/', '\\'}):
        if part == "..":
          ## outer path
          raise newException(TarError, "not supported directory path")
      let path = dstPath / filename
      case typeflag
      of '0', '\0':
        ## file
        let position = tfs.getPosition()
        # the archive may lack an explicit entry for the parent directory
        createDir(path.parentDir)
        writeFile(srcPath, path, position, fileSize)
        tfs.setPosition(position + roundup(fileSize))
      of '5':
        ## directory
        createDir(path)
      else:
        ## ignores symlinks and others
        if typeflag notin {'1', '2', '3', '4', '6'}:
          # unknown member types may carry data blocks; step over them so the
          # next read lands on a header again
          tfs.setPosition(tfs.getPosition() + roundup(fileSize))
  except:
    echo(repr(getCurrentException()))
# Usage example: archive a whole directory tree into a tar file.
from simpletar import pack

const
  sourceDir = "C:/path/to/dir"           # directory to archive
  tarFile = "D:/path/to/tarfile.tar"     # tar file to create

# archive src dir to dst tar file
pack(sourceDir, tarFile)
# Usage example: extract a tar file into an existing directory.
from simpletar import unpack

const
  tarFile = "D:/path/to/tarfile.tar"     # tar file to read
  targetDir = "C:/path/to/anotherdir"    # directory to extract into

# extract src tar file to dst dir
unpack(tarFile, targetDir)
プロシージャで階層をたどってパスとその種別(ファイル or ディレクトリ)を取得します。
proc initialHeader(typeflag: char): string =
  ## Builds a 512-byte header template for the given type flag.
  ## Name, size, mtime and checksum are filled in later by the caller.
  # fill by Null so as need not to care null termination
  var header = repeat('\0', 512)
  ## mode (char[8], asciiz)
  case typeflag
  of 'L': header.fill("0000000", 100)      # link
  of '5': header.fill("0000777", 100)      # directory
  else: header.fill("0000666", 100)        # file
  header.fill("0000000", 108)              ## uid (char[8], asciiz)
  header.fill("0000000", 116)              ## gid (char[8], asciiz)
  header.fill("00000000000", 124)          ## initial size (char[12], asciiz)
  header.fill("00000000000", 136)          ## initial mtime (char[12], asciiz)
  # The checksum is computed with its own field counted as 8 spaces, so the
  # placeholder must be exactly 8 spaces wide.
  header.fill(repeat(' ', 8), 148)         ## initial checksum (char[8])
  header[156] = typeflag
  # "ustar" + 2 spaces; the following NUL comes from the pre-filled buffer.
  header.fill("ustar\x20\x20", 257)        ## GNU tar magic(char[6]) and version (char[2])
  header.fill("0000000", 329)              ## dev major (char[8], asciiz)
  header.fill("0000000", 337)              ## dev minor (char[8], asciiz)
  return header
if relPath.len > 100:
  # long link header (GNU extension): a pseudo member of type 'L' whose data
  # blocks hold the full path of the member that follows
  preheader = initialHeader('L')
  preheader.fill("././@LongLink", 0)
  # the stored name includes its terminating NUL, so both the size field and
  # the block padding must be based on len + 1; otherwise a name whose length
  # is a multiple of 512 gets one block too few
  let nameLen = relPath.len + 1
  preheader.fill(nameLen.toOct(11), 124)
  preheader.fill(getChecksum(preheader), 148)
  # append full path as content block
  preheader = preheader & align(relPath, roundup(nameLen))
チェックサムを求めるため、ヘッダの各バイトをint (unsigned)に型変換して総和を取ります。ただし、初期化段階でチェックサム用のフィールドchecksum[8]は空白(0x20)8文字で埋めておく必要があります。
proc getChecksum(header: string): string =
  ## Returns the 8-byte checksum field for `header`: the byte sum as 6 octal
  ## digits, followed by a NUL and a space.
  var sum = 0
  for ch in header:
    # NUL bytes add zero, so they need no special treatment
    sum += ord(ch)
  result = toOct(sum, 6) & "\0 "
入力ファイルストリーム(元のファイル)から出力ファイルストリーム(tarファイル)へコピーします。一息に開くとおそらくメモリ不足で死ぬので、バッファサイズをconst (COPY_BUFFER_SIZE)で定義し、その単位で分割して読み書きします。最後に512バイト境界までヌル文字でパディングします。
proc writeContent(fout: File, srcPath, basePath: string) =
  ## Copies the source file into the tar file chunk by chunk, then pads the
  ## data with NULs. (Excerpt: the setup of absPath, cycle and padding is
  ## elided.)
  ...
  var fin = newFileStream(absPath)
  defer: fin.close()
  # whole chunks of COPY_BUFFER_SIZE first
  for round in 1 .. cycle:
    let chunk = fin.readStr(COPY_BUFFER_SIZE)
    fout.write(chunk)
  # then whatever is left, followed by the NUL padding
  let tail = fin.readAll()
  fout.write(tail & '\0'.repeat(padding))
基本的にはいずれの形式が入ってくるかはわかりませんが、少なくともPOSIX ustarとGNU tarを処理できれば実用上は十分そう。逆に言えば、Pax交換フォーマットはサポートしません。
GNU tarでLフラグが設定されているものは、長いパスを格納しているブロックになりますので、ファイル名のみコンテントから抽出し、他の内容はその次のヘッダ以降を参照します。
proc unpack*(srcPath, dstPath: string) =
  ## Reads one member header and resolves a GNU long name if present.
  ## (Excerpt: the surrounding loop and validation are elided.)
  ...
  var
    header = tfs.readStr(512)
    # name is char[100]: bytes 0..99; byte 100 already belongs to the mode
    # field. Drop the NUL padding so the name is a clean string.
    filename = header[0 ..< 100].replace("\0", "")
    fileSize = parseOctInt(header[124 .. 134])
    typeflag = header[156]
  let magic = header[257 ..< (257+6)]
  if typeflag == 'L':
    ## GNU tar; resolve long link
    # the data blocks of an 'L' member hold the full name of the next member
    filename = tfs.readStr(roundup(fileSize))[0 ..< fileSize].replace("\0", "")
    # the real member header follows; take size and type from it
    header = tfs.readStr(512)
    fileSize = parseOctInt(header[124 .. 134])
    typeflag = header[156]
POSIX ustarの場合はprefixフィールドに追加のパスが格納されている可能性があるので、ヌル終端文字列として読み、連結します。
if magic == "ustar\0":
  ## POSIX ustar; use prefix for long file name
  # prefix is the NUL-terminated char[155] field at offset 345; it is joined
  # in front of the name field
  filename = split(header[345 ..< (345 + 155)], '\0')[0] / filename
let path = dstPath / filename
case typeflag
of '0', '\0':
  ## file
  # remember where this member's data starts in the archive
  let dataStart = tfs.getPosition()
  writeFile(srcPath, path, dataStart, fileSize)
  # jump over the data including its padding to the 512-byte boundary
  tfs.setPosition(dataStart + roundup(fileSize))