Commit | Line | Data |
---|---|---|
78954a05 MN |
1 | ============================================= |
2 | SNOW Video Codec Specification Draft 20070103 | |
3 | ============================================= | |
4 | ||
11de04d8 MN |
5 | Intro: |
6 | ====== | |
7 | This Specification describes the snow syntax and semantics as well as | |
8 | how to decode snow. | |
9 | The decoding process is precisely described and any compliant decoder | |
10 | MUST produce exactly the same output for a spec conformant snow stream. | |
11 | For encoding though any process which generates a stream compliant to | |
12 | the syntactical and semantical requirements and which is decodable by | |
13 | the process described in this spec shall be considered a conformant | |
14 | snow encoder. | |
78954a05 MN |
15 | |
16 | Definitions: | |
17 | ============ | |
18 | ||
19 | MUST the specific part must be done to conform to this standard | |
20 | SHOULD it is recommended to be done that way, but not strictly required | |
21 | ||
22 | ilog2(x) is the rounded down logarithm of x with base 2 | |
23 | ilog2(0) = 0 | |
24 | ||
25 | Type definitions: | |
26 | ================= | |
27 | ||
28 | b 1-bit range coded | |
29 | u unsigned scalar value range coded | |
30 | s signed scalar value range coded | |
31 | ||
32 | ||
33 | Bitstream syntax: | |
34 | ================= | |
35 | ||
36 | frame: | |
37 | header | |
38 | prediction | |
39 | residual | |
40 | ||
41 | header: | |
42 | keyframe b MID_STATE | |
43 | if(keyframe || always_reset) | |
44 | reset_contexts | |
45 | if(keyframe){ | |
46 | version u header_state | |
47 | always_reset b header_state | |
48 | temporal_decomposition_type u header_state | |
49 | temporal_decomposition_count u header_state | |
50 | spatial_decomposition_count u header_state | |
51 | colorspace_type u header_state | |
52 | chroma_h_shift u header_state | |
53 | chroma_v_shift u header_state | |
54 | spatial_scalability b header_state | |
55 | max_ref_frames-1 u header_state | |
56 | qlogs | |
57 | } | |
e9314de6 | 58 | if(!keyframe){ |
b85bf991 MN |
59 | update_mc b header_state |
60 | if(update_mc){ | |
e9314de6 MN |
61 | for(plane=0; plane<2; plane++){ |
62 | diag_mc b header_state | |
63 | htaps/2-1 u header_state | |
64 | for(i= p->htaps/2; i; i--) | |
65 | |hcoeff[i]| u header_state | |
66 | } | |
67 | } | |
bc66275b MN |
68 | update_qlogs b header_state |
69 | if(update_qlogs){ | |
70 | spatial_decomposition_count u header_state | |
71 | qlogs | |
72 | } | |
e9314de6 | 73 | } |
78954a05 MN |
74 | |
75 | spatial_decomposition_type s header_state | |
76 | qlog s header_state | |
77 | mv_scale s header_state | |
78 | qbias s header_state | |
79 | block_max_depth s header_state | |
80 | ||
81 | qlogs: | |
82 | for(plane=0; plane<2; plane++){ | |
83 | quant_table[plane][0][0] s header_state | |
84 | for(level=0; level < spatial_decomposition_count; level++){ | |
85 | quant_table[plane][level][1]s header_state | |
86 | quant_table[plane][level][3]s header_state | |
87 | } | |
88 | } | |
89 | ||
90 | reset_contexts: | |
91 | *_state[*]= MID_STATE | |
92 | ||
93 | prediction: | |
94 | for(y=0; y<block_count_vertical; y++) | |
95 | for(x=0; x<block_count_horizontal; x++) | |
96 | block(0) | |
97 | ||
98 | block(level): | |
c3922c65 | 99 | mvx_diff=mvy_diff=y_diff=cb_diff=cr_diff=0 |
78954a05 MN |
100 | if(keyframe){ |
101 | intra=1 | |
78954a05 MN |
102 | }else{ |
103 | if(level!=block_max_depth){ | |
104 | s_context= 2*left->level + 2*top->level + topleft->level + topright->level | |
105 | leaf b block_state[4 + s_context] | |
106 | } | |
107 | if(level==block_max_depth || leaf){ | |
108 | intra b block_state[1 + left->intra + top->intra] | |
109 | if(intra){ | |
110 | y_diff s block_state[32] | |
111 | cb_diff s block_state[64] | |
112 | cr_diff s block_state[96] | |
113 | }else{ | |
114 | ref_context= ilog2(2*left->ref) + ilog2(2*top->ref) | |
115 | if(ref_frames > 1) | |
116 | ref u block_state[128 + 1024 + 32*ref_context] | |
117 | mx_context= ilog2(2*abs(left->mx - top->mx)) | |
118 | my_context= ilog2(2*abs(left->my - top->my)) | |
119 | mvx_diff s block_state[128 + 32*(mx_context + 16*!!ref)] | |
120 | mvy_diff s block_state[128 + 32*(my_context + 16*!!ref)] | |
121 | } | |
122 | }else{ | |
123 | block(level+1) | |
124 | block(level+1) | |
125 | block(level+1) | |
126 | block(level+1) | |
127 | } | |
128 | } | |
129 | ||
130 | ||
131 | residual: | |
48fe9238 MN |
132 | residual2(luma) |
133 | residual2(chroma_cr) | |
134 | residual2(chroma_cb) | |
135 | ||
136 | residual2: | |
137 | for(level=0; level<spatial_decomposition_count; level++){ | |
138 | if(level==0) | |
139 | subband(LL, 0) | |
140 | subband(HL, level) | |
141 | subband(LH, level) | |
142 | subband(HH, level) | |
143 | } | |
144 | ||
145 | subband: | |
78954a05 MN |
146 | FIXME |
147 | ||
148 | ||
149 | ||
150 | Tag description: | |
151 | ---------------- | |
152 | ||
153 | version | |
154 | 0 | |
155 | this MUST NOT change within a bitstream | |
156 | ||
157 | always_reset | |
158 | if 1 then the range coder contexts will be reset after each frame | |
159 | ||
160 | temporal_decomposition_type | |
161 | 0 | |
162 | ||
163 | temporal_decomposition_count | |
164 | 0 | |
165 | ||
166 | spatial_decomposition_count | |
167 | FIXME | |
168 | ||
169 | colorspace_type | |
170 | 0 | |
171 | this MUST NOT change within a bitstream | |
172 | ||
173 | chroma_h_shift | |
174 | log2(luma.width / chroma.width) | |
175 | this MUST NOT change within a bitstream | |
176 | ||
177 | chroma_v_shift | |
178 | log2(luma.height / chroma.height) | |
179 | this MUST NOT change within a bitstream | |
180 | ||
181 | spatial_scalability | |
182 | 0 | |
183 | ||
184 | max_ref_frames | |
185 | maximum number of reference frames | |
186 | this MUST NOT change within a bitstream | |
187 | ||
e9314de6 MN |
188 | update_mc |
189 | indicates that motion compensation filter parameters are stored in the | |
190 | header | |
191 | ||
192 | diag_mc | |
193 | flag to enable faster diagonal interpolation | |
194 | this SHOULD be 1 unless it turns out to be covered by a valid patent | |
195 | ||
196 | htaps | |
197 | number of half pel interpolation filter taps, MUST be even, >0 and <10 | |
198 | ||
199 | hcoeff | |
200 | half pel interpolation filter coefficients, hcoeff[0] are the 2 middle | |
201 | coefficients [1] are the next outer ones and so on, resulting in a filter | |
202 | like: ...eff[2], hcoeff[1], hcoeff[0], hcoeff[0], hcoeff[1], hcoeff[2] ... | |
203 | the sign of the coefficients is not explicitly stored but alternates | |
204 | after each coeff and coeff[0] is positive, so ...,+,-,+,-,+,+,-,+,-,+,... | |
205 | hcoeff[0] is not explicitly stored but found by subtracting the sum | |
206 | of all stored coefficients with signs from 32 | |
207 | hcoeff[0]= 32 - hcoeff[1] - hcoeff[2] - ... | |
208 | a good choice for hcoeff and htaps is | |
209 | htaps= 6 | |
210 | hcoeff={40,-10,2} | |
211 | an alternative which requires more computations at both encoder and | |
212 | decoder side and may or may not be better is | |
213 | htaps= 8 | |
214 | hcoeff={42,-14,6,-2} | |
215 | ||
216 | ||
78954a05 MN |
217 | ref_frames |
218 | minimum of the number of available reference frames and max_ref_frames | |
219 | for example the first frame after a key frame always has ref_frames=1 | |
220 | ||
221 | spatial_decomposition_type | |
222 | wavelet type | |
223 | 0 is a 9/7 symmetric compact integer wavelet | |
224 | 1 is a 5/3 symmetric compact integer wavelet | |
225 | others are reserved | |
226 | stored as delta from last, last is reset to 0 if always_reset || keyframe | |
227 | ||
228 | qlog | |
229 | quality (logarithmic quantizer scale) | |
230 | stored as delta from last, last is reset to 0 if always_reset || keyframe | |
231 | ||
232 | mv_scale | |
233 | stored as delta from last, last is reset to 0 if always_reset || keyframe | |
24dbec7c | 234 | FIXME check that everything works fine if this changes between frames |
78954a05 MN |
235 | |
236 | qbias | |
237 | dequantization bias | |
238 | stored as delta from last, last is reset to 0 if always_reset || keyframe | |
239 | ||
240 | block_max_depth | |
241 | maximum depth of the block tree | |
242 | stored as delta from last, last is reset to 0 if always_reset || keyframe | |
243 | ||
244 | quant_table | |
245 | quantization table | |
246 | ||
8f39b74d MN |
247 | |
248 | Highlevel bitstream structure: | |
249 | ============================= | |
250 | -------------------------------------------- | |
251 | | Header | | |
252 | -------------------------------------------- | |
253 | | ------------------------------------ | | |
254 | | | Block0 | | | |
255 | | | split? | | | |
256 | | | yes no | | | |
257 | | | ......... intra? | | | |
258 | | | : Block01 : yes no | | | |
259 | | | : Block02 : ....... .......... | | | |
260 | | | : Block03 : : y DC : : ref index: | | | |
261 | | | : Block04 : : cb DC : : motion x : | | | |
262 | | | ......... : cr DC : : motion y : | | | |
263 | | | ....... .......... | | | |
264 | | ------------------------------------ | | |
265 | | ------------------------------------ | | |
266 | | | Block1 | | | |
267 | | ... | | |
268 | -------------------------------------------- | |
269 | | ------------ ------------ ------------ | | |
270 | || Y subbands | | Cb subbands| | Cr subbands|| | |
271 | || --- --- | | --- --- | | --- --- || | |
272 | || |LL0||HL0| | | |LL0||HL0| | | |LL0||HL0| || | |
273 | || --- --- | | --- --- | | --- --- || | |
274 | || --- --- | | --- --- | | --- --- || | |
275 | || |LH0||HH0| | | |LH0||HH0| | | |LH0||HH0| || | |
276 | || --- --- | | --- --- | | --- --- || | |
277 | || --- --- | | --- --- | | --- --- || | |
278 | || |HL1||LH1| | | |HL1||LH1| | | |HL1||LH1| || | |
279 | || --- --- | | --- --- | | --- --- || | |
280 | || --- --- | | --- --- | | --- --- || | |
281 | || |HH1||HL2| | | |HH1||HL2| | | |HH1||HL2| || | |
282 | || ... | | ... | | ... || | |
283 | | ------------ ------------ ------------ | | |
284 | -------------------------------------------- | |
285 | ||
286 | Decoding process: | |
287 | ================= | |
288 | ||
289 | ------------ | |
290 | | | | |
291 | | Subbands | | |
292 | ------------ | | | |
293 | | | ------------ | |
294 | | Intra DC | | | |
295 | | | LL0 subband prediction | |
296 | ------------ | | |
297 | \ Dequantization | |
298 | ------------------- \ | | |
299 | | Reference frames | \ IDWT | |
300 | | ------- ------- | Motion \ | | |
301 | ||Frame 0| |Frame 1|| Compensation . OBMC v ------- | |
302 | | ------- ------- | --------------. \------> + --->|Frame n|-->output | |
303 | | ------- ------- | ------- | |
304 | ||Frame 2| |Frame 3||<----------------------------------/ | |
305 | | ... | | |
306 | ------------------- | |
307 | ||
308 | ||
78954a05 MN |
309 | Range Coder: |
310 | ============ | |
59edca9a MN |
311 | |
312 | Binary Range Coder: | |
313 | ------------------- | |
e5635270 | 314 | The implemented range coder is an adapted version based upon "Range encoding: |
315 | an algorithm for removing redundancy from a digitised message." by G. N. N. | |
316 | Martin. | |
482e74ad | 317 | The symbols encoded by the snow range coder are bits (0|1). The |
e5635270 | 318 | associated probabilities are not fixed but change depending on the symbol mix |
319 | seen so far. | |
320 | ||
78954a05 | 321 | |
ca087dbc MN |
322 | bit seen | new state |
323 | ---------+-------------------------------------------- | |
324 | 0 | 256 - state_transition_table[256 - old_state]; | |
325 | 1 | state_transition_table[ old_state]; | |
326 | ||
327 | state_transition_table = { | |
328 | 0, 0, 0, 0, 0, 0, 0, 0, 20, 21, 22, 23, 24, 25, 26, 27, | |
329 | 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40, 41, 42, | |
330 | 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 56, 57, | |
331 | 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, | |
332 | 74, 75, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, | |
333 | 89, 90, 91, 92, 93, 94, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, | |
334 | 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 114, 115, 116, 117, 118, | |
335 | 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 133, | |
336 | 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, | |
337 | 150, 151, 152, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, | |
338 | 165, 166, 167, 168, 169, 170, 171, 171, 172, 173, 174, 175, 176, 177, 178, 179, | |
339 | 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 190, 191, 192, 194, 194, | |
340 | 195, 196, 197, 198, 199, 200, 201, 202, 202, 204, 205, 206, 207, 208, 209, 209, | |
341 | 210, 211, 212, 213, 215, 215, 216, 217, 218, 219, 220, 220, 222, 223, 224, 225, | |
342 | 226, 227, 227, 229, 229, 230, 231, 232, 234, 234, 235, 236, 237, 238, 239, 240, | |
343 | 241, 242, 243, 244, 245, 246, 247, 248, 248, 0, 0, 0, 0, 0, 0, 0}; | |
344 | ||
59edca9a MN |
345 | FIXME |
346 | ||
347 | ||
aa6b38c2 | 348 | Range Coding of integers: |
59edca9a MN |
349 | -------------------------- |
350 | FIXME | |
351 | ||
ca087dbc | 352 | |
78954a05 MN |
353 | Neighboring Blocks: |
354 | =================== | |
355 | left and top are set to the respective blocks unless they are outside of | |
356 | the image in which case they are set to the Null block | |
357 | ||
90b5b51e | 358 | top-left is set to the top left block unless it is outside of the image in |
78954a05 MN |
359 | which case it is set to the left block |
360 | ||
90b5b51e | 361 | if this block has no larger parent block or it is at the left side of its |
78954a05 MN |
362 | parent block and the top right block is not outside of the image then the |
363 | top right block is used for top-right else the top-left block is used | |
364 | ||
365 | Null block | |
366 | y,cb,cr are 128 | |
367 | level, ref, mx and my are 0 | |
368 | ||
369 | ||
370 | Motion Vector Prediction: | |
371 | ========================= | |
372 | 1. the motion vectors of all the neighboring blocks are scaled to | |
373 | compensate for the difference of reference frames | |
374 | ||
375 | scaled_mv= (mv * (256 * (current_reference+1) / (mv.reference+1)) + 128)>>8 | |
376 | ||
377 | 2. the median of the scaled left, top and top-right vectors is used as | |
378 | motion vector prediction | |
379 | ||
380 | 3. the used motion vector is the sum of the predictor and | |
381 | (mvx_diff, mvy_diff)*mv_scale | |
382 | ||
383 | ||
384 | Intra DC Prediction: | |
385 | ====================== | |
386 | the luma and chroma values of the left block are used as predictors | |
387 | ||
388 | the used luma and chroma is the sum of the predictor and y_diff, cb_diff, cr_diff | |
2cc45470 | 389 | to reverse this in the decoder apply the following: |
c3922c65 MN |
390 | block[y][x].dc[0] = block[y][x-1].dc[0] + y_diff; |
391 | block[y][x].dc[1] = block[y][x-1].dc[1] + cb_diff; | |
392 | block[y][x].dc[2] = block[y][x-1].dc[2] + cr_diff; | |
2cc45470 | 393 | block[*][-1].dc[*]= 128; |
78954a05 MN |
394 | |
395 | ||
396 | Motion Compensation: | |
397 | ==================== | |
e9314de6 MN |
398 | |
399 | Halfpel interpolation: | |
400 | ---------------------- | |
401 | halfpel interpolation is done by convolution with the halfpel filter stored | |
402 | in the header: | |
403 | ||
404 | horizontal halfpel samples are found by | |
405 | H1[y][x] = hcoeff[0]*(F[y][x ] + F[y][x+1]) | |
406 | + hcoeff[1]*(F[y][x-1] + F[y][x+2]) | |
407 | + hcoeff[2]*(F[y][x-2] + F[y][x+3]) | |
408 | + ... | |
409 | h1[y][x] = (H1[y][x] + 32)>>6; | |
410 | ||
411 | vertical halfpel samples are found by | |
412 | H2[y][x] = hcoeff[0]*(F[y ][x] + F[y+1][x]) | |
413 | + hcoeff[1]*(F[y-1][x] + F[y+2][x]) | |
414 | + ... | |
415 | h2[y][x] = (H2[y][x] + 32)>>6; | |
416 | ||
417 | vertical+horizontal halfpel samples are found by | |
418 | H3[y][x] = hcoeff[0]*(H2[y][x ] + H2[y][x+1]) | |
419 | + hcoeff[1]*(H2[y][x-1] + H2[y][x+2]) | |
420 | + ... | |
421 | H3[y][x] = hcoeff[0]*(H1[y ][x] + H1[y+1][x]) | |
422 | + hcoeff[1]*(H1[y-1][x] + H1[y+2][x]) | |
423 | + ... | |
424 | h3[y][x] = (H3[y][x] + 2048)>>12; | |
425 | ||
426 | ||
427 | F H1 F | |
428 | | | | | |
429 | | | | | |
430 | | | | | |
431 | F H1 F | |
432 | | | | | |
433 | | | | | |
434 | | | | | |
435 | F-------F-------F-> H1<-F-------F-------F | |
436 | v v v | |
437 | H2 H3 H2 | |
438 | ^ ^ ^ | |
439 | F-------F-------F-> H1<-F-------F-------F | |
440 | | | | | |
441 | | | | | |
442 | | | | | |
443 | F H1 F | |
444 | | | | | |
445 | | | | | |
446 | | | | | |
447 | F H1 F | |
448 | ||
449 | ||
450 | unavailable fullpel samples (outside the picture for example) shall be equal | |
451 | to the closest available fullpel sample | |
452 | ||
453 | ||
454 | Smaller pel interpolation: | |
455 | -------------------------- | |
456 | if diag_mc is set then points which lie on a line between 2 vertically, | |
457 | horizontally or diagonally adjacent halfpel points shall be interpolated | |
458 | linearly with rounding to nearest and halfway values rounded up. | |
459 | points which lie on 2 diagonals at the same time should only use the one | |
460 | diagonal not containing the fullpel point | |
461 | ||
462 | ||
463 | ||
464 | F-->O---q---O<--h1->O---q---O<--F | |
465 | v \ / v \ / v | |
466 | O O O O O O O | |
467 | | / | \ | | |
468 | q q q q q | |
469 | | / | \ | | |
470 | O O O O O O O | |
471 | ^ / \ ^ / \ ^ | |
472 | h2-->O---q---O<--h3->O---q---O<--h2 | |
473 | v \ / v \ / v | |
474 | O O O O O O O | |
475 | | \ | / | | |
476 | q q q q q | |
477 | | \ | / | | |
478 | O O O O O O O | |
479 | ^ / \ ^ / \ ^ | |
480 | F-->O---q---O<--h1->O---q---O<--F | |
481 | ||
482 | ||
483 | ||
484 | the remaining points shall be bilinearly interpolated from the | |
a11dc59a MN |
485 | up to 4 surrounding halfpel and fullpel points, again rounding should be to |
486 | nearest and halfway values rounded up | |
e9314de6 MN |
487 | |
488 | compliant snow decoders MUST support 1-1/8 pel luma and 1/2-1/16 pel chroma | |
489 | interpolation at least | |
490 | ||
491 | ||
492 | Overlapped block motion compensation: | |
493 | ------------------------------------- | |
78954a05 MN |
494 | FIXME |
495 | ||
496 | LL band prediction: | |
497 | =================== | |
1e37b7e4 MN |
498 | Each sample in the LL0 subband is predicted by the median of the left, top and |
499 | left+top-topleft samples, samples outside the subband shall be considered to | |
500 | be 0. To reverse this prediction in the decoder apply the following. | |
501 | for(y=0; y<height; y++){ | |
502 | for(x=0; x<width; x++){ | |
503 | sample[y][x] += median(sample[y-1][x], | |
504 | sample[y][x-1], | |
505 | sample[y-1][x]+sample[y][x-1]-sample[y-1][x-1]); | |
506 | } | |
507 | } | |
508 | sample[-1][*]=sample[*][-1]= 0; | |
509 | width,height here are the width and height of the LL0 subband not of the final | |
510 | video | |
511 | ||
78954a05 MN |
512 | |
513 | Dequantization: | |
514 | =============== | |
515 | FIXME | |
516 | ||
517 | Wavelet Transform: | |
518 | ================== | |
fdb99704 MN |
519 | |
520 | Snow supports 2 wavelet transforms, the symmetric biorthogonal 5/3 integer | |
521 | transform and an integer approximation of the symmetric biorthogonal 9/7 | |
522 | Daubechies wavelet. | |
523 | ||
09671ce7 MN |
524 | 2D IDWT (inverse discrete wavelet transform) |
525 | -------------------------------------------- | |
526 | The 2D IDWT applies a 2D filter recursively, each time combining the | |
527 | 4 lowest frequency subbands into a single subband until only 1 subband | |
528 | remains. | |
529 | The 2D filter is done by first applying a 1D filter in the vertical direction | |
530 | and then applying it in the horizontal one. | |
531 | --------------- --------------- --------------- --------------- | |
532 | |LL0|HL0| | | | | | | | | | | | | |
7397cf3f | 533 | |---+---| HL1 | | L0|H0 | HL1 | | LL1 | HL1 | | | | |
09671ce7 MN |
534 | |LH0|HH0| | | | | | | | | | | | |
535 | |-------+-------|->|-------+-------|->|-------+-------|->| L1 | H1 |->... | |
536 | | | | | | | | | | | | | | |
537 | | LH1 | HH1 | | LH1 | HH1 | | LH1 | HH1 | | | | | |
538 | | | | | | | | | | | | | | |
539 | --------------- --------------- --------------- --------------- | |
540 | ||
541 | ||
542 | 1D Filter: | |
543 | ---------- | |
544 | 1. interleave the samples of the low and high frequency subbands like | |
545 | s={L0, H0, L1, H1, L2, H2, L3, H3, ... } | |
546 | note, this can end with a L or a H, the number of elements shall be w | |
547 | s[-1] shall be considered equivalent to s[1 ] | |
548 | s[w ] shall be considered equivalent to s[w-2] | |
549 | ||
550 | 2. perform the lifting steps in order as described below | |
551 | ||
552 | 5/3 Integer filter: | |
553 | 1. s[i] -= (s[i-1] + s[i+1] + 2)>>2; for all even i < w | |
554 | 2. s[i] += (s[i-1] + s[i+1] )>>1; for all odd i < w | |
555 | ||
556 | \ | /|\ | /|\ | /|\ | /|\ | |
557 | \|/ | \|/ | \|/ | \|/ | | |
558 | + | + | + | + | -1/4 | |
559 | /|\ | /|\ | /|\ | /|\ | | |
560 | / | \|/ | \|/ | \|/ | \|/ | |
561 | | + | + | + | + +1/2 | |
562 | ||
563 | ||
564 | snow's 9/7 Integer filter: | |
565 | 1. s[i] -= (3*(s[i-1] + s[i+1]) + 4)>>3; for all even i < w | |
566 | 2. s[i] -= s[i-1] + s[i+1] ; for all odd i < w | |
567 | 3. s[i] += ( s[i-1] + s[i+1] + 4*s[i] + 8)>>4; for all even i < w | |
568 | 4. s[i] += (3*(s[i-1] + s[i+1]) )>>1; for all odd i < w | |
569 | ||
570 | \ | /|\ | /|\ | /|\ | /|\ | |
571 | \|/ | \|/ | \|/ | \|/ | | |
572 | + | + | + | + | -3/8 | |
573 | /|\ | /|\ | /|\ | /|\ | | |
574 | / | \|/ | \|/ | \|/ | \|/ | |
575 | (| + (| + (| + (| + -1 | |
576 | \ + /|\ + /|\ + /|\ + /|\ +1/4 | |
577 | \|/ | \|/ | \|/ | \|/ | | |
578 | + | + | + | + | +1/16 | |
579 | /|\ | /|\ | /|\ | /|\ | | |
580 | / | \|/ | \|/ | \|/ | \|/ | |
581 | | + | + | + | + +3/2 | |
fdb99704 | 582 | |
a282102d MN |
583 | optimization tips: |
584 | following are exactly identical | |
585 | (3a)>>1 == a + (a>>1) | |
586 | (a + 4b + 8)>>4 == ((a>>2) + b + 2)>>2 | |
78954a05 | 587 | |
6a1aa752 MN |
588 | 16bit implementation note: |
589 | The IDWT can be implemented with 16bits, but this requires some care to | |
590 | prevent overflows, the following list gives the minimum number of bits needed | |
591 | for some terms | |
592 | 1. lifting step | |
593 | A= s[i-1] + s[i+1] 16bit | |
594 | 3*A + 4 18bit | |
595 | A + (A>>1) + 2 17bit | |
596 | ||
597 | 3. lifting step | |
598 | s[i-1] + s[i+1] 17bit | |
599 | ||
600 | 4. lifting step | |
601 | 3*(s[i-1] + s[i+1]) 17bit | |
602 | ||
603 | ||
78954a05 MN |
604 | TODO: |
605 | ===== | |
606 | Important: | |
607 | finetune initial contexts | |
78954a05 MN |
608 | flip wavelet? |
609 | try to use the wavelet transformed predicted image (motion compensated image) as context for coding the residual coefficients | |
610 | try the MV length as context for coding the residual coefficients | |
611 | use extradata for stuff which is in the keyframes now? | |
612 | the MV median predictor is patented IIRC | |
2b6134b3 | 613 | implement per picture halfpel interpolation |
c78fc717 | 614 | try different range coder state transition tables for different contexts |
78954a05 MN |
615 | |
616 | Not Important: | |
c64a8712 | 617 | compare the 6 tap and 8 tap hpel filters (psnr/bitrate and subjective quality) |
78954a05 MN |
618 | spatial_scalability b vs u (!= 0 breaks syntax anyway so we can add a u later) |
619 | ||
620 | ||
621 | Credits: | |
622 | ======== | |
623 | Michael Niedermayer | |
624 | Loren Merritt | |
625 | ||
626 | ||
627 | Copyright: | |
628 | ========== | |
629 | GPL + GFDL + whatever is needed to make this an RFC